/*
* DebugImporter.java
*/
package org.ngbw.utils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.InputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.ngbw.sdk.Workbench;
import org.ngbw.sdk.api.core.GenericDataRecordCollection;
import org.ngbw.sdk.core.shared.IndexedDataRecord;
import org.ngbw.sdk.core.types.DataFormat;
import org.ngbw.sdk.core.types.DataType;
import org.ngbw.sdk.core.types.EntityType;
import org.ngbw.sdk.database.Folder;
import org.ngbw.sdk.database.User;
import org.ngbw.sdk.database.UserDataItem;
import org.ngbw.sdk.database.UserItemDataRecord;
/**
*
* @author Paul Hoover
*
*/
class DebugImporter {
private static class SessionDirFilter implements FilenameFilter {
public boolean accept(File dir, String name)
{
return name.matches(SESSION_DIR_PREFIX + "\\S+$");
}
}
private static final String TARGET_FOLDER_LABEL = "Imported from BWB";
private static final String BWB_ROOT_DIR = "/misc/workbench/3.2/";
private static final String SESSION_DIR_PREFIX = "^S\\.[0-9]\\.[0-9]_[0-9]+_";
private static final int MAX_LABEL_LENGTH = 1023;
private final Workbench m_workbench;
/**
* Constructs an instance of the class.
*
* @param workbench
*/
DebugImporter(Workbench workbench)
{
m_workbench = workbench;
}
/**
* Imports data from the Biology Workbench to the Next Generation Biology Workbench.
*
* @param username a BWB user name
* @param parentFolder a Folder
object that will contain the imported data items
* @return the number of data items imported
* @throws Exception
*/
public int importBwbData(String username, Folder parentFolder) throws Exception
{
if (!authenticateUser(username))
throw new Exception("Authentication failed for user " + username);
File dataDir = new File(BWB_ROOT_DIR + "USER/" + username);
// users who haven't logged in to BWB recently might have had their data erased by the BWB
// administrator, so having an account doesn't guarantee that there's anything to import
if (!dataDir.exists() || !dataDir.isDirectory())
return 0;
int numItems = 0;
String[] sessionDirNames = dataDir.list(new SessionDirFilter());
List subFolders = parentFolder.findSubFolders();
for (int i = 0 ; i < sessionDirNames.length ; i += 1) {
File sessionFile = new File(dataDir.getAbsolutePath() + "/" + sessionDirNames[i] + "/Session_File");
if (!sessionFile.exists() || sessionFile.length() < 1)
continue;
Folder newFolder = new Folder(parentFolder);
String folderLabel = createUniqueLabel(subFolders, sessionDirNames[i].replaceFirst(SESSION_DIR_PREFIX, "").replaceAll("_+", " ").trim());
newFolder.setLabel(folderLabel);
newFolder.save();
BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(sessionFile));
try {
while (true) {
String typeSection = readSection(inputStream);
// this will only occur if we've reached EOF
if (typeSection == null)
break;
String fromSection = readSection(inputStream);
String labelSection = readSection(inputStream);
String sequenceSection = readSection(inputStream);
DataType dataType;
EntityType entityType;
if(typeSection.equalsIgnoreCase("Alignment")) {
dataType = DataType.SEQUENCE_ALIGNMENT;
if(fromSection.matches(".+\\sProtein$"))
entityType = EntityType.PROTEIN;
else if (fromSection.matches(".+\\sNucleic$"))
entityType = EntityType.NUCLEIC_ACID;
else
entityType = EntityType.UNKNOWN;
}
else {
dataType = DataType.SEQUENCE;
if(typeSection.equals("Protein"))
entityType = EntityType.PROTEIN;
else if (typeSection.equals("Nucleic"))
entityType = EntityType.NUCLEIC_ACID;
else
entityType = EntityType.UNKNOWN;
}
if (labelSection.length() > MAX_LABEL_LENGTH) {
System.err.println("Truncating a label in session " + folderLabel + " for BWB user " + username + " from " + labelSection.length() + " characters to " + String.valueOf(MAX_LABEL_LENGTH));
labelSection = labelSection.substring(0, MAX_LABEL_LENGTH);
}
UserDataItem newItem = new UserDataItem(newFolder);
newItem.setLabel(labelSection);
newItem.setDataFormat(DataFormat.FASTA);
newItem.setDataType(dataType);
newItem.setEntityType(entityType);
newItem.setData(sequenceSection);
try {
GenericDataRecordCollection records = m_workbench.extractDataRecords(newItem);
for (Iterator elements = records.toList().iterator() ; elements.hasNext() ; )
newItem.dataRecords().add(new UserItemDataRecord(elements.next(), newItem));
}
catch (Exception err) {
// torpedoed by one of Hannes' RuntimeExceptions. We have no way of knowing
// if this indicates something serious or not, so we just ignore it
System.err.println("Caught an exception while extracting data records:");
err.printStackTrace(System.err);
}
newItem.metaData().put("importedFrom", "BWB");
newItem.save();
numItems += 1;
}
}
finally {
inputStream.close();
}
}
return numItems;
}
/**
* Authenticates a BWB user using the given name and password. These credentials are compared to
* entries in an Apache htpasswd file that the BWB uses.
*
* @param username a BWB user name
* @return true
if the user name was found, and the password matches the stored password
* @throws Exception
*/
private boolean authenticateUser(String username) throws Exception
{
BufferedReader htpasswdReader = new BufferedReader(new FileReader(BWB_ROOT_DIR + "CONF/htpasswd"));
try {
String line;
while ((line = htpasswdReader.readLine()) != null) {
int colonChar = line.indexOf(':');
if (colonChar == -1)
continue;
String name = line.substring(0, colonChar);
if (!name.equals(username))
continue;
return true;
}
}
finally {
htpasswdReader.close();
}
return false;
}
/**
* Ensures that a given label is unique with respect to a given list of folders. If the label is
* unique, the unaltered label is returned. If not, a unique version of the label is produced by
* appending a numeric value to the original label.
*
* @param folders a List
object containing the folders to search
* @param label a String
object which is the original label
* @return
*/
private String createUniqueLabel(List folders, String label)
{
int suffix = 1;
String newLabel = label;
while (labelIsDuplicate(folders, newLabel)) {
suffix += 1;
newLabel = label + ' ' + String.valueOf(suffix);
}
return newLabel;
}
/**
* Searches a list of folders for the given label.
*
* @param folders a List
object containing the folders to search
* @param label a String
object which is the label to search for
* @return true
if the given label was found in the list of folders
*/
private boolean labelIsDuplicate(List folders, String label)
{
for (Iterator elements = folders.iterator() ; elements.hasNext() ; ) {
if (elements.next().getLabel().equals(label))
return true;
}
return false;
}
/**
* Reads the data from a section of a BWB session file. Each data item stored in a session file has
* four sections associated with it, TYPE, FROM, LABEL, and SEQ. Sections are written in the file
* in this order. Each section is preceded by the length of the section, in characters. The data
* for each section is written as a key / value pair, with the following format:
*
* KEY[ n ]=[ data... ]
*
* where KEY is one of the section names and n is the ordinal of the data item. There can be any
* number of data items stored in a particular session file.
*
* @param input an InputStream
object opened on the session file
* @return a String
object containing the characters from this section
* @throws IOException
*/
private String readSection(InputStream input) throws IOException
{
int length = 0;
// the length of a section is a 4-byte value written in network byte order, which is big-endian.
// The following operations reverse the byte order to little-endian, so hopefully we're running
// on that sort of architecture
length |= input.read() << 24;
length |= input.read() << 16;
length |= input.read() << 8;
length |= input.read();
// we've reached EOF
if (length < 0)
return null;
byte[] buffer = new byte[length];
input.read(buffer, 0, length);
int offset = 0;
// skip over the section key
while ((char) buffer[offset] != '=')
offset += 1;
// skip over the equals sign
offset += 1;
return new String(buffer, offset, length - offset);
}
public static void main(String[] args)
{
try {
if (args.length != 2)
throw new Exception("usage: DebugImporter ngbw_account bwb_account");
Workbench workbench = Workbench.getInstance();
User owner = User.findUser(args[0]);
Folder homeFolder = owner.getHomeFolder();
Folder target = homeFolder.findOrCreateSubFolder(TARGET_FOLDER_LABEL);
int numItems = (new DebugImporter(workbench)).importBwbData(args[1], target);
System.out.println("Imported " + numItems + " items");
}
catch (Exception err) {
err.printStackTrace(System.err);
System.exit(-1);
}
}
}