Skip to content

Commit

Permalink
Fix for RT ticket #642944. ACT can now handle NCBI Blast web site com…
Browse files Browse the repository at this point in the history
…parison files. This is a workaround for the fact that DoubleACT and WebACT are currently down
  • Loading branch information
kpepper committed Nov 19, 2018
1 parent 814662a commit aa30c42
Show file tree
Hide file tree
Showing 10 changed files with 1,017 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/* BlastWebSiteHitTableComparisonData.java
*
* This file is part of Artemis
*
* Copyright (C) 2018 Genome Research Limited
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/

package uk.ac.sanger.artemis;

import uk.ac.sanger.artemis.util.LinePushBackReader;

import java.io.*;
import java.util.List;
import java.util.StringTokenizer;

/**
 * SimpleComparisonData implementation for the tab-separated "hit table"
 * produced by the NCBI Blast web site (blastn or tblastx).
 *
 * Each data line is expected to hold between {@link #MIN_NUM_FIELDS} and
 * {@link #MAX_NUM_FIELDS} tab-separated fields, in this order: query name,
 * subject name, % identity, alignment length, mismatches, gap opens,
 * query start, query end, subject start, subject end, e-value, bit score.
 * Any further fields (present in tblastx output) are ignored.
 *
 * @author kp11
 */
public class BlastWebSiteHitTableComparisonData extends SimpleComparisonData {

  /** Min number of fields in web site blastn/tblastx file. */
  public static final int MIN_NUM_FIELDS = 12;

  /** Max number of fields in web site blastn/tblastx file. */
  public static final int MAX_NUM_FIELDS = 14;

  /** Comparison file descriptive type. */
  public static final String TYPE = "Blast web site hit table comparison data";

  /** Zero-based index of the % identity column. */
  private static final int PERCENT_IDENT_FIELD = 2;

  /** Zero-based index of the query start column. */
  private static final int QUERY_START_FIELD = 6;

  /** Zero-based index of the query end column. */
  private static final int QUERY_END_FIELD = 7;

  /** Zero-based index of the subject start column. */
  private static final int SUBJECT_START_FIELD = 8;

  /** Zero-based index of the subject end column. */
  private static final int SUBJECT_END_FIELD = 9;

  /** Zero-based index of the bit score column. */
  private static final int BIT_SCORE_FIELD = 11;

  /** Number of leading header lines inspected by formatCorrect (). */
  private static final int NUM_HEADER_LINES = 4;

  /**
   * Create a new BlastWebSiteHitTableComparisonData by reading from the given
   * LinePushBackReader.
   *
   * @param stream the reader to take the comparison data from.
   * @throws IOException if the superclass constructor fails while reading.
   */
  public BlastWebSiteHitTableComparisonData (final LinePushBackReader stream)
      throws IOException {
    super (stream);
  }

  /**
   * Create a new, empty instance of BlastWebSiteHitTableComparisonData.
   */
  public BlastWebSiteHitTableComparisonData () {

  }

  /**
   * Returns a new, empty instance of this type of object.
   */
  protected SimpleComparisonData getNewSimpleComparisonData () {
    return new BlastWebSiteHitTableComparisonData ();
  }

  /**
   * Make an AlignMatch object from the given String.
   *
   * @param line one line of the hit table.
   * @return the match described by the line, or null if the line is blank
   *   or starts with "#" (a comment/header line).
   * @throws IOException if the line does not have between MIN_NUM_FIELDS and
   *   MAX_NUM_FIELDS tab-separated fields (reported as a
   *   ComparisonDataParseException), or if one of the numeric fields cannot
   *   be parsed.
   */
  private static AlignMatch makeMatchFromStringStatic (String line)
      throws IOException {

    if (line.trim ().length () == 0 || line.startsWith ("#")) {
      return null;
    }

    // StringTokenizer is kept deliberately: it collapses consecutive tabs,
    // so switching to String.split would change which lines are accepted.
    final StringTokenizer tokenizer = new StringTokenizer (line, "\t");

    final int numTokens = tokenizer.countTokens ();

    if (numTokens < MIN_NUM_FIELDS || numTokens > MAX_NUM_FIELDS) {
      final String message = "while reading " + TYPE +
        ": unexpected number of fields for this line: " + line;
      throw new ComparisonDataParseException (message);
    }

    // Collect every field so the interesting columns can be picked by index;
    // the trailing tblastx-only columns are simply never looked at.
    final String[] fields = new String [numTokens];

    for (int i = 0; i < numTokens; ++i) {
      fields[i] = tokenizer.nextToken ();
    }

    try {
      // Score and % identity may be written with a fractional part; they are
      // truncated to int before being handed to makeAlignMatch ().
      final int score = (int) Float.parseFloat (fields[BIT_SCORE_FIELD]);
      final int percentIdent =
        (int) Float.parseFloat (fields[PERCENT_IDENT_FIELD]);
      final int qStart = Integer.parseInt (fields[QUERY_START_FIELD]);
      final int qEnd = Integer.parseInt (fields[QUERY_END_FIELD]);
      final int sStart = Integer.parseInt (fields[SUBJECT_START_FIELD]);
      final int sEnd = Integer.parseInt (fields[SUBJECT_END_FIELD]);

      return makeAlignMatch (sStart, sEnd, qStart, qEnd, score,
                             percentIdent);

    } catch (NumberFormatException e) {
      // Keep the original exception as the cause so the stack trace of the
      // failed parse is not lost.
      throw new IOException ("while reading " +
                             TYPE +
                             ": failed to parse a number from this string: " +
                             e.getMessage (), e);
    }
  }

  /**
   * Make an AlignMatch object from the given String. The String must be in
   * a format appropriate for this object.
   *
   * @param line one line of the hit table.
   * @return the match, or null for blank and "#" lines.
   * @throws IOException if the line cannot be parsed.
   */
  @Override
  protected AlignMatch makeMatchFromString (final String line)
      throws IOException {
    return makeMatchFromStringStatic (line);
  }

  /**
   * Returns true if and only if the given header lines are in the correct
   * format for this type of ComparisonData. The first four lines must start
   * with, in order: "# tblastx" or "# blastn", "# Iteration:", "# Query:"
   * and "# RID:".
   *
   * @param headers the leading "#" lines of the file; a null list or one
   *   with fewer than four entries is never in the correct format.
   * @return true if the headers identify a Blast web site hit table.
   */
  public static boolean formatCorrect (final List<String> headers) {

    if (headers == null || headers.size () < NUM_HEADER_LINES) {
      return false;
    }

    final String blastTypeHeader = headers.get (0);
    final String iterationHeader = headers.get (1);
    final String queryHeader = headers.get (2);
    final String ridHeader = headers.get (3);

    final boolean knownProgram =
      blastTypeHeader.startsWith ("# tblastx") ||
      blastTypeHeader.startsWith ("# blastn");

    return knownProgram &&
           iterationHeader.startsWith ("# Iteration:") &&
           queryHeader.startsWith ("# Query:") &&
           ridHeader.startsWith ("# RID:");
  }

}
116 changes: 97 additions & 19 deletions src/main/java/uk/ac/sanger/artemis/ComparisonDataFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* This file is part of Artemis
*
* Copyright (C) 1999-2002 Genome Research Limited
* Copyright (C) 1999-2018 Genome Research Limited
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
Expand All @@ -20,7 +20,6 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/ComparisonDataFactory.java,v 1.1 2004-06-09 09:44:16 tjc Exp $
*/

package uk.ac.sanger.artemis;
Expand All @@ -29,6 +28,10 @@
import uk.ac.sanger.artemis.util.LinePushBackReader;

import java.io.*;
import java.util.LinkedList;
import java.util.List;

import org.apache.log4j.Logger;

/**
* This class contains the method readComparisonData (), which returns an
Expand All @@ -39,47 +42,122 @@
**/

public class ComparisonDataFactory {

/** Logging instance. */
private static Logger logger4j = Logger.getLogger(ComparisonDataFactory.class);

/**
* This method creates an appropriate ComparisonData object from a Document.
**/
*/
static public ComparisonData readComparisonData (Document data_document)
throws IOException {
// NOTE(review): this listing looks like a rendered diff in which removed and
// added lines are interleaved without +/- markers (for example 'line' is
// declared twice in the same scope below, and the first 'if (line == null) {'
// has no matching close of its own). Confirm against the real file before
// relying on the exact statement order shown here.

final Reader in_file = data_document.getReader ();
// Document name, used in log and end-of-file messages.
String fileName = data_document.getName();

final LinePushBackReader pushback_reader =
new LinePushBackReader (in_file);

final String line = pushback_reader.readLine ();

if (line == null) {
throw new IOException ("End of file while reading from: " +
data_document);

String line = null;
List<String> headers = null;
// Peek at the first line and collect the leading '#' header lines. If either
// read fails, close the reader on a best-effort basis and rethrow the
// original exception.
try {
line = peekFirstLine(pushback_reader, fileName);
headers = readHeaders(pushback_reader, fileName);
} catch (IOException e) {

try {
// close the reader
pushback_reader.close();
} catch (IOException ioe) {
// Ignore: the exception from the failed read (e) is the one reported.
}

throw e;
}


pushback_reader.pushBack (line);

// The Blast web site hit table is recognised from its header lines; every
// other format below is recognised from the first line only.
if (BlastWebSiteHitTableComparisonData.formatCorrect (headers)) {
logger4j.info("Loading Blast web site hit table comparison file: " + fileName);
return new BlastWebSiteHitTableComparisonData (pushback_reader);
}

if (MSPcrunchComparisonData.formatCorrect (line)) {
logger4j.info("Loading crunch comparison file: " + fileName);
return new MSPcrunchComparisonData (pushback_reader);
} else {
if (SSAHAComparisonData.formatCorrect (line)) {
logger4j.info("Loading SSAHA comparison file: " + fileName);
return new SSAHAComparisonData (pushback_reader);
} else {
if (BlastM8ComparisonData.formatCorrect (line)) {
return new BlastM8ComparisonData (pushback_reader);
if (MegaBlastComparisonData.formatCorrect (line)) {
logger4j.info("Loading mega blast comparison file: " + fileName);
return new MegaBlastComparisonData (pushback_reader);
} else {
if (MegaBlastComparisonData.formatCorrect (line)) {
return new MegaBlastComparisonData (pushback_reader);
if (BlastM8ComparisonData.formatCorrect (line)) {
logger4j.info("Loading Blast m8 comparison file: " + fileName);
return new BlastM8ComparisonData (pushback_reader);
} else {
// if (tokenizer.countTokens () < 8) {
// return new MUMmerComparisonData (pushback_reader);
// } else {
throw new IOException ("cannot understand the comparison file format");
// }

// No known format matched: close the reader (best effort) before logging
// and reporting the failure.
try {
// close the reader
pushback_reader.close();
} catch (IOException ioe) {
// Ignore: the "cannot understand" failure below is the one reported.
}

logger4j.info("Failed to load ACT comparison file: " + fileName);
throw new IOException ("cannot understand the comparison file format");
}
}
}
}

}

/**
 * Collects the run of lines starting with "#" at the reader's current
 * position. The first line that does not start with "#" ends the run and is
 * pushed back onto the reader; the "#" lines themselves are not pushed back.
 *
 * @param reader the reader to take lines from.
 * @param fileName name used in the end-of-file error message.
 * @return the "#" lines in file order (possibly empty).
 * @throws IOException if the reader runs out of lines before a line that
 *   does not start with "#" is seen, or if reading fails.
 */
protected static List<String> readHeaders(LinePushBackReader reader, String fileName) throws IOException {

  final List<String> headerLines = new LinkedList<String>();

  String current = reader.readLine();

  while (current != null && current.startsWith("#")) {
    headerLines.add(current);
    current = reader.readLine();
  }

  if (current == null) {
    throw new IOException (
      "End of file while reading from: " +
      fileName);
  }

  // 'current' is the first non-header line: hand it back to the reader.
  reader.pushBack(current);

  return headerLines;
}

/**
 * Reads one line from the reader, pushes it back onto the reader and
 * returns it.
 *
 * @param reader the reader to take the line from.
 * @param fileName name used in the end-of-file error message.
 * @return the line that was read.
 * @throws IOException if the reader has no line to return, or if reading
 *   fails.
 */
protected static String peekFirstLine(LinePushBackReader reader, String fileName) throws IOException {

  final String firstLine = reader.readLine ();

  if (firstLine != null) {
    reader.pushBack(firstLine);
    return firstLine;
  }

  throw new IOException (
    "End of file while reading from: " +
    fileName);
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

import java.io.*;
import java.util.StringTokenizer;
import java.util.Vector;

/**
* This class implements the ComparisonData interface for MegaBlast output.
Expand Down
Loading

0 comments on commit aa30c42

Please sign in to comment.