Skip to content

Commit

Permalink
Handle different databases for the different BLAST applications
Browse files Browse the repository at this point in the history
  • Loading branch information
bosborne committed Feb 19, 2017
1 parent 1e6ee72 commit 621a17e
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 118 deletions.
57 changes: 23 additions & 34 deletions BioPythonUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
import sys
import os
import json
# BioPython 1.68 is bundled with this package
sys.path.append(os.path.dirname(__file__))
from Bio import SeqIO, Entrez
Expand All @@ -14,13 +15,15 @@
from Bio.Alphabet import IUPAC
from Bio.Blast import NCBIWWW

# Use globals which can be set by show_quick_panel()
# Get BLAST details from the JSON config file that sits next to this module.
# The file is opened in a "with" block so the handle is closed as soon as the
# JSON has been parsed instead of being left open until garbage collection.
with open(os.path.join(os.path.dirname(__file__), "config.json")) as _config:
    app_info = json.load(_config)
# List of result formats offered by the "select format" quick panel
blast_formats = app_info['blast_formats']
# Maps each BLAST application name to the list of databases offered for it
blast_info = app_info['blast_info']
# Globals which are used or set by show_quick_panel()
blast_db = None
blast_app = None
blast_format = None
blast_apps = ['blastp', 'blastn', 'blastx', 'tblastn', 'tblastx']
blast_formats = ['HTML', 'Text', 'ASN.1', 'XML']
blast_dbs = ['nr', 'refseq', 'swissprot', 'pat', 'month', 'pdb', 'env_nr']


# "Download Sequence by Search"
Expand All @@ -31,10 +34,6 @@ def run(self, edit):
entrez_retmax = sublime.load_settings(
'BioPythonUtils.sublime-settings').get('entrez_retmax')

# Default is 20
if not entrez_retmax:
entrez_retmax = 20

email_for_eutils = sublime.load_settings(
'BioPythonUtils.sublime-settings').get('email_for_eutils')

Expand Down Expand Up @@ -403,14 +402,6 @@ def run(self, edit):
sublime.error_message("No BLAST database specified")
return

if not blast_app:
sublime.error_message("No BLAST application specified")
return

if not blast_format:
sublime.error_message("No BLAST format specified")
return

# 1 page is written for each report if there are multiple selections
for region in self.view.sel():
seq_str = self.view.substr(region)
Expand Down Expand Up @@ -451,43 +442,41 @@ def run(self, edit):
class SelectBlastDatabase(sublime_plugin.WindowCommand):
    """Window command that lets the user pick the remote BLAST database.

    The choices shown are the databases listed in ``blast_info`` for the
    currently selected BLAST application; the picked index is handed to
    ``setBlastDatabase``.
    """

    def run(self):
        # The database list is keyed by application, so an application has
        # to be known first. Using .get() avoids a KeyError when blast_app
        # is still None or is not one of the keys of blast_info.
        databases = blast_info.get(blast_app)
        if not databases:
            sublime.error_message("No BLAST application specified")
            return
        sublime.active_window().show_quick_panel(
            databases, setBlastDatabase)


def setBlastDatabase(index):
    """Quick-panel callback: store the database chosen for ``blast_app``.

    ``index`` is a position in ``blast_info[blast_app]``. An index of -1 or
    lower (what the quick panel reports when nothing was picked) leaves
    ``blast_db`` unchanged.
    """
    global blast_db
    if index <= -1:
        return
    databases = blast_info[blast_app]
    blast_db = databases[index]


class SelectBlastApplication(sublime_plugin.WindowCommand):
    """Window command that lets the user pick the BLAST application.

    The application names offered are the keys of ``blast_info``; the picked
    index is handed to ``setBlastApplication``.
    """

    def run(self):
        # Build the list the same way setBlastApplication does, so that the
        # index reported by the panel refers to the same entry there.
        applications = list(blast_info.keys())
        sublime.active_window().show_quick_panel(
            applications, setBlastApplication)


def setBlastApplication(index):
    """Quick-panel callback: store the chosen BLAST application.

    ``index`` is a position in the list of ``blast_info`` keys. An index of
    -1 or lower (what the quick panel reports when nothing was picked)
    leaves ``blast_app`` unchanged.
    """
    global blast_app
    if index <= -1:
        return
    applications = list(blast_info.keys())
    blast_app = applications[index]


class SelectBlastFormat(sublime_plugin.WindowCommand):
    """Window command that lets the user pick the BLAST result format.

    The entries of ``blast_formats`` are shown in a quick panel and the
    picked index is handed to ``setBlastFormat``.
    """

    def run(self):
        window = sublime.active_window()
        window.show_quick_panel(blast_formats, setBlastFormat)


def setBlastFormat(index):
    """Quick-panel callback: store the chosen BLAST result format.

    ``index`` is a position in ``blast_formats``. An index of -1 or lower
    (what the quick panel reports when nothing was picked) leaves
    ``blast_format`` unchanged.
    """
    global blast_format
    if index <= -1:
        return
    blast_format = blast_formats[index]


# Quick-panel callback that stores the entry of the flat blast_dbs list at
# the given index in the blast_db global; an index of -1 or lower is ignored.
# NOTE(review): this looks like the pre-change version of setBlastDatabase
# (it indexes blast_dbs rather than blast_info[blast_app]) whose diff
# markers and indentation were lost - confirm before relying on it.
def setBlastDatabase(index):
global blast_db, blast_dbs
if index > -1:
blast_db = blast_dbs[index]


# Quick-panel callback that stores the entry of the flat blast_apps list at
# the given index in the blast_app global; an index of -1 or lower is ignored.
# NOTE(review): this looks like the pre-change version of setBlastApplication
# (it indexes blast_apps rather than the keys of blast_info) whose diff
# markers and indentation were lost - confirm before relying on it.
def setBlastApplication(index):
global blast_app, blast_apps
if index > -1:
blast_app = blast_apps[index]


def validate_nt(seq):
# Valid: {'G', 'T', 'U', 'C', 'A'}
valid_bases = set(IUPAC.unambiguous_dna.letters +
Expand Down
10 changes: 5 additions & 5 deletions BioPythonUtils.sublime-settings
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"email_for_eutils": "", // This email address will be used for EUtils queries
"entrez_retmax": "", // Maximum number of Entrez records downloaded
"remote_blast_app": "", // blastp, blastn, blastx, tblastn, tblastx
"remote_blast_format": "", // HTML, Text, ASN.1, XML
"remote_blast_db": "" // nr, refseq, swissprot, pat, month, pdb, env_nr
"email_for_eutils": "", // This email address will be used for EUtils queries
"entrez_retmax": "1000", // Maximum number of Entrez records downloaded
"remote_blast_app": "blastp", // blastp, blastn, blastx, tblastn, tblastx
"remote_blast_format": "Text", // HTML, Text, ASN.1, XML
"remote_blast_db": "nr" // Will depend on the BLAST application
}
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ Downloads a taxon as GenBank format entries from [NCBI](http://www.ncbi.nlm.nih.

#### "Remote BLAST"

Sends the selected Fasta format or "plain" sequence(s) to the [BLAST server at NCBI](http://blast.ncbi.nlm.nih.gov/Blast.cgi) and retrieves the results. Set the application, database, and result format using the Command Palette. You can also set default values
for these in your "Settings - User" file ("remote_blast_app", "remote_blast_db", "remote_blast_format").
Sends the selected Fasta format or "plain" sequence(s) to the [BLAST server at NCBI](http://blast.ncbi.nlm.nih.gov/Blast.cgi) and retrieves the results. You can set the application, database, and result format using the Command Palette. You can also set some default values in your "Settings - User" file ("remote_blast_app", "remote_blast_format"). Note that the available databases change depending on the BLAST application.

#### "Genbank To Fasta"

Expand Down
160 changes: 84 additions & 76 deletions config.json
Original file line number Diff line number Diff line change
@@ -1,79 +1,87 @@
{
"blastn": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nr/nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
"blast_formats": [
"HTML",
"Text",
"ASN.1",
"XML"
],
"blastp": [
"env_nr",
"landmark"
"nr",
"pat",
"pdb",
"refseq_protein",
"swissprot",
"tsa_nr",
],
"blastx": [
"env_nr",
"landmark"
"nr",
"pat",
"pdb",
"refseq_protein",
"swissprot",
"tsa_nr",
],
"tblastn": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nr/nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
],
"tblastx": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nr/nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
]
"blast_info": {
"blastn": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
],
"blastp": [
"env_nr",
"landmark",
"nr",
"pat",
"pdb",
"refseq_protein",
"swissprot",
"tsa_nr"
],
"blastx": [
"env_nr",
"landmark",
"nr",
"pat",
"pdb",
"refseq_protein",
"swissprot",
"tsa_nr"
],
"tblastn": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
],
"tblastx": [
"alu_repeats",
"Bacteria and Archaea",
"chromosome",
"dbsts",
"est",
"gss",
"HTGS",
"nt",
"pat",
"pdb",
"refseq_gene",
"refseq_genomic",
"refseq_representative_genomes",
"refseq_rna",
"SRA",
"TSA",
"wgs"
]
}
}
2 changes: 1 addition & 1 deletion package-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
],
"description": "BioPython Utilities for Sublime Text 3",
"url": "https://github.com/bosborne/BioPythonUtils",
"version": "2017-01-19_12-21"
"version": "2017-02-18_12-21"
}
}
20 changes: 20 additions & 0 deletions test/test.fa
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,23 @@ IDGWYGFRHQNAEGTGTAADLKSTQAAIDQINGKLNRLIEKTNEKYHQIEKEFEQVEGRI
QDLEKYVEDTKIDLWSYNAELLVALENQHTIDVTDSEMNKLFERVRRQLRENAEDKGNGC
FEIFHQCDNNCIESIRNGTYDHDIYRDEAINNRFQIQGVKLTQGYKDIILWISFSISCFL
LVALLLAFILWACQNGNIRCQICI
>S000320704 uncultured eubacterium TRA2-10; AF047642
GACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGgtttataagggc
ttgcccttatagatAGTGGCGAACGGGTGCGTAACACGTGAGCAACCTGCCCCAAAGTTTGGAATAACACCGGGAAACCG
ATGCTAATACCAAATATGcTCACACTATCACAAGATAGAGTGAgGAAAGTtttTCGCTTTGGGAGGGGCTCGCGGCCTAT
CAGCTTGTTGGTGAGGTAACGGCTCACCAAGGCATCGACGGGTAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTG
AGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCG
TGGAGGATGAAGGCCTTAGGGTCGTAAACTCCTTTCAGCAGGAACGAaaaTGACGGTACCTGCAGAAGAAGCTCCGGCCA
ACTACGTGCCAGCAGCCGCGGTAATACGTAGGGAGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGC
TTGGCAAGTCGGATGTGAAACCCCCAGGCTTAACCTGGGGCCGCCATTCGATACTGCTAtGGCTTGAGTTCGGTAGGGGA
TTGTGGAATTCCCGGTGTAGCGATGAAATGCGCAGATATCGGGAGGAACACCAATGGCGAAGGCAGCAATCTGGGCCGAC
ACTGACGCTGAGGAGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGTTGGGCACTA
GGTGTGGGACctacttcgacggGTTCCGTGCCGTAGCTAACGCATTAAGTGCCCCGCCTGGGGAGTACGGCCGCAAGGCT
AAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGTGGCTTAATTCGATGCAACGCGAAGAACCTCACC
TGGGCTTGACATGTTGGGaAAAGCCGTAGAGATACGGTGtccattagggCCCTT-CACAGGTGGTGCATGGCTGTCGTCA
GCTCGTGTCGTGAGATGTTGAGTTAAGTCCCGCAACGAGCGCAACCCTTGTCTTATGTTACCAGCGagtaatgtCGGGGA
CTCATGAGAGACTGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGCCCCTTATGTCCAGGGCTGC
ACACATGCTACAATGGCCGGTACAGAGGGTCGCAATCCCGCGAGGGGGAGCTAATCCCACAAAGCCGGTCTCAGTTCGGA
TCGCAGTCTGCAACTCGACTGCGTGAAGCCGGAGTCGCTAGTAATCCCGAATCAGCaTTGTCGGGGTGAATACGTTCCCG
GGCCTTGTACACACCGCCCGTCACACCACGAAAGTCGGCAACACCCGAAGCCGGTggcccaaccagtaatggagggaGCC

0 comments on commit 621a17e

Please sign in to comment.