Skip to content

Commit

Permalink
update to HLAminer
Browse files Browse the repository at this point in the history
Ability to stream the (.sam) output of modern read aligners directly
into HLAminer.
Initial support for predicting HLA types from long nanopore reads such
as those from Oxford Nanopore Technologies.
Better information/sub-routine/date tracking in hlaminer
  • Loading branch information
warrenlr committed Oct 6, 2018
1 parent ca00182 commit c00effa
Show file tree
Hide file tree
Showing 211 changed files with 2,626,927 additions and 14 deletions.
743 changes: 743 additions & 0 deletions HLAminer-1.4/bin/HLAminer.pl

Large diffs are not rendered by default.

639 changes: 639 additions & 0 deletions HLAminer-1.4/bin/HLAminer_v1-2.pl

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions HLAminer-1.4/bin/HPRArnaseq_classI-II.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# HPRArnaseq_classI-II.sh
# Align paired read files rd1.fq / rd2.fq to the HLA class I+II CDS references
# with bwa aln + sampe, then predict HLA alleles from aln.sam with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA-I_II_CDS.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
# -e 0 -o 0: allow no gap extensions and no gap opens in the alignments.
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" aln -e 0 -o 0 "$ref" rd2.fq > aln_test.2.sai
"$bwa" sampe -o 1000 "$ref" aln_test.1.sai aln_test.2.sai rd1.fq rd2.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -h "$ref" -s 500
7 changes: 7 additions & 0 deletions HLAminer-1.4/bin/HPRArnaseq_classI-II_SE.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# HPRArnaseq_classI-II_SE.sh
# Single-end variant: align rd1.fq to the HLA class I+II CDS references with
# bwa aln + samse, then predict HLA alleles from aln.sam with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA-I_II_CDS.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" samse "$ref" aln_test.1.sai rd1.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -e 1 -h "$ref" -s 500
16 changes: 16 additions & 0 deletions HLAminer-1.4/bin/HPRArnaseq_classI.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# HPRArnaseq_classI.sh
# Align paired read files rd1.fq / rd2.fq to the HLA-A/B/C CDS references with
# bwa aln + sampe, predict HLA alleles with HLAminer, then rename the
# HLAminer_HPRA.{csv,log} outputs to HLAminer_HPRArun.{csv,log}.
#
# Environment:
#   BWA  optional path to the bwa executable (default: `bwa` on PATH).

# Abort on the first failed step: without this the mv commands below would run
# after a failed prediction and could rename stale output from an earlier run.
set -eu

bwa=${BWA:-bwa}
ref=../database/HLA_ABC_CDS.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa v0.7.17-r1188"
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" aln -e 0 -o 0 "$ref" rd2.fq > aln_test.2.sai
"$bwa" sampe -o 1000 "$ref" aln_test.1.sai aln_test.2.sai rd1.fq rd2.fq > aln.sam

# Alternative (disabled): align with bwa mem, then add MD tags with samtools.
#echo "Running bwa mem..."
#/gsc/btl/linuxbrew/bin/bwa mem -a ../database_bwamem/HLA_ABC_CDS.fasta rd1.fq rd2.fq > TEST_vs_HLA.sam
#echo "Fixing MD tag..."
#/gsc/btl/linuxbrew/bin/samtools fillmd -S TEST_vs_HLA.sam ../database_bwamem/HLA_ABC_CDS.fasta > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -h "$ref" -s 500
mv HLAminer_HPRA.csv HLAminer_HPRArun.csv
mv HLAminer_HPRA.log HLAminer_HPRArun.log
7 changes: 7 additions & 0 deletions HLAminer-1.4/bin/HPRArnaseq_classI_SE.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# HPRArnaseq_classI_SE.sh
# Single-end variant: align rd1.fq to the HLA-A/B/C CDS references with
# bwa aln + samse, then predict HLA alleles from aln.sam with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA_ABC_CDS.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" samse "$ref" aln_test.1.sai rd1.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -e 1 -h "$ref" -s 500
14 changes: 14 additions & 0 deletions HLAminer-1.4/bin/HPRArnaseq_pacbioSEclassI.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# HPRArnaseq_pacbioSEclassI.sh
# Download the MCF-7 PacBio Iso-Seq reads, align them to the HLA-A/B/C CDS
# references with `bwa mem -x pacbio`, add MD tags with `samtools fillmd`,
# predict HLA alleles with HLAminer and rename its csv/log outputs.
#
# Environment:
#   BWA       optional path to bwa      (default: original site install,
#   SAMTOOLS  optional path to samtools  falling back to the name on PATH).

# Abort on the first failed step so later stages never consume partial files.
set -eu

# Print $1 if it is runnable, otherwise the bare tool name $2 (resolved via PATH).
pick_tool() {
  if command -v "$1" >/dev/null 2>&1; then
    printf '%s\n' "$1"
  else
    printf '%s\n' "$2"
  fi
}

bwa=$(pick_tool "${BWA:-/gsc/btl/linuxbrew/bin/bwa}" bwa)
samtools=$(pick_tool "${SAMTOOLS:-/gsc/btl/linuxbrew/bin/samtools}" samtools)
ref=../database_bwamem/HLA_ABC_CDS.fasta
reads=IsoSeq_MCF7_polished.unimapped.fasta

### Run bwa or your favorite short read aligner
#echo "Building bwa index..."
#/gsc/btl/linuxbrew/bin/bwa index HLA_ABC_CDS.fasta
echo "Downloading MCF-7 pacbio RNA-seq reads..."
# -c resumes/reuses an existing download; plain wget would save a re-download
# as "<name>.1" and leave a possibly truncated original in place for bwa.
wget -c "http://datasets.pacb.com.s3.amazonaws.com/2013/IsoSeqHumanMCF7Transcriptome/$reads"
echo "Running bwa mem..."
"$bwa" mem -x pacbio "$ref" "$reads" > MCF7_vs_HLA.sam
echo "Fixing MD tag..."
"$samtools" fillmd -S MCF7_vs_HLA.sam "$ref" > MCF7_vs_HLAmd.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a MCF7_vs_HLAmd.sam -h "$ref" -s 500 -q 1 -i 1 -e 1
mv HLAminer_HPRA.csv HLAminer_HPRA_MCF-7.csv
mv HLAminer_HPRA.log HLAminer_HPRA_MCF-7.log
10 changes: 10 additions & 0 deletions HLAminer-1.4/bin/HPRAwgs_ONTclassI-IIdemo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# HPRAwgs_ONTclassI-IIdemo.sh
# Demo: download raw ONT reads (ERR2585115, NA19240) and the alignment
# reference, align with minimap2 and stream the SAM output straight into
# HLAminer (-a stream), then rename HLAminer's csv/log outputs.
#
# Environment:
#   THREADS  number of minimap2 threads (default: 60, the original value).

# Abort on the first failed step so the mv commands never rename stale output.
set -eu
# Make a minimap2 failure fail the whole pipeline instead of being masked by
# HLAminer's exit status. Guarded because not every /bin/sh supports pipefail.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

threads=${THREADS:-60}
reads=ERR2585115.fastq.gz
genome=GCA_000001405.15_GRCh38_genomic.chr-only-noChr6-HLA-I_II_GEN.fa.gz

echo "Predicting HLA from raw ONT reads for human NA19240..."
echo "Fetching raw ONT promethion ERR2585115 data (Yoruban individual NA19240)..."
# -c resumes/reuses an existing download; plain wget would save a re-download
# as "<name>.1" and leave a possibly truncated original in place.
wget -c "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR258/005/ERR2585115/$reads"
echo "Fetching database for alignment..."
wget -c "http://www.bcgsc.ca/downloads/btl/hlaminer/$genome"
####
echo "Running minimap2 (minimap2-2.12-r827) and HLAminer v1.4 combined ..."
# /usr/bin/time writes resource usage of the minimap2 run to the .time file.
/usr/bin/time -v -o minimap_hlaminerERR2585115_GEN.time \
  minimap2 -t "$threads" -ax map-ont --MD "$genome" "$reads" \
  | ../bin/HLAminer.pl -h ../database/HLA-I_II_GEN.fasta -s 500 -q 1 -i 1 \
      -p ../database/hla_nom_p.txt -a stream
mv HLAminer_HPRA.csv HLAminer_HPRA_ERR2585115.csv
mv HLAminer_HPRA.log HLAminer_HPRA_ERR2585115.log
8 changes: 8 additions & 0 deletions HLAminer-1.4/bin/HPRAwgs_classI-II.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# HPRAwgs_classI-II.sh
# Align paired read files rd1.fq / rd2.fq to the HLA class I+II genomic
# references with bwa aln + sampe, then predict HLA alleles with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA-I_II_GEN.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" aln -e 0 -o 0 "$ref" rd2.fq > aln_test.2.sai
"$bwa" sampe -o 1000 "$ref" aln_test.1.sai aln_test.2.sai rd1.fq rd2.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -h "$ref" -s 500
7 changes: 7 additions & 0 deletions HLAminer-1.4/bin/HPRAwgs_classI-II_SE.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# HPRAwgs_classI-II_SE.sh
# Single-end variant: align rd1.fq to the HLA class I+II genomic references
# with bwa aln + samse, then predict HLA alleles with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA-I_II_GEN.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" samse "$ref" aln_test.1.sai rd1.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -e 1 -h "$ref" -s 500
8 changes: 8 additions & 0 deletions HLAminer-1.4/bin/HPRAwgs_classI.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# HPRAwgs_classI.sh
# Align paired read files rd1.fq / rd2.fq to the HLA-A/B/C genomic references
# with bwa aln + sampe, then predict HLA alleles with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA_ABC_GEN.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" aln -e 0 -o 0 "$ref" rd2.fq > aln_test.2.sai
"$bwa" sampe -o 1000 "$ref" aln_test.1.sai aln_test.2.sai rd1.fq rd2.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -h "$ref" -s 500
7 changes: 7 additions & 0 deletions HLAminer-1.4/bin/HPRAwgs_classI_SE.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# HPRAwgs_classI_SE.sh
# Single-end variant: align rd1.fq to the HLA-A/B/C genomic references with
# bwa aln + samse, then predict HLA alleles with HLAminer.
#
# Environment:
#   BWA  optional path to the bwa executable (default: the original site
#        install, falling back to `bwa` on PATH when that is not runnable).

# Abort on the first failed step so HLAminer never runs on a truncated aln.sam.
set -eu

bwa=${BWA:-/home/pubseq/BioSw/bwa/bwa-0.5.9/bwa}
if ! command -v "$bwa" >/dev/null 2>&1; then
  echo "bwa not found at $bwa; using bwa from PATH" >&2
  bwa=bwa
fi
ref=../database/HLA_ABC_GEN.fasta

### Run bwa or your favorite short read aligner
echo "Running bwa..."
"$bwa" aln -e 0 -o 0 "$ref" rd1.fq > aln_test.1.sai
"$bwa" samse "$ref" aln_test.1.sai rd1.fq > aln.sam

### Predict HLA
echo "Predicting HLA..."
../bin/HLAminer.pl -a aln.sam -e 1 -h "$ref" -s 500
18 changes: 18 additions & 0 deletions HLAminer-1.4/bin/HPTASRrnaseq_classI-II.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# HPTASRrnaseq_classI-II.sh
# Assembly-based HLA prediction against the HLA class I+II CDS references:
# run TASR on the reads listed in patient.fof, keep contigs of size >= 200,
# build a BLAST database from them, align contigs<->references in both
# directions with parseXMLblast.pl, then predict alleles with HLAminer.

# Abort on the first failed step so later stages never consume partial files.
set -eu

ref=../database/HLA-I_II_CDS.fasta
blast_cfg=ncbiBlastConfig.txt

###Run TASR
echo "Running TASR..."
#TASR Default is -k 15 for recruiting reads. You may increase k, as long as k < L/2 where L is the minimum shotgun read length
../bin/TASR -f patient.fof -m 20 -k 20 -s "$ref" -i 1 -b TASRhla -w 1

###Restrict 200nt+ contigs
# A line containing "size<N>" switches printing on when N >= 200 and off
# otherwise; every other line is printed only while printing is on.
# Reading via redirection makes a missing TASRhla.contigs a hard error.
perl -ne 'if(/size(\d+)/){if($1>=200){$flag=1;print;}else{$flag=0;}}else{print if($flag);}' < TASRhla.contigs > TASRhla200.contigs

###Create a [NCBI] blastable database
echo "Formatting blastable database..."
../bin/formatdb -p F -i TASRhla200.contigs

###Align HLA contigs to references
echo "Aligning TASR contigs to HLA references..."
../bin/parseXMLblast.pl -c "$blast_cfg" -d "$ref" -i TASRhla200.contigs -o 0 -a 1 > tig_vs_hla-ncbi.coord

###Align HLA references to contigs
echo "Aligning HLA references to TASR contigs (go have a coffee, it may take a while)..."
../bin/parseXMLblast.pl -c "$blast_cfg" -i "$ref" -d TASRhla200.contigs -o 0 > hla_vs_tig-ncbi.coord

###Predict HLA alleles
echo "Predicting HLA alleles..."
../bin/HLAminer.pl -b tig_vs_hla-ncbi.coord -r hla_vs_tig-ncbi.coord -c TASRhla200.contigs -h "$ref"
18 changes: 18 additions & 0 deletions HLAminer-1.4/bin/HPTASRrnaseq_classI.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# HPTASRrnaseq_classI.sh
# Assembly-based HLA prediction against the HLA-A/B/C CDS references:
# run TASR on the reads listed in patient.fof, keep contigs of size >= 200,
# build a BLAST database from them, align contigs<->references in both
# directions with parseXMLblast.pl, then predict alleles with HLAminer.
#
# Environment:
#   BLAST_CONFIG  optional BLAST config file (default: ncbiBlastConfigO.txt).

# Abort on the first failed step so later stages never consume partial files.
set -eu

ref=../database/HLA_ABC_CDS.fasta

# NOTE(review): the other HPTASR* scripts pass ncbiBlastConfig.txt; the "O"
# suffix here may be a typo or a deliberate variant - confirm. The original
# default is kept, with a fallback to ncbiBlastConfig.txt when it is absent.
blast_cfg=${BLAST_CONFIG:-ncbiBlastConfigO.txt}
if [ ! -r "$blast_cfg" ] && [ -r ncbiBlastConfig.txt ]; then
  echo "$blast_cfg not found; using ncbiBlastConfig.txt" >&2
  blast_cfg=ncbiBlastConfig.txt
fi

###Run TASR
echo "Running TASR..."
#TASR Default is -k 15 for recruiting reads. You may increase k, as long as k < L/2 where L is the minimum shotgun read length
../bin/TASR -f patient.fof -m 20 -k 20 -s "$ref" -i 1 -b TASRhla -w 1

###Restrict 200nt+ contigs
# A line containing "size<N>" switches printing on when N >= 200 and off
# otherwise; every other line is printed only while printing is on.
# Reading via redirection makes a missing TASRhla.contigs a hard error.
perl -ne 'if(/size(\d+)/){if($1>=200){$flag=1;print;}else{$flag=0;}}else{print if($flag);}' < TASRhla.contigs > TASRhla200.contigs

###Create a [NCBI] blastable database
echo "Formatting blastable database..."
../bin/formatdb -p F -i TASRhla200.contigs

###Align HLA contigs to references
echo "Aligning TASR contigs to HLA references..."
../bin/parseXMLblast.pl -c "$blast_cfg" -d "$ref" -i TASRhla200.contigs -o 0 -a 1 > tig_vs_hla-ncbi.coord

###Align HLA references to contigs
echo "Aligning HLA references to TASR contigs (go have a coffee, it may take a while)..."
../bin/parseXMLblast.pl -c "$blast_cfg" -i "$ref" -d TASRhla200.contigs -o 0 > hla_vs_tig-ncbi.coord

###Predict HLA alleles
echo "Predicting HLA alleles..."
../bin/HLAminer.pl -b tig_vs_hla-ncbi.coord -r hla_vs_tig-ncbi.coord -c TASRhla200.contigs -h "$ref"
18 changes: 18 additions & 0 deletions HLAminer-1.4/bin/HPTASRwgs_classI-II.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# HPTASRwgs_classI-II.sh
# Assembly-based HLA prediction against the HLA class I+II genomic references:
# run TASR on the reads listed in patient.fof, keep contigs of size >= 200,
# build a BLAST database from them, align contigs<->references in both
# directions with parseXMLblast.pl, then predict alleles with HLAminer.

# Abort on the first failed step so later stages never consume partial files.
set -eu

ref=../database/HLA-I_II_GEN.fasta
blast_cfg=ncbiBlastConfig.txt

###Run TASR
echo "Running TASR..."
#TASR Default is -k 15 for recruiting reads. You may increase k, as long as k < L/2 where L is the minimum shotgun read length
../bin/TASR -f patient.fof -m 20 -k 20 -s "$ref" -i 1 -b TASRhla -w 1

###Restrict 200nt+ contigs
# A line containing "size<N>" switches printing on when N >= 200 and off
# otherwise; every other line is printed only while printing is on.
# Reading via redirection makes a missing TASRhla.contigs a hard error.
perl -ne 'if(/size(\d+)/){if($1>=200){$flag=1;print;}else{$flag=0;}}else{print if($flag);}' < TASRhla.contigs > TASRhla200.contigs

###Create a [NCBI] blastable database
echo "Formatting blastable database..."
../bin/formatdb -p F -i TASRhla200.contigs

###Align contigs against database
echo "Aligning TASR contigs to HLA references..."
../bin/parseXMLblast.pl -c "$blast_cfg" -d "$ref" -i TASRhla200.contigs -o 0 -a 1 > tig_vs_hla-ncbi.coord

###Align HLA references to contigs
echo "Aligning HLA references to TASR contigs (go have a coffee, it may take a while)..."
../bin/parseXMLblast.pl -c "$blast_cfg" -i "$ref" -d TASRhla200.contigs -o 0 > hla_vs_tig-ncbi.coord

###Predict HLA alleles
echo "Predicting HLA alleles..."
../bin/HLAminer.pl -b tig_vs_hla-ncbi.coord -r hla_vs_tig-ncbi.coord -c TASRhla200.contigs -h "$ref"
18 changes: 18 additions & 0 deletions HLAminer-1.4/bin/HPTASRwgs_classI.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# HPTASRwgs_classI.sh
# Assembly-based HLA prediction against the HLA-A/B/C genomic references:
# run TASR on the reads listed in patient.fof, keep contigs of size >= 200,
# build a BLAST database from them, align contigs<->references in both
# directions with parseXMLblast.pl, then predict alleles with HLAminer.

# Abort on the first failed step so later stages never consume partial files.
set -eu

ref=../database/HLA_ABC_GEN.fasta
blast_cfg=ncbiBlastConfig.txt

###Run TASR
echo "Running TASR..."
#TASR Default is -k 15 for recruiting reads. You may increase k, as long as k < L/2 where L is the minimum shotgun read length
../bin/TASR -f patient.fof -m 20 -k 20 -s "$ref" -i 1 -b TASRhla -w 1

###Restrict 200nt+ contigs
# A line containing "size<N>" switches printing on when N >= 200 and off
# otherwise; every other line is printed only while printing is on.
# Reading via redirection makes a missing TASRhla.contigs a hard error.
perl -ne 'if(/size(\d+)/){if($1>=200){$flag=1;print;}else{$flag=0;}}else{print if($flag);}' < TASRhla.contigs > TASRhla200.contigs

###Create a [NCBI] blastable database
echo "Formatting blastable database..."
../bin/formatdb -p F -i TASRhla200.contigs

###Align contigs against database
echo "Aligning TASR contigs to HLA references..."
../bin/parseXMLblast.pl -c "$blast_cfg" -d "$ref" -i TASRhla200.contigs -o 0 -a 1 > tig_vs_hla-ncbi.coord

###Align HLA references to contigs
echo "Aligning HLA references to TASR contigs (go have a coffee, it may take a while)..."
../bin/parseXMLblast.pl -c "$blast_cfg" -i "$ref" -d TASRhla200.contigs -o 0 > hla_vs_tig-ncbi.coord

###Predict HLA alleles
echo "Predicting HLA alleles..."
../bin/HLAminer.pl -b tig_vs_hla-ncbi.coord -r hla_vs_tig-ncbi.coord -c TASRhla200.contigs -h "$ref"
Loading

0 comments on commit c00effa

Please sign in to comment.