Skip to content

Commit 0c567c5

Browse files
authored
Merge pull request #47 from koriege/master
bump to v1.5.0
2 parents 9720b3f + 396fed3 commit 0c567c5

25 files changed

+2209
-1304
lines changed

activate.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#####################################################################################
1010

11+
export BASHBONE_WORKINGDIR="$PWD"
1112
export BASHBONE_DIR="$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")"
1213
export BASHBONE_VERSION=$(source "$BASHBONE_DIR/lib/version.sh"; echo $version)
1314
export BASHBONE_TOOLSDIR="${BASHBONE_TOOLSDIR:-$(dirname "$BASHBONE_DIR")}" # export to not override if previously defined
@@ -241,7 +242,7 @@ function _bashbone_trace(){
241242
line=$((line+3))
242243
cmd=$(declare -f $fun | awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
243244
else
244-
cmd=$(awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
245+
cmd=$(awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$BASHBONE_WORKINGDIR/$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
245246
fi
246247
echo ":ERROR: ${BASHBONE_ERROR:+$BASHBONE_ERROR }in ${src:-shell} (function: ${fun:-main}) @ line $line: $cmd" >&2
247248
fi
@@ -269,7 +270,7 @@ function _bashbone_trace_interactive(){
269270
if [[ $line -eq 1 ]]; then
270271
echo ":ERROR: ${BASHBONE_ERROR:+$BASHBONE_ERROR }in ${src:-shell} (function: ${fun:-main} @ line $l)" >> "$o"
271272
else
272-
cmd=$(awk -v l=$l '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
273+
cmd=$(awk -v l=$l '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$BASHBONE_WORKINGDIR/$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
273274
echo ":ERROR: ${BASHBONE_ERROR:+$BASHBONE_ERROR }in ${src:-shell} (function: ${fun:-main}) @ line $l: $cmd" >> "$o"
274275
fi
275276
done
@@ -364,8 +365,8 @@ function _bashbone_wrapper(){
364365
read -r f_bashbone_wrapper l_bashbone_wrapper s_bashbone_wrapper < <(declare -F $BASHBONE_FUNCNAME) || true
365366
if [[ "$s_bashbone_wrapper" == "$BASHBONE_DIR/lib/"* ]]; then
366367
[[ "$BASHBONE_FUNCNAME" == commander::* || "$BASHBONE_FUNCNAME" == progress::* ]] || BASHBONE_LEGACY=true
367-
else
368-
BASHBONE_LEGACY=false
368+
# else
369+
# BASHBONE_LEGACY=false
369370
fi
370371
local BASHBONE_BPID=$BASHPID
371372
local BASHBONE_CLEANUP="$(command mktemp -p "$BASHBONE_TMPDIR" cleanup.XXXXXXXXXX.sh)"

bashbone_lite.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
[[ "$(uname)" != "Linux" ]] && echo "unsupported operating system" >&2 && exit 1
77
[[ ${BASH_VERSINFO[0]} -lt 4 || (${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -lt 4) ]] && echo "requieres bash >= v4.4" >&2 && exit 1
88

9+
export BASHBONE_WORKINGDIR="$PWD"
910
export BASHBONE_DIR="$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")"
1011
export BASHBONE_EXTENSIONDIR="${BASHBONE_EXTENSIONDIR:-$BASHBONE_DIR}/lib"
1112
export BASHBONE_LEGACY=${BASHBONE_LEGACY:-false}
@@ -118,7 +119,7 @@ function mktemp(){
118119
}
119120

120121
BASHBONE_PGID=$(($(ps -o pgid= -p $BASHPID)))
121-
export TMPDIR="$(command mktemp -d -p "${TMPDIR:-/tmp}" XXXXXXXXXX)"
122+
export TMPDIR="$(command mktemp -d -p "${TMPDIR:-/tmp}" bashbone.XXXXXXXXXX)"
122123
trap '_bashbone_on_error 130 $LINENO; exit 130 &> /dev/null' INT
123124
trap '_bashbone_on_error $? $LINENO || { [[ $BASHPID -ne $BASHBONE_PGID ]] && { return 130 &> /dev/null || exit 130; } || exit 130 &> /dev/null; }' ERR
124125
trap '_bashbone_on_exit $?' EXIT
@@ -183,7 +184,7 @@ function _bashbone_trace(){
183184
line=$((line+3))
184185
cmd=$(declare -f $fun | awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
185186
else
186-
cmd=$(awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
187+
cmd=$(awk -v l=$line '{ if(NR>=l){if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{print o$0; exit}}else{if($0~/\s\\\s*$/){o=o""gensub(/\\\s*$/,"",1,$0)}else{o=""}}}' "$BASHBONE_WORKINGDIR/$src" | sed -E -e 's/\s+/ /g' -e 's/(^\s+|\s+$)//g')
187188
fi
188189
echo ":ERROR: ${BASHBONE_ERROR:+$BASHBONE_ERROR }in ${src:-shell} (function: ${fun:-main}) @ line $line: $cmd" >&2
189190
fi

config/m6aviewer.url

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
http://dna2.leeds.ac.uk/m6a/m6aViewer_1_6_1.jar
1+
https://gitlab.leibniz-fli.de/kriege/m6aviewer/-/raw/main/m6aviewer.jar

lib/alignment_1.sh

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ function alignment::segemehl(){
157157
ln -sfnr "$o.sngl.bed" "$o.sj"
158158
CMD
159159
fi
160-
segemehl+=("$o.bam")
160+
segemehl[$i]="$o.bam"
161161
done
162162

163163
if $skip; then
@@ -388,14 +388,14 @@ function alignment::star(){
388388
CMD
389389
ln -sfnr "$o.SJ.out.tab" "$o.sj"
390390
CMD
391-
star+=("$o.transcriptomic.bam")
391+
star[$i]="$o.transcriptomic.bam"
392392
else
393393
commander::makecmd -a cmd2 -s ';' -c {COMMANDER[0]}<<- CMD {COMMANDER[1]}<<- CMD
394394
mv "$o.Aligned.out.bam" "$o.bam"
395395
CMD
396396
ln -sfnr "$o.SJ.out.tab" "$o.sj"
397397
CMD
398-
star+=("$o.bam")
398+
star[$i]="$o.bam"
399399
fi
400400
done
401401

@@ -492,7 +492,7 @@ function alignment::bwa(){
492492

493493
commander::printinfo "mapping bwa"
494494

495-
# use absolute path, because on some maschines sometimes bwa-mem2 aborts with error: prefix is too long
495+
# use absolute path, because on some machines sometimes bwa-mem2 aborts with error: prefix is too long
496496
declare -a cmdchk=("which bwa-mem2 &> /dev/null && which bwa-mem2 || echo bwa")
497497
local bwacmd=$(commander::runcmd -c bwa -a cmdchk)
498498

@@ -622,7 +622,7 @@ function alignment::bwa(){
622622
CMD
623623
fi
624624
fi
625-
bwa+=("$o1.bam")
625+
bwa[$i]="$o1.bam"
626626
done
627627

628628
if $skip; then
@@ -1052,7 +1052,8 @@ function alignment::_blacklist(){
10521052
-@ $threads
10531053
-b
10541054
-L "$tmpdir/whitelist.bed"
1055-
-o "$_returnfile_blacklist"
1055+
--write-index
1056+
-o "$_returnfile_blacklist##idx##${_returnfile_blacklist%.*}.bai"
10561057
"$bam"
10571058
CMD
10581059
fi
@@ -1063,7 +1064,8 @@ function alignment::_blacklist(){
10631064
-@ $threads
10641065
-b
10651066
-e 'rname!="$blacklist"'
1066-
-o "$_returnfile_blacklist"
1067+
--write-index
1068+
-o "$_returnfile_blacklist##idx##${_returnfile_blacklist%.*}.bai"
10671069
"$bam"
10681070
CMD
10691071
fi
@@ -2070,6 +2072,7 @@ function alignment::qcstats(){
20702072
-p "${tdirs[-1]}" \
20712073
-h "$(echo -e "$header")" \
20722074
-o "$odir/insertsizes.histogram.tsv" \
2075+
-d \
20732076
"${tojoin[@]}"
20742077

20752078
commander::makecmd -a cmd2 -s ' ' -c {COMMANDER[0]}<<- 'CMD' {COMMANDER[1]}<<- CMD

lib/alignment_2.sh

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ function alignment::slice(){
6262
df <- df[srt$ix,];
6363
6464
lmax <- max(len);
65-
n <- slices+1;
66-
bins <- c();
65+
bins <- c(1:nrow(df));
66+
n <- length(bins);
6767
i <- 1;
6868
while(n > slices){
6969
i <- i+1;
@@ -97,9 +97,6 @@ function alignment::slice(){
9797
o="$tdir"/$(basename "$f")
9898
o="${o%.*}"
9999

100-
# alignment::_index -1 cmd2 -t $ithreads -f "$f" -o "$o.bai"
101-
# params="-X '$o.bai'"
102-
103100
mapfile -t mapdata < <(find "$tmpdir/genome" -maxdepth 1 -type f -name "slice.*.bed" | sort -V)
104101
printf "%s\n" "${mapdata[@]}" | sed -E "s@.+\.([0-9]+)\.bed@$o.slice.\1.bam@" > "$o.slices.info"
105102
_bamslices_slice["$f"]="$o.slices.info"
@@ -264,7 +261,7 @@ function alignment::rmduplicates(){
264261
CMD
265262
| awk -v f=<(helper::cat -f "${umi_rmduplicates[$i]}" | paste - - - -)
266263
CMD
267-
-v OFS='\t' '/^@\S\S\s/{print; next}{l=$0; r="@"$1; getline < f; while(r!=$1){getline < f} print l,"RX:Z:"$(NF-2)}'
264+
-v OFS='\t' '/^@\S\S\s/{print; next}{l=$0; r="@"$1; getline < f; while(r!=$1){getline < f} print l,"RX:Z:"$(NF-2),"QX:Z:"$NF}'
268265
CMD
269266
| samtools sort
270267
-@ $ithreads
@@ -613,14 +610,15 @@ function alignment::clipmateoverlaps(){
613610
-r <mapper> | array of sorted, indexed bams within array of
614611
-c <sliceinfo> | array of
615612
-o <outdir> | path to
613+
-u | mark fully softclipped mate as unmapped to avoid e.g. downstream GATK BaseRecalibrator issue java.lang.IllegalStateException: cigar is completely soft-clipped
616614
EOF
617615
return 1
618616
}
619617

620618
local OPTIND arg mandatory skip=false threads memory maxmemory outdir
621-
declare -n _mapper_clipmateoverlaps _bamslices_clipmateoverlaps
619+
# Only the two array handles are namerefs. The -u switch state must be a plain
# (function-local) variable: under `declare -n`, `unmapped=false` would turn it
# into a reference to a variable literally named "false", so `$unmapped` would
# expand to nothing (an empty command succeeds) and -u would write to `false`.
declare -n _mapper_clipmateoverlaps _bamslices_clipmateoverlaps
declare unmapped=false
622620
declare -A nidx tidx
623-
while getopts 'S:s:t:m:M:r:c:o:' arg; do
621+
while getopts 'S:s:t:m:M:r:c:o:u' arg; do
624622
case $arg in
625623
S) $OPTARG && return 0;;
626624
s) $OPTARG && skip=true;;
@@ -630,6 +628,7 @@ function alignment::clipmateoverlaps(){
630628
r) ((++mandatory)); _mapper_clipmateoverlaps=$OPTARG;;
631629
c) ((++mandatory)); _bamslices_clipmateoverlaps=$OPTARG;;
632630
o) ((++mandatory)); outdir="$OPTARG"; mkdir -p "$outdir";;
631+
u) unmapped=true;;
633632
*) _usage;;
634633
esac
635634
done
@@ -646,7 +645,9 @@ function alignment::clipmateoverlaps(){
646645
local ithreads instances=$((${#_mapper_clipmateoverlaps[@]}*${#_bams_clipmateoverlaps[@]}))
647646
read -r instances ithreads < <(configure::instances_by_threads -i $instances -t 10 -T $threads)
648647

649-
local m i o slice odir
648+
local m i o slice odir params
649+
$unmapped && params='--unmapped' || params=''
650+
650651
declare -a tomerge cmd1 cmd2 cmd3
651652
for m in "${_mapper_clipmateoverlaps[@]}"; do
652653
declare -n _bams_clipmateoverlaps=$m
@@ -658,13 +659,13 @@ function alignment::clipmateoverlaps(){
658659

659660
while read -r slice; do
660661
# bamutil clips second read even full length, and by default does not produce unmapped reads
661-
# --unmapped marks full length clipped reads as unmapped and does fixmate afterwards
662-
# -> when combined with --excludeFlags (default:0xF0C, better: 0x80C) that includes UNMAP and MUNMAP, pairs with 2nd read fully clipped gets removed
662+
# --unmapped marks full length clipped reads as unmapped and fixes both mates flags.
663663
# use stdout with bam extension to enforce bam output
664664
commander::makecmd -a cmd1 -s ';' -c {COMMANDER[0]}<<- CMD
665665
bam clipOverlap
666666
--in "$slice"
667667
--out -.bam
668+
$params
668669
--excludeFlags 0x0
669670
--poolSize $poolsize
670671
--stats
@@ -1407,15 +1408,18 @@ function alignment::bqsr(){
14071408

14081409
commander::printinfo "base quality score recalibration"
14091410

1410-
local minstances mthreads jmem jgct jcgct
1411+
local minstances mthreads jmem jgct jcgct minstances2 mthreads2 jmem2 jgct2 jcgct2
14111412
read -r minstances mthreads jmem jgct jcgct < <(configure::jvm -T $threads -m $memory -M "$maxmemory")
14121413

14131414
declare -n _bams_bqsr="${_mapper_bqsr[0]}"
14141415
local ithreads instances=$((${#_mapper_bqsr[@]}*${#_bams_bqsr[@]}))
1415-
read -r instances ithreads < <(configure::instances_by_threads -i $instances -t 10 -T $threads)
1416+
read -r minstances2 mthreads2 jmem2 jgct2 jcgct2 < <(configure::jvm -i $instances -T $threads -M "$maxmemory") # for gatk simple tasks like gathering
1417+
read -r instances ithreads < <(configure::instances_by_threads -i $instances -t 10 -T $threads) # for samtools
14161418

1417-
local m i o slice odir
1418-
declare -a tdirs tomerge cmd1 cmd2 cmd3 cmd4
1419+
local m i o slice odir params
1420+
[[ -s "$genome.InDel_gold.vcf.gz" ]] && params="--known-sites '$dbsnp' --known-sites '$genome.InDel_gold.vcf.gz'" || params="--known-sites '$dbsnp'"
1421+
1422+
declare -a reportfile tdirs tomerge cmd1 cmd2 cmd3 cmd4 cmd5
14191423
for m in "${_mapper_bqsr[@]}"; do
14201424
declare -n _bams_bqsr=$m
14211425
odir="$outdir/$m"
@@ -1424,6 +1428,8 @@ function alignment::bqsr(){
14241428
for i in "${!_bams_bqsr[@]}"; do
14251429
tomerge=()
14261430

1431+
reportfile="$(mktemp -p "$tmpdir" cleanup.XXXXXXXXXX.bqsreport)"
1432+
14271433
while read -r slice; do
14281434
# https://gatkforums.broadinstitute.org/gatk/discussion/7131/is-indel-realignment-removed-from-gatk4
14291435
# https://software.broadinstitute.org/gatk/blog?id=7847
@@ -1449,32 +1455,33 @@ function alignment::bqsr(){
14491455
'
14501456
BaseRecalibrator
14511457
-R "$genome"
1452-
--known-sites "$dbsnp"
1458+
$params
14531459
-I "$slice"
14541460
-O "$slice.bqsreport"
14551461
-verbosity ERROR
14561462
--tmp-dir "${tdirs[-1]}"
14571463
CMD
14581464

1459-
commander::makecmd -a cmd2 -s ';' -c {COMMANDER[0]}<<- CMD {COMMANDER[1]}<<- CMD
1465+
tdirs+=("$(mktemp -d -p "$tmpdir" cleanup.XXXXXXXXXX.gatk)")
1466+
commander::makecmd -a cmd3 -s ';' -c {COMMANDER[0]}<<- CMD {COMMANDER[1]}<<- CMD
14601467
rm -rf "$slice.bqsr.parts"
14611468
CMD
1462-
gatk
1469+
MALLOC_ARENA_MAX=4 gatk
14631470
--java-options '
14641471
-Xmx${jmem}m
14651472
-XX:ParallelGCThreads=$jgct
14661473
-XX:ConcGCThreads=$jcgct
14671474
-Djava.io.tmpdir="$tmpdir"
14681475
'
14691476
ApplyBQSR
1470-
-bqsr "$slice.bqsreport"
1477+
-bqsr "$reportfile"
14711478
-I "$slice"
14721479
-O "$slice.bqsr"
14731480
-verbosity ERROR
14741481
--tmp-dir "${tdirs[-1]}"
14751482
CMD
14761483

1477-
commander::makecmd -a cmd3 -s ';' -c {COMMANDER[0]}<<- CMD {COMMANDER[1]}<<- CMD
1484+
commander::makecmd -a cmd4 -s ';' -c {COMMANDER[0]}<<- CMD {COMMANDER[1]}<<- CMD
14781485
mv "$slice.bqsr" "$slice"
14791486
CMD
14801487
samtools index -@ $ithreads "$slice" "${slice%.*}.bai"
@@ -1483,10 +1490,25 @@ function alignment::bqsr(){
14831490
tomerge+=("$slice")
14841491
done < "${_bamslices_bqsr[${_bams_bqsr[$i]}]}"
14851492

1493+
tdirs+=("$(mktemp -d -p "$tmpdir" cleanup.XXXXXXXXXX.gatk)")
1494+
commander::makecmd -a cmd2 -s ';' -c {COMMANDER[0]}<<- CMD
1495+
MALLOC_ARENA_MAX=4 gatk
1496+
--java-options '
1497+
-Xmx${jmem2}m
1498+
-XX:ParallelGCThreads=$jgct2
1499+
-XX:ConcGCThreads=$jcgct2
1500+
-Djava.io.tmpdir="$tmpdir"
1501+
'
1502+
GatherBQSRReports
1503+
$(printf -- '-I "%s" ' "${tomerge[@]/%/.bqsreport}")
1504+
-O "$reportfile"
1505+
--tmp-dir "${tdirs[-1]}"
1506+
CMD
1507+
14861508
o="$odir/$(basename "${_bams_bqsr[$i]}")"
14871509
o="${o%.*}.bqsr.bam"
14881510
# slices have full sam header info used by merge to maintain the global sort order
1489-
commander::makecmd -a cmd4 -s '|' -c {COMMANDER[0]}<<- CMD
1511+
commander::makecmd -a cmd5 -s '|' -c {COMMANDER[0]}<<- CMD
14901512
samtools merge
14911513
-@ $ithreads
14921514
-f
@@ -1509,11 +1531,13 @@ function alignment::bqsr(){
15091531
commander::printcmd -a cmd2
15101532
commander::printcmd -a cmd3
15111533
commander::printcmd -a cmd4
1534+
commander::printcmd -a cmd5
15121535
else
15131536
commander::runcmd -c gatk -v -b -i $minstances -a cmd1
1514-
commander::runcmd -c gatk -v -b -i $minstances -a cmd2
1515-
commander::runcmd -v -b -i $instances -a cmd3
1537+
commander::runcmd -c gatk -v -b -i $minstances2 -a cmd2
1538+
commander::runcmd -c gatk -v -b -i $minstances -a cmd3
15161539
commander::runcmd -v -b -i $instances -a cmd4
1540+
commander::runcmd -v -b -i $instances -a cmd5
15171541
fi
15181542

15191543
return 0

0 commit comments

Comments
 (0)