Skip to content

Commit b5fc407

Browse files
committed
Merge branch 'release/2.5.1'
2 parents 28cc0ac + 601d5de commit b5fc407

32 files changed

+355
-336
lines changed

CHANGES.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
2.5.1
2+
3+
* Added universal newline support to deeptoolsintervals (issue #506).
4+
* Fixed a few issues with correctGCBias under Python 3.5 (thanks to @drakeeee).
5+
* Setting `--minThreshold 0.0` or `--maxThreshold 0.0` now works properly. Previously, setting either of these to 0 was ignored. (issue #516)
6+
* You can now specify the plot width and height in `plotPCA` and `plotCorrelation` (heatmap only) with the `--plotWidth` and `--plotHeight` parameters. (issue #507)
7+
* plotCoverage no longer clips the top off of plots. Further, you can now set the plot width and height with `--plotWidth` and `--plotHeight`. (issue #508)
8+
* In bamCoverage, specifying `--filterRNAstrand` no longer results in `--extendReads` being ignored. (issue #520)
9+
* `plotFingerprint` and `plotEnrichment` no longer require producing a plot, which is useful if you only need QC metrics and are using a LOT of samples (such that matplotlib would crash anyway). This hasn't been implemented in Galaxy yet, but can be if people would like it. (issues #519 and #526)
10+
* `computeMatrix` now accepts a `--samplesLabel` option, which is useful in those cases when you aren't immediately running `plotHeatmap` and don't have terribly descriptive file names (issue #523)
11+
* If you use `plotFingerprint` with the `--JSDsample` option and forget to list that file under `--bamfiles`, it will be added automatically and its file name will be added to the labels if needed. (issue #527)
12+
* Various Galaxy wrapper fixes
13+
114
2.5.0
215

316
* Fix a bug where using regions with the same name in multiple BED files in computeMatrix caused downstream problems in plotHeatmap/plotProfile (issue #477).

deeptools/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# This file is originally generated from Git information by running 'setup.py
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

5-
__version__ = '2.5.0.1'
5+
__version__ = '2.5.1'

deeptools/bamCoverage.py

Lines changed: 4 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ def main(args=None):
148148

149149
func_args = {'scaleFactor': scale_factor}
150150

151+
# This fixes issue #520, where --extendReads wasn't honored if --filterRNAstrand was used
152+
if args.filterRNAstrand and not args.Offset:
153+
args.Offset = [1, -1]
154+
151155
if args.MNase:
152156
# check that library is paired end
153157
# using getFragmentAndReadSize
@@ -210,26 +214,6 @@ def main(args=None):
210214
verbose=args.verbose)
211215
wr.filter_strand = args.filterRNAstrand
212216
wr.Offset = args.Offset
213-
214-
elif args.filterRNAstrand:
215-
wr = filterRnaStrand([args.bam],
216-
binLength=args.binSize,
217-
stepSize=args.binSize,
218-
region=args.region,
219-
numberOfProcessors=args.numberOfProcessors,
220-
extendReads=args.extendReads,
221-
minMappingQuality=args.minMappingQuality,
222-
ignoreDuplicates=args.ignoreDuplicates,
223-
center_read=args.centerReads,
224-
zerosToNans=args.skipNonCoveredRegions,
225-
samFlag_include=args.samFlagInclude,
226-
samFlag_exclude=args.samFlagExclude,
227-
minFragmentLength=args.minFragmentLength,
228-
maxFragmentLength=args.maxFragmentLength,
229-
verbose=args.verbose,
230-
)
231-
232-
wr.filter_strand = args.filterRNAstrand
233217
else:
234218
wr = writeBedGraph.WriteBedGraph([args.bam],
235219
binLength=args.binSize,
@@ -399,58 +383,3 @@ def get_fragment_from_read(self, read):
399383
fragment_end = fragment_start + 3
400384

401385
return [(fragment_start, fragment_end)]
402-
403-
404-
class filterRnaStrand(writeBedGraph.WriteBedGraph):
405-
"""
406-
Class to redefine the get_fragment_from_read for the --filterRNAstrand case
407-
408-
Only reads either forward or reverse are kept as follows:
409-
410-
For paired-end
411-
--------------
412-
reads forward:
413-
414-
1. alignments of the second in pair (128) if they map to the forward strand (~16)
415-
2. alignments of the first in pair (64) if they map to the reverse strand (~32)
416-
417-
1. include 128, exclude 16
418-
or
419-
2. include 64 exclude 32
420-
421-
reads reverse:
422-
1. alignments of the second in pair (128) if it maps to the reverse strand (16) 128 & 16 = 144
423-
2. alignments of the first in pair (64) if their mates map to the reverse strand (32) 64 & 32 = 96
424-
425-
1. include 144
426-
or
427-
2. include 96
428-
429-
For single-end
430-
--------------
431-
forward: include 16 (map forward strand)
432-
reverse: exclude 16
433-
434-
"""
435-
436-
def get_fragment_from_read(self, read):
437-
"""
438-
Gets only reads for the given strand
439-
"""
440-
fragment_start = fragment_end = None
441-
442-
# only paired forward reads are considered
443-
if read.is_paired:
444-
if self.filter_strand == 'forward':
445-
if (read.flag & 128 == 128 and read.flag & 16 == 0) or (read.flag & 64 == 64 and read.flag & 32 == 0):
446-
return read.get_blocks()
447-
else:
448-
if read.flag & 144 == 144 or read.flag & 96 == 96:
449-
return read.get_blocks()
450-
else:
451-
if self.filter_strand == 'forward' and read.flag & 16 == 16:
452-
return read.get_blocks()
453-
elif self.filter_strand == 'reverse' and read.flag & 16 == 0:
454-
return read.get_blocks()
455-
456-
return [(fragment_start, fragment_end)]

deeptools/bamPEFragmentSize.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,18 @@ def parse_arguments():
8383
return parser
8484

8585

86-
def getFragSize(bam, args):
86+
def getFragSize(bam, args, idx):
8787
fragment_len_dict, read_len_dict = get_read_and_fragment_length(bam, return_lengths=True,
8888
blackListFileName=args.blackListFileName,
8989
numberOfProcessors=args.numberOfProcessors,
9090
verbose=args.verbose,
9191
binSize=args.binSize,
9292
distanceBetweenBins=args.distanceBetweenBins)
93-
print("\n\nBAM file : {}".format(bam))
93+
if args.samplesLabel and idx < len(args.samplesLabel):
94+
print("\n\nSample label: {}".format(args.samplesLabel[idx]))
95+
else:
96+
print("\n\nBAM file : {}".format(bam))
97+
9498
if fragment_len_dict:
9599
if fragment_len_dict['mean'] == 0:
96100
print("No pairs were found. Is the data from a paired-end sequencing experiment?")
@@ -125,8 +129,8 @@ def main(args=None):
125129
args = parse_arguments().parse_args(args)
126130

127131
fraglengths = {}
128-
for bam in args.bamfiles:
129-
fraglengths[bam] = getFragSize(bam, args)
132+
for idx, bam in enumerate(args.bamfiles):
133+
fraglengths[bam] = getFragSize(bam, args, idx)
130134

131135
if args.histogram:
132136
import matplotlib

deeptools/computeMatrix.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,14 @@ def computeMatrixOptArgs(case=['scale-regions', 'reference-point'][0]):
316316
metavar="BED file",
317317
required=False)
318318

319+
optional.add_argument('--samplesLabel',
320+
help='Labels for the samples. This will then be passed to plotHeatmap and plotProfile. The '
321+
'default is to use the file name of the '
322+
'sample. The sample labels should be separated '
323+
'by spaces and quoted if a label itself'
324+
'contains a space E.g. --samplesLabel label-1 "label 2" ',
325+
nargs='+')
326+
319327
# in contrast to other tools,
320328
# computeMatrix by default outputs
321329
# messages and the --quiet flag suppresses them

deeptools/correctGCBias.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,16 @@ def writeCorrected_worker(chrNameBam, chrNameBit, start, end, step):
237237

238238
cvg_corr[vectorStart:vectorEnd] += float(1) / R_gc[gc]
239239
i += 1
240-
if debug:
241-
endTime = time.time()
242-
print("{}, processing {} ({:.1f} per sec) ")
243-
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
244-
i, i / (endTime - startTime),
245-
chrNameBit, start, end)
240+
241+
try:
242+
if debug:
243+
endTime = time.time()
244+
print("{}, processing {} ({:.1f} per sec) ")
245+
"reads @ {}:{}-{}".format(multiprocessing.current_process().name,
246+
i, i / (endTime - startTime),
247+
chrNameBit, start, end)
248+
except NameError:
249+
pass
246250

247251
if i == 0:
248252
return None
@@ -661,7 +665,7 @@ def main(args=None):
661665
res = list(map(writeCorrected_wrapper, mp_args))
662666

663667
# concatenate intermediary bedgraph files
664-
_temp_bg_file = open(_temp_bg_file_name, 'w')
668+
_temp_bg_file = open(_temp_bg_file_name, 'wb')
665669
for tempFileName in res:
666670
if tempFileName:
667671
# concatenate all intermediate tempfiles into one

deeptools/correlation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def compute_correlation(self):
227227

228228
def plot_correlation(self, plot_fiilename, plot_title='', vmax=None,
229229
vmin=None, colormap='jet', image_format=None,
230-
plot_numbers=False):
230+
plot_numbers=False, plotWidth=11, plotHeight=9.5):
231231
"""
232232
plots a correlation using a symmetric heatmap
233233
"""
@@ -248,7 +248,7 @@ def plot_correlation(self, plot_fiilename, plot_title='', vmax=None,
248248
vmin = 0 if corr_matrix .min() >= 0 else -1
249249

250250
# Compute and plot dendrogram.
251-
fig = plt.figure(figsize=(11, 9.5))
251+
fig = plt.figure(figsize=(plotWidth, plotHeight))
252252
plt.suptitle(plot_title)
253253

254254
axdendro = fig.add_axes([0.02, 0.12, 0.1, 0.66])
@@ -431,12 +431,12 @@ def plot_scatter(self, plot_fiilename, plot_title='', image_format=None, log1p=F
431431
plt.savefig(plot_fiilename, format=image_format)
432432
plt.close()
433433

434-
def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False):
434+
def plot_pca(self, plot_filename, plot_title='', image_format=None, log1p=False, plotWidth=5, plotHeight=10):
435435
"""
436436
Plot the PCA of a matrix
437437
"""
438438

439-
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5, 10))
439+
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(plotWidth, plotHeight))
440440
# PCA
441441
if self.rowCenter:
442442
_ = self.matrix.mean(axis=1)

deeptools/getFragmentAndReadSize.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,19 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileNam
8484

8585
distanceBetweenBins *= 2
8686
fl = []
87+
88+
# Fix issue #522, allow distanceBetweenBins == 0
89+
if distanceBetweenBins == 0:
90+
imap_res = mapReduce.mapReduce((bam_handle.filename, distanceBetweenBins),
91+
getFragmentLength_wrapper,
92+
chrom_sizes,
93+
genomeChunkLength=binSize,
94+
blackListFileName=blackListFileName,
95+
numberOfProcessors=numberOfProcessors,
96+
verbose=verbose)
97+
fl = np.concatenate(imap_res)
98+
99+
# Try to ensure we have at least 1000 regions from which to compute statistics, halving the inter-bin distance as needed
87100
while len(fl) < 1000 and distanceBetweenBins > 1:
88101
distanceBetweenBins /= 2
89102
stepsize = binSize + distanceBetweenBins

deeptools/getScaleFactor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def get_scale_factor(args):
168168
bam_mapped, bam_mapped_total = get_num_kept_reads(args)
169169
if args.normalizeTo1x:
170170
# Print output, since normalization stuff isn't printed to stderr otherwise
171-
sys.stderr.write("normalization: 1x\n")
171+
sys.stderr.write("normalization: 1x (effective genome size {})\n".format(args.normalizeTo1x))
172172

173173
# try to guess fragment length if the bam file contains paired end reads
174174
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length

deeptools/heatmapper.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,7 @@ def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile
271271
"matrix length does not match regions length"
272272

273273
if len(regions) == 0:
274-
sys.stderr.write(
275-
"\nERROR: BED file does not contain any valid regions. "
276-
"Please check\n")
274+
sys.stderr.write("\nERROR: Either the BED file does not contain any valid regions or there are none remaining after filtering.\n")
277275
exit(1)
278276
if regions_no_score == len(regions):
279277
exit("\nERROR: None of the BED regions could be found in the bigWig"
@@ -298,7 +296,10 @@ def computeMatrix(self, score_file_list, regions_file, parameters, blackListFile
298296
numcols = matrix.shape[1]
299297
num_ind_cols = self.get_num_individual_matrix_cols()
300298
sample_boundaries = list(range(0, numcols + num_ind_cols, num_ind_cols))
301-
sample_labels = [splitext(basename(x))[0] for x in score_file_list]
299+
if allArgs is not None and allArgs['samplesLabel'] is not None:
300+
sample_labels = allArgs['samplesLabel']
301+
else:
302+
sample_labels = [splitext(basename(x))[0] for x in score_file_list]
302303

303304
# Determine the group boundaries
304305
group_boundaries = []
@@ -555,9 +556,9 @@ def compute_sub_matrix_worker(self, chrom, start, end, score_file_list, paramete
555556
if not parameters['missing data as zero']:
556557
coverage[:] = np.nan
557558

558-
if parameters['min threshold'] and coverage.min() <= parameters['min threshold']:
559+
if parameters['min threshold'] is not None and coverage.min() <= parameters['min threshold']:
559560
continue
560-
if parameters['max threshold'] and coverage.max() >= parameters['max threshold']:
561+
if parameters['max threshold'] is not None and coverage.max() >= parameters['max threshold']:
561562
continue
562563
if parameters['scale'] != 1:
563564
coverage = parameters['scale'] * coverage

0 commit comments

Comments
 (0)