Skip to content

Commit ba00d83

Browse files
author
Lenna Peterson
committed
Merge branch 'master' of https://github.com/jamescasbon/PyVCF into lenna
Conflicts: vcf/parser.py vcf/test/test_vcf.py
2 parents 49f8897 + d1a9fdc commit ba00d83

28 files changed

+1238
-188
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ docs/_build
1010
.DS_Store
1111
vcf/cparse.c
1212
vcf/cparse.so
13+
.coverage

.travis.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ python:
44
- "2.6"
55
- "2.7"
66
- "3.2"
7+
- "3.3"
78
- "pypy"
89
install:
9-
- "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors pysam argparse ordereddict; fi"
10-
- "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors pysam; fi"
10+
- "if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam argparse counter ordereddict; fi"
11+
- "if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pip install --use-mirrors cython && pip install --use-mirrors pysam; fi"
1112
- python setup.py install
1213
script: python setup.py test

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
recursive-include vcf *.pyx

README.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The main interface is the class: ``Reader``. It takes a file-like
1414
object and acts as a reader::
1515

1616
>>> import vcf
17-
>>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
17+
>>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
1818
>>> for record in vcf_reader:
1919
... print record
2020
Record(CHROM=20, POS=14370, REF=G, ALT=[A])
@@ -49,7 +49,7 @@ one-entry Python lists (see, e.g., ``Record.ALT``). Semicolon-delimited lists
4949
of key=value pairs are converted to Python dictionaries, with flags being given
5050
a ``True`` value. Integers and floats are handled exactly as you'd expect::
5151

52-
>>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'rb'))
52+
>>> vcf_reader = vcf.Reader(open('vcf/test/example-4.0.vcf', 'r'))
5353
>>> record = vcf_reader.next()
5454
>>> print record.POS
5555
14370
@@ -65,10 +65,10 @@ examine properties of interest::
6565
3 1.0 0
6666
>>> print record.num_hom_ref, record.num_het, record.num_hom_alt
6767
1 1 1
68-
>>> print record.nucl_diversity, record.aaf
69-
0.6 0.5
68+
>>> print record.nucl_diversity, record.aaf, record.heterozygosity
69+
0.6 [0.5] 0.5
7070
>>> print record.get_hets()
71-
[Call(sample=NA00002, GT=1|0, HQ=[51, 51], DP=8, GQ=48)]
71+
[Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
7272
>>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
7373
True False True False
7474
>>> print record.var_type, record.var_subtype
@@ -101,7 +101,7 @@ call data in ``data``::
101101
>>> print call.sample
102102
NA00001
103103
>>> print call.data
104-
{'GT': '0|0', 'HQ': [58, 50], 'DP': 3, 'GQ': 49}
104+
CallData(GT=0|0, GQ=49, DP=3, HQ=[58, 50])
105105

106106
Please note that as of release 0.4.0, attributes known to have single values (such as
107107
``DP`` and ``GQ`` above) are returned as values. Other attributes are returned
@@ -134,7 +134,7 @@ For example::
134134

135135
ALT records are actually classes, so that you can interrogate them::
136136

137-
>>> reader = vcf.Reader(file('vcf/test/example-4.1-bnd.vcf'))
137+
>>> reader = vcf.Reader(open('vcf/test/example-4.1-bnd.vcf'))
138138
>>> _ = reader.next(); row = reader.next()
139139
>>> print row
140140
Record(CHROM=1, POS=2, REF=T, ALT=[T[2:3[])
@@ -146,22 +146,22 @@ Random access is supported for files with tabix indexes. Simply call fetch for
146146
region you are interested in::
147147

148148
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
149-
>>> for record in vcf_reader.fetch('20', 1110696, 1230237):
149+
>>> for record in vcf_reader.fetch('20', 1110696, 1230237): # doctest: +SKIP
150150
... print record
151151
Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
152152
Record(CHROM=20, POS=1230237, REF=T, ALT=[None])
153153

154154
Or extract a single row::
155155

156-
>>> print vcf_reader.fetch('20', 1110696)
156+
>>> print vcf_reader.fetch('20', 1110696) # doctest: +SKIP
157157
Record(CHROM=20, POS=1110696, REF=A, ALT=[G, T])
158158

159159

160160
The ``Writer`` class provides a way of writing a VCF file. Currently, you must specify a
161161
template ``Reader`` which provides the metadata::
162162

163163
>>> vcf_reader = vcf.Reader(filename='vcf/test/tb.vcf.gz')
164-
>>> vcf_writer = vcf.Writer(file('/dev/null', 'w'), vcf_reader)
164+
>>> vcf_writer = vcf.Writer(open('/dev/null', 'w'), vcf_reader)
165165
>>> for record in vcf_reader:
166166
... vcf_writer.write_record(record)
167167

docs/HISTORY.rst

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,72 @@ New features should have test code sent with them.
1717
Changes
1818
=======
1919

20+
0.6.7 Release
21+
-------------
22+
23+
* Include missing .pyx files
24+
25+
0.6.6 Release
26+
-------------
27+
28+
* better walk together record ordering (Thanks @datagram, #141)
29+
30+
0.6.5 Release
31+
-------------
32+
33+
* Better contig handling (#115, #116, #119 thanks Martijn)
34+
* INFO lines with type character (#120, #121 thanks @AndrewUzilov, Martijn)
35+
* Single breakends fix (#126 thanks @pkrushe)
36+
* Speedup by losing ordering of INFO (#128 thanks Martijn)
37+
* HOMSEQ and other missing fields in INFO (#130 thanks Martijn)
38+
* Add aaf property (thanks @mgymrek, #131)
39+
* Custom equality for walk_together, thanks bow #132
40+
* Change default line encoding to '\n'
41+
* Improved __eq__ (#134, thanks bow)
42+
43+
44+
0.6.4 Release
45+
-------------
46+
47+
* Handle INFO fields with multiple values, thanks
48+
* Support writing records without GT data #88, thanks @bow
49+
* Pickleable call data #112, thanks @superbobry
50+
* Write files without FORMAT #95 thanks Martijn
51+
* Strict whitespace mode, thanks Martijn, Lee Lichtenstein and Manawsi Gupta
52+
* Add support for contigs in header, thanks @gcnh and Martijn
53+
* Fix GATK header parsing, thanks @alimanfoo
54+
55+
0.6.3 Release
56+
-------------
57+
58+
* cython port of #79
59+
* correct writing of meta lines #84
60+
61+
0.6.2 Release
62+
-------------
63+
64+
* issues #78, #79 (thanks Sean, Brad)
65+
66+
0.6.1 Release
67+
-------------
68+
69+
* Add strict whitespace mode for well formed VCFs with spaces
70+
in sample names (thanks Marco)
71+
* Ignore blank lines in files (thanks Martijn)
72+
* Tweaks for handling missing data (thanks Sean)
73+
* bcftools tests (thanks Martijn)
74+
* record.FILTER is always a list
75+
76+
0.6.0 Release
77+
-------------
78+
79+
* Backwards incompatible change: _Call.data is now a
80+
namedtuple (previously it was a dict)
81+
* Optional cython version, much improved performance.
82+
* Improvements to writer (thanks @cmclean)
83+
* Improvements to inheritance of classes (thanks @lennax)
84+
85+
2086
0.5.0 Release
2187
-------------
2288

scripts/vcf_filter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def addfilt(filt):
162162
if output_record:
163163
# use PASS only if other filter names appear in the FILTER column
164164
#FIXME: is this a good idea?
165-
if record.FILTER == '.' and not drop_filtered: record.FILTER = 'PASS'
165+
if record.FILTER is None and not drop_filtered: record.FILTER = 'PASS'
166166
output.write_record(record)
167167

168168
if __name__ == '__main__': main()

scripts/vcf_melt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ for record in reader:
3939

4040
for sample in record.samples:
4141
row = [sample.sample]
42-
row += [flatten(sample.data.get(x, None)) for x in formats]
42+
# Format fields not present will simply end up "blank"
43+
# in the output
44+
row += [flatten(getattr(sample.data, x, None)) for x in formats]
4345
row += [record.FILTER or '.']
4446
row += fixed
4547
row += info_row

setup.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@
1616
except ImportError:
1717
requires.append('argparse')
1818

19-
19+
import collections
20+
try:
21+
collections.Counter
22+
except AttributeError:
23+
requires.append('counter')
2024
try:
21-
import collections
2225
collections.OrderedDict
2326
except AttributeError:
2427
requires.append('ordereddict')

tox.ini

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# and then run "tox" from this directory.
55

66
[tox]
7-
envlist = py26, py27, py32
7+
envlist = py26, py27, py32, py33
88

99
[testenv]
1010
commands =
@@ -14,7 +14,9 @@ commands =
1414
[testenv:py26]
1515
deps =
1616
argparse
17+
counter
1718
ordereddict
19+
cython
1820
pysam
1921

2022
[testenv:py27]
@@ -23,6 +25,9 @@ deps =
2325
cython
2426

2527
[testenv:py32]
26-
deps =
28+
deps =
2729
cython
2830

31+
[testenv:py33]
32+
deps =
33+
cython

vcf/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@
6666
3 1.0 0
6767
>>> print record.num_hom_ref, record.num_het, record.num_hom_alt
6868
1 1 1
69-
>>> print record.nucl_diversity, record.aaf
70-
0.6 0.5
69+
>>> print record.nucl_diversity, record.aaf, record.heterozygosity
70+
0.6 [0.5] 0.5
7171
>>> print record.get_hets()
7272
[Call(sample=NA00002, CallData(GT=1|0, GQ=48, DP=8, HQ=[51, 51]))]
7373
>>> print record.is_snp, record.is_indel, record.is_transition, record.is_deletion
@@ -178,4 +178,4 @@
178178
from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
179179
from vcf.sample_filter import SampleFilter
180180

181-
VERSION = '0.5.0'
181+
VERSION = '0.6.7'

0 commit comments

Comments
 (0)