Skip to content

Commit

Permalink
Bundle BioPython code so that installation is not necessary (Tests an…
Browse files Browse the repository at this point in the history
…d documentation directories have been removed)
  • Loading branch information
bosborne committed Jul 19, 2015
1 parent 5d08843 commit 08c7133
Show file tree
Hide file tree
Showing 617 changed files with 209,566 additions and 35 deletions.
193 changes: 193 additions & 0 deletions Bio/Affy/CelFile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Copyright 2004 by Harry Zuzan. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""
Classes for accessing the information in Affymetrix cel files.
Functions:
read Read a cel file and store its contents in a Record
Classes:
Record Contains the information from a cel file
"""

# We use print in the doctests
from __future__ import print_function

try:
import numpy
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install NumPy if you want to use Bio.Affy.CelFile")

__docformat__ = "restructuredtext en"


class Record(object):
"""Stores the information in a cel file
Example usage:
>>> from Bio.Affy import CelFile
>>> with open('Affy/affy_v3_example.CEL') as handle:
... c = CelFile.read(handle)
...
>>> print(c.ncols, c.nrows)
5 5
>>> print(c.intensities)
[[ 234. 170. 22177. 164. 22104.]
[ 188. 188. 21871. 168. 21883.]
[ 188. 193. 21455. 198. 21300.]
[ 188. 182. 21438. 188. 20945.]
[ 193. 20370. 174. 20605. 168.]]
>>> print(c.stdevs)
[[ 24. 34.5 2669. 19.7 3661.2]
[ 29.8 29.8 2795.9 67.9 2792.4]
[ 29.8 88.7 2976.5 62. 2914.5]
[ 29.8 76.2 2759.5 49.2 2762. ]
[ 38.8 2611.8 26.6 2810.7 24.1]]
>>> print(c.npix)
[[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]]
"""
def __init__(self):
self.version = None
self.GridCornerUL = None
self.GridCornerUR = None
self.GridCornerLR = None
self.GridCornerLL = None
self.DatHeader = None
self.Algorithm = None
self.AlgorithmParameters = None
self.NumberCells = None
self.intensities = None
self.stdevs = None
self.npix = None
self.nrows = None
self.ncols = None
self.nmask = None
self.mask = None
self.noutliers = None
self.outliers = None
self.modified = None


def read(handle):
"""
Read the information in a cel file, and store it in a Record.
"""
# Needs error handling.
# Needs to know the chip design.
record = Record()
section = ""
for line in handle:
if not line.strip():
continue
# Set current section
if line[:5] == "[CEL]":
section = "CEL"
elif line[:8] == "[HEADER]":
section = "HEADER"
elif line[:11] == "[INTENSITY]":
section = "INTENSITY"
record.intensities = numpy.zeros((record.nrows, record.ncols))
record.stdevs = numpy.zeros((record.nrows, record.ncols))
record.npix = numpy.zeros((record.nrows, record.ncols), int)
elif line[:7] == "[MASKS]":
section = "MASKS"
record.mask = numpy.zeros((record.nrows, record.ncols))
elif line[:10] == "[OUTLIERS]":
section = "OUTLIERS"
record.outliers = numpy.zeros((record.nrows, record.ncols))
elif line[:10] == "[MODIFIED]":
section = "MODIFIED"
record.modified = numpy.zeros((record.nrows, record.ncols))
elif line[0] == "[":
# This would be an unknown section
section = ""
elif section == "CEL":
keyword, value = line.split("=", 1)
if keyword == 'Version':
record.version = int(value)
elif section == "HEADER":
# Set record.ncols and record.nrows, remaining data goes into
# record.header dict
keyword, value = line.split("=", 1)
if keyword == "Cols":
record.ncols = int(value)
elif keyword == "Rows":
record.nrows = int(value)
elif keyword == 'GridCornerUL':
x, y = value.split()
record.GridCornerUL = (int(x), int(y))
elif keyword == 'GridCornerUR':
x, y = value.split()
record.GridCornerUR = (int(x), int(y))
elif keyword == 'GridCornerLR':
x, y = value.split()
record.GridCornerLR = (int(x), int(y))
elif keyword == 'GridCornerLL':
x, y = value.split()
record.GridCornerLL = (int(x), int(y))
elif keyword == 'DatHeader':
record.DatHeader = value.strip('\n\r')
elif keyword == 'Algorithm':
record.Algorithm = value.strip('\n\r')
elif keyword == 'AlgorithmParameters':
record.AlgorithmParameters = value.strip('\n\r')
elif section == "INTENSITY":
if "NumberCells" in line:
record.NumberCells = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.intensities[x, y] = float(words[2])
record.stdevs[x, y] = float(words[3])
record.npix[x, y] = int(words[4])
elif section == "MASKS":
if "NumberCells" in line:
record.nmask = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.mask[x, y] = int(1)
elif section == "OUTLIERS":
if "NumberCells" in line:
record.noutliers = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.outliers[x, y] = int(1)
elif section == "MODIFIED":
if "NumberCells" in line:
record.nmodified = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.modified[x, y] = float(words[2])
else:
continue
return record

if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
9 changes: 9 additions & 0 deletions Bio/Affy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#

"""Deal with Affymetrix related data such as cel files.
"""

__docformat__ = "restructuredtext en"
Loading

0 comments on commit 08c7133

Please sign in to comment.