Skip to content

Commit 2aeb176

Browse files
committed
misc documentation, some work on rpy2 interface. near git migration
git-svn-id: http://pandas.googlecode.com/svn/trunk@202 d5231056-7de3-11de-ac95-d976489f1ece
1 parent 3f3508f commit 2aeb176

File tree

19 files changed

+639
-31
lines changed

19 files changed

+639
-31
lines changed

.coveragerc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# .coveragerc to control coverage.py
2+
[run]
3+
branch = False
4+
5+
[report]
6+
# Regexes for lines to exclude from consideration
7+
exclude_lines =
8+
# Have to re-enable the standard pragma
9+
pragma: no cover
10+
11+
# Don't complain about missing debug-only code:
12+
def __repr__
13+
if self\.debug
14+
15+
# Don't complain if tests don't hit defensive assertion code:
16+
raise AssertionError
17+
raise NotImplementedError
18+
19+
# Don't complain if non-runnable code isn't run:
20+
if 0:
21+
if __name__ == .__main__.:
22+
23+
ignore_errors = False
24+
25+
[html]
26+
directory = coverage_html_report

LICENSE.txt renamed to LICENSE

File renamed without changes.

README.txt renamed to README.rst

File renamed without changes.

RELEASE.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
=============
2+
Release Notes
3+
=============
4+
5+
pandas 0.3.0
6+
============
7+
8+
**Release date:**
9+
10+
**New features / modules**
11+
12+
**Improvements**
13+
14+
**API Changes**
15+
16+
**Bug fixes**
17+

TODO.txt renamed to TODO.rst

File renamed without changes.

bench/alignment.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Setup
2+
import numpy as np
3+
import pandas
4+
import la
5+
# Problem size: N row labels by K column labels
N, K = 1000, 50
arr1 = np.random.randn(N, K)
arr2 = np.random.randn(N, K)
idx1 = range(N)
idx2 = range(K)

# pandas operands: the second one carries both label sets in reverse order
# (presumably so that arithmetic between the two has to realign labels)
dma1 = pandas.DataMatrix(arr1, idx1, idx2)
dma2 = pandas.DataMatrix(arr2, idx1[::-1], idx2[::-1])

# larry operands built from the same arrays and the same label orderings
lar1 = la.larry(arr1, [idx1, idx2])
lar2 = la.larry(arr2, [idx1[::-1], idx2[::-1]])

# Repeat the larry addition 100 times; only the last result is kept
for i in range(100):
    result = lar1 + lar2

bench/serialize.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import time, os
2+
import numpy as np
3+
4+
import la
5+
import pandas
6+
7+
def timeit(f, iterations):
    """Return the total wall-clock seconds taken to call ``f`` repeatedly.

    Parameters
    ----------
    f : callable
        Zero-argument callable to benchmark.
    iterations : int
        Number of times ``f`` is called.

    Returns
    -------
    float
        Elapsed seconds for all ``iterations`` calls combined (this is the
        total, not the per-call average).
    """
    # ``time.clock`` reports CPU time on Unix, so time spent waiting on disk
    # I/O is not counted, and it no longer exists on Python 3.8+.
    # ``default_timer`` is a wall clock available on every version.
    from timeit import default_timer

    start = default_timer()

    for _ in range(iterations):
        f()

    return default_timer() - start
14+
15+
def roundtrip_archive(N, iterations=10, directory=None):
    """Time a save-and-load round trip of an N x N array in three formats.

    The same random array is wrapped as a plain ndarray, a larry and a
    DataMatrix. Each container is written and read back ``iterations`` times
    and the mean seconds per round trip is printed for each.

    Parameters
    ----------
    N : int
        Number of rows and columns of the random test array.
    iterations : int, default 10
        Number of round trips timed per format.
    directory : str, optional
        Directory in which the scratch files are created. Defaults to the
        system temporary directory.
    """
    import tempfile

    if directory is None:
        directory = tempfile.gettempdir()

    # Create data
    arr = np.random.randn(N, N)
    lar = la.larry(arr)
    dma = pandas.DataMatrix(arr, range(N), range(N))

    # filenames
    filename_numpy = os.path.join(directory, 'numpy.npz')
    filename_larry = os.path.join(directory, 'archive.hdf5')
    filename_pandas = os.path.join(directory, 'pandas_tmp')

    # Delete old files so every run starts from scratch. A file that does not
    # exist yet is expected, so that error is deliberately ignored.
    for stale in (filename_numpy, filename_larry, filename_pandas):
        try:
            os.unlink(stale)
        except OSError:
            pass

    # Time a round trip save and load
    numpy_f = lambda: numpy_roundtrip(filename_numpy, arr, arr)
    numpy_time = timeit(numpy_f, iterations) / iterations

    larry_f = lambda: larry_roundtrip(filename_larry, lar, lar)
    larry_time = timeit(larry_f, iterations) / iterations

    pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pandas_time = timeit(pandas_f, iterations) / iterations

    # Single-argument print calls behave the same on Python 2 and Python 3
    print('Numpy (npz) %7.4f seconds' % numpy_time)
    print('larry (HDF5) %7.4f seconds' % larry_time)
    print('pandas (HDF5) %7.4f seconds' % pandas_time)
54+
55+
def numpy_roundtrip(filename, arr1, arr2):
    """Save two arrays to an ``.npz`` archive and read both back.

    The loaded arrays are discarded; the function exists only so the round
    trip can be timed.

    Parameters
    ----------
    filename : str
        Path of the archive. NOTE(review): ``np.savez`` appends ``.npz`` when
        the name lacks that suffix, so pass a name ending in ``.npz`` or the
        subsequent load will not find the file -- confirm against callers.
    arr1, arr2 : ndarray
        Arrays stored under the keys ``'arr1'`` and ``'arr2'``.
    """
    np.savez(filename, arr1=arr1, arr2=arr2)
    npz = np.load(filename)
    try:
        # Indexing is what actually reads each member from the archive
        arr1 = npz['arr1']
        arr2 = npz['arr2']
    finally:
        # Release the underlying file handle; without this every benchmark
        # iteration leaves one more open file behind.
        npz.close()
60+
61+
def larry_roundtrip(filename, lar1, lar2):
    """Write two larrys into an ``la.IO`` archive and read them back.

    Both objects are stored under the keys ``'lar1'`` and ``'lar2'`` in the
    archive opened at ``filename`` and then fetched again. The fetched
    objects are discarded and nothing is returned.
    """
    archive = la.IO(filename)

    # Store both objects first ...
    for key, value in (('lar1', lar1), ('lar2', lar2)):
        archive[key] = value

    # ... then fetch them again in the same order
    for key in ('lar1', 'lar2'):
        _ = archive[key]
67+
68+
def pandas_roundtrip(filename, dma1, dma2):
    """Write two objects into an ``HDFStore`` and read them back.

    Both objects are stored under the keys ``'dma1'`` and ``'dma2'`` in the
    store opened at ``filename`` and then fetched again. The fetched objects
    are discarded and nothing is returned.
    """
    from pandas.io.pytables import HDFStore

    hdf = HDFStore(filename)

    # Store both objects first ...
    for key, frame in (('dma1', dma1), ('dma2', dma2)):
        hdf[key] = frame

    # ... then fetch them again in the same order
    for key in ('dma1', 'dma2'):
        _ = hdf[key]
75+
76+
def pandas_roundtrip_pickle(filename, dma1, dma2):
    """Save each object to ``filename`` and load it straight back.

    ``dma1`` is saved and reloaded first, then ``dma2`` goes through the same
    path and overwrites it. The reloaded objects are discarded and nothing is
    returned. (Presumably ``save``/``load`` are pickle based, going by this
    function's name -- confirm against ``DataMatrix``.)
    """
    for frame in (dma1, dma2):
        frame.save(filename)
        _ = pandas.DataMatrix.load(filename)

bench/test.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import numpy as np
2+
import itertools
3+
import collections
4+
import scipy.ndimage as ndi
5+
6+
# Number of synthetic observations
N = 10000

# Integer coordinates drawn from [0, 360), plus one random value per point
lat = np.random.randint(low=0, high=360, size=N)
lon = np.random.randint(low=0, high=360, size=N)
data = np.random.randn(N)
11+
12+
def groupby1(lat, lon, data):
    """Mean of ``data`` for every distinct ``(lat, lon)`` pair, by sorting.

    The rows are sorted so that equal pairs are contiguous, and each
    contiguous run is then averaged with ``group_agg``.

    Parameters
    ----------
    lat, lon : ndarray
        Coordinate arrays of equal length. The key encoding below only keeps
        pairs distinct and ordered when ``0 <= lon < 1000``.
    data : ndarray
        Values to average, aligned with ``lat`` and ``lon``.

    Returns
    -------
    dict
        Maps each ``(lat, lon)`` pair to the mean of its values.
    """
    # lexsort treats the LAST key as primary: order by lat, then by lon
    indexer = np.lexsort((lon, lat))
    lat = lat.take(indexer)
    lon = lon.take(indexer)
    sorted_data = data.take(indexer)

    # Fold each pair into a single scalar key
    keys = 1000. * lat + lon
    unique_keys = np.unique(keys)

    # ``keys`` is sorted, so this is the first row of every group. The same
    # positions both delimit the groups and recover each group's
    # coordinates, so they are computed once and reused.
    bounds = keys.searchsorted(unique_keys)

    result = group_agg(sorted_data, bounds, lambda x: x.mean())

    return dict(zip(zip(lat.take(bounds), lon.take(bounds)), result))
27+
28+
def group_mean(lat, lon, data):
    """Mean of ``data`` for every distinct ``(lat, lon)`` pair.

    The averaging itself is delegated to ``scipy.ndimage.mean``, using one
    integer label per pair.

    Parameters
    ----------
    lat, lon : ndarray
        Coordinate arrays of equal length.
    data : ndarray
        Values to average, aligned with ``lat`` and ``lon``.

    Returns
    -------
    dict
        Maps each ``(lat, lon)`` pair to the mean of its values.
    """
    # lexsort treats the LAST key as primary: order by lat, then by lon
    order = np.lexsort((lon, lat))
    lat_sorted, lon_sorted = lat.take(order), lon.take(order)
    values = data.take(order)

    # One label per row, built from its coordinate pair
    labels = 1000 * lat_sorted + lon_sorted
    distinct = np.unique(labels)

    means = ndi.mean(values, labels=labels, index=distinct)

    # First row of each label, used to recover that group's coordinates
    first_rows = labels.searchsorted(distinct)
    pairs = zip(lat_sorted.take(first_rows), lon_sorted.take(first_rows))

    return dict(zip(pairs, means))
41+
42+
def group_mean_naive(lat, lon, data):
    """Mean of ``data`` for every distinct ``(lat, lon)`` pair, in pure Python.

    Values are collected into one list per pair and each list is averaged
    with ``np.mean``.

    Returns
    -------
    dict
        Maps each ``(lat, lon)`` tuple to the mean of its values.
    """
    buckets = {}
    for point, value in zip(zip(lat, lon), data):
        buckets.setdefault(point, []).append(value)

    return dict((point, np.mean(values)) for point, values in buckets.items())
50+
51+
def group_agg(values, bounds, f):
    """Apply ``f`` to consecutive slices of ``values``.

    Parameters
    ----------
    values : sequence or ndarray
        Data to aggregate.
    bounds : sequence of int
        Start position of each slice. A slice ends where the next one
        starts, and the last slice runs to the end of ``values``.
    f : callable
        Reduces one slice to a scalar.

    Returns
    -------
    ndarray
        Float array holding one aggregated value per entry of ``bounds``.
    """
    n_groups = len(bounds)
    total = len(values)
    out = np.empty(n_groups, dtype=float)

    for pos in range(n_groups):
        start = bounds[pos]
        # The final group has no successor, so it extends to the end
        stop = bounds[pos + 1] if pos + 1 < n_groups else total
        out[pos] = f(values[start:stop])

    return out
63+
64+
# for i in range(10):
65+
# groupby1(lat, lon, data)

doc/source/r_guide.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
.. currentmodule:: pandas
2+
3+
.. _r_guide:
4+
5+
pandas for R users
6+
------------------

pandas/core/tests/test_common.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from pandas.core.common import notnull, isnull
2+
import pandas.core.common as common
3+
4+
import numpy as np
5+
6+
def test_notnull():
    """``notnull`` is true for a float and false for None, NaN and +/-inf."""
    assert notnull(1.)

    for missing in (None, np.NaN, np.inf, -np.inf):
        assert not notnull(missing)
12+
13+
def test_isnull():
    """``isnull`` is false for a float and true for None, NaN and +/-inf."""
    assert not isnull(1.)

    for missing in (None, np.NaN, np.inf, -np.inf):
        assert isnull(missing)
19+

0 commit comments

Comments
 (0)