-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_data_numpy.py
93 lines (68 loc) · 2.49 KB
/
get_data_numpy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import urllib
import zipfile
import os
import shutil
fname = "tageswerte_00044_19710301_20131231_hist.zip"
url = "ftp://ftp-cdc.dwd.de/pub/CDC/observations_germany/climate/daily/kl/historical/"
if not os.path.exists(fname):
fln, headers = urllib.urlretrieve(url + fname)
shutil.copyfile(fln, fname)
datazip = zipfile.ZipFile(fname, 'r')
datazip.namelist()
datacsv = datazip.read('produkt_klima_Tageswerte_19710301_20131231_00044.txt')
datalines = datacsv.splitlines()
header = [item.strip() for item in datalines[0].split(";")]
# remove is in-place
header.remove('eor')
data = {}
# last line in the file just contains '\x1a', which is not a useful key
for line in datalines[1:]:
linedata = line.split(";")
if len(linedata) < 2:
continue
daily = {}
# Don't try to read the last column, as this is always 'eor'
for idx, value in enumerate(linedata[:-1]):
key = header[idx]
if key not in ("Stations_ID", "Mess_Datum"):
v = float(value)
if v == -999:
daily[key] = None
else:
daily[key] = v
elif key == "Mess_Datum":
date = value
data[date] = daily
header.remove("Mess_Datum")
header.remove("Stations_ID")
timeseries = {}
for key in header:
timeseries[key] = []
for date in sorted(data.keys()):
timeseries[key].append(data[date][key])
import numpy
import pylab
airtemp_val = numpy.array(timeseries["LUFTTEMPERATUR"], dtype=float)
airtemp_idx = numpy.arange(len(airtemp_val))
airtemp_mask = numpy.isfinite(airtemp_val)
airtemp_val = airtemp_val[airtemp_mask]
airtemp_idx = airtemp_idx[airtemp_mask]
datelabels = numpy.array(sorted(data.keys()))[airtemp_mask]
datelabel_spacing = numpy.linspace(0, len(airtemp_idx) - 1, 10).astype(int)
firstjanuary_idx = []
for idx, datelabel in enumerate(datelabels):
if datelabel.endswith("0701"):
firstjanuary_idx.append(idx)
firstjanuary_idx = numpy.array(firstjanuary_idx)
firstjanuary_temp_val = airtemp_val[firstjanuary_idx]
firstjanuary_temp_idx = airtemp_idx[firstjanuary_idx]
p = numpy.polyfit(firstjanuary_temp_idx, firstjanuary_temp_val, deg=1)
fit_y = p[1] + firstjanuary_temp_idx * p[0]
pylab.figure()
pylab.title("Local warming")
pylab.plot(airtemp_idx, airtemp_val)
pylab.plot(firstjanuary_temp_idx, firstjanuary_temp_val, "ro", ms=10)
pylab.plot(firstjanuary_temp_idx, fit_y, "g-", lw=2)
pylab.grid()
pylab.xticks(airtemp_idx[datelabel_spacing], datelabels[datelabel_spacing], rotation=45)
pylab.show()