-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathload_dataset.py
113 lines (91 loc) · 2.59 KB
/
load_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Example code for loading a dataset to a TimeSeries object.
Note that this code requires Pandas to be available.
Author: Gertjan van den Burg
Copyright: The Alan Turing Institute, 2019
License: See LICENSE file.
"""
import json
import numpy as np
import pandas as pd
class TimeSeries:
    """Container holding a time index together with its observed values.

    The instance simply stores what it is given:

    * ``t`` -- the time index (``len(t)`` is the number of observations).
    * ``y`` -- the observations; ``y.shape[1]`` is used as the number of
      dimensions, so a 2-D array-like with one column per series is expected.
    * ``name`` / ``longname`` -- identifiers for the series.
    * ``datestr`` / ``datefmt`` -- raw date strings and their format, or
      ``None`` when not available.
    * ``columns`` -- labels for the columns of ``y``.
    """

    def __init__(
        self,
        t,
        y,
        name=None,
        longname=None,
        datestr=None,
        datefmt=None,
        columns=None,
    ):
        self.t = t
        self.y = y
        self.name = name
        self.longname = longname
        self.datestr = datestr
        self.datefmt = datefmt
        self.columns = columns

        # whether the series is stored as zero-based or one-based
        self.zero_based = True

    @property
    def n_obs(self):
        """Number of observations, i.e. the length of the time index."""
        return len(self.t)

    @property
    def n_dim(self):
        """Number of dimensions, i.e. the number of columns of ``y``."""
        return self.y.shape[1]

    @property
    def shape(self):
        """Tuple ``(n_obs, n_dim)``."""
        return (self.n_obs, self.n_dim)

    @classmethod
    def from_json(cls, filename):
        """Build a TimeSeries from a JSON file.

        The following keys are read from the decoded JSON object:
        ``name``, ``longname``, ``n_obs``, ``n_dim``, ``time`` (with
        ``index`` and, optionally, ``format`` plus ``raw``) and ``series``
        (a list of objects with ``type``, ``raw`` and an optional ``label``).
        Series without a label are named ``V1``, ``V2``, ... by position.

        Parameters
        ----------
        filename
            Path of the JSON file to read.

        Returns
        -------
        TimeSeries
            A new instance populated from the file.
        """
        with open(filename, "rb") as fp:
            data = json.load(fp)

        time_info = data["time"]
        # squeeze() drops singleton axes; atleast_1d() keeps a one-element
        # index as a length-1 array so that len(t) keeps working.
        tidx = np.atleast_1d(np.squeeze(np.array(time_info["index"])))

        if "format" in time_info:
            datefmt = time_info["format"]
            datestr = np.array(time_info["raw"])
        else:
            datefmt = None
            datestr = None

        y = np.zeros((data["n_obs"], data["n_dim"]))
        columns = []
        for idx, series in enumerate(data["series"]):
            columns.append(series.get("label", "V%i" % (idx + 1)))
            # ``np.int`` was only an alias for the builtin ``int`` and has
            # been removed from NumPy; use the builtin directly.
            thetype = int if series["type"] == "integer" else np.float64
            y[:, idx] = np.array(series["raw"], dtype=thetype)

        return cls(
            tidx,
            y,
            name=data["name"],
            longname=data["longname"],
            datefmt=datefmt,
            datestr=datestr,
            columns=columns,
        )

    @property
    def df(self):
        """Return the series as a pandas DataFrame.

        The frame has a ``t`` column holding the time index followed by one
        column per entry of ``columns`` taken from the matching column of
        ``y``.
        """
        d = {"t": self.t}
        for i, col in enumerate(self.columns):
            d[col] = self.y[:, i]
        return pd.DataFrame(d)

    def make_one_based(self):
        """ Convert the time index to a one-based time index. """
        # Guarded by the flag so that repeated calls shift the index only once.
        if self.zero_based:
            self.t = [t + 1 for t in self.t]
            self.zero_based = False

    def __repr__(self):
        return "TimeSeries(name=%s, n_obs=%s, n_dim=%s)" % (
            self.name,
            self.n_obs,
            self.n_dim,
        )

    def __str__(self):
        return repr(self)