-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathread_dataset.py
146 lines (136 loc) · 3.99 KB
/
read_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
'''
Create parameter object for each file.
'''
class fileParam:
    '''
    Parameter record for one data file.

    Holds five values: data_name, file_path, start_time, end_time and
    interval. All of them start out as empty strings and are filled in
    by get_param().
    '''
    # Attribute names, in the order their values appear in a parameter row.
    _FIELDS = ('data_name', 'file_path', 'start_time', 'end_time', 'interval')

    def __init__(self):
        # Every parameter is initialised to the empty string.
        for attr in self._FIELDS:
            setattr(self, attr, '')

    def get_param(self, param):
        '''
        Fill this record from one parameter row.
        @param {!Array} param Indexable row; positions 0-4 are copied to
            data_name, file_path, start_time, end_time and interval, in
            that order. Anything past position 4 is ignored.
        '''
        # Fields are assigned one at a time, in order, so a row that is
        # too short fills the leading fields before the lookup fails.
        for position, attr in enumerate(self._FIELDS):
            setattr(self, attr, param[position])
'''
Create data object for micro file.
'''
class microData:
    '''
    Column-wise container for the readings of one micro file.

    get_data() splits a 2-D table into one attribute per column:
    column 0 becomes data_time (parsed with assign_time) and columns
    1-9 become the feature attributes listed in _FEATURES.
    set_output() stores a separate output vector.
    '''
    # Feature names accepted by read_feature(). Each name is also the
    # attribute holding that column, and the position in this tuple plus
    # one is the column index read by get_data().
    _FEATURES = (
        'percip_mm',
        'humidity',
        'temp',
        'leafwet450_min',
        'leafwet460_min',
        'leafwet_lwscnt',
        'gusts_ms',
        'wind_dir',
        'wind_ms',
    )

    def __init__(self):
        self.data_time = []
        self.percip_mm = []
        self.humidity = []
        self.temp = []
        self.leafwet450_min = []
        self.leafwet460_min = []
        self.leafwet_lwscnt = []
        self.gusts_ms = []
        self.wind_dir = []
        self.wind_ms = []
        self.output = []

    def get_data(self, data):
        '''
        Load every column of a data table into this object.
        @param {!Array} data 2-D array indexable as data[:, i]; column 0
            holds raw time strings, columns 1-9 hold the features in
            _FEATURES order.
        '''
        # Timestamps are converted one by one; the feature columns are
        # stored exactly as sliced from the table.
        self.data_time = [assign_time(raw_time) for raw_time in data[:, 0]]
        for column, name in enumerate(self._FEATURES, 1):
            setattr(self, name, data[:, column])

    def set_output(self, data):
        '''
        Store column 1 of a table as the output vector.
        @param {!Array} data 2-D array indexable as data[:, 1].
        @return {!ndarray} The new array, also kept in self.output.
        '''
        # NOTE(review): column 1 is the same index get_data() stores as
        # percip_mm -- confirm that is the intended output column.
        # Going through a list lets numpy infer the dtype afresh, as the
        # original element-by-element copy did.
        self.output = np.array(list(data[:, 1]))
        return self.output

    def read_feature(self, feature):
        '''
        Look up a stored feature column by name.
        @param {string} feature One of the names in _FEATURES.
        @return {!Array} The stored column, or [] for an unknown name.
        '''
        if feature in self._FEATURES:
            return getattr(self, feature)
        # Single-argument print behaves the same on Python 2 and 3; the
        # emitted text is unchanged ("Wrong feature <name>").
        print('Wrong feature %s' % (feature,))
        return []
'''
Convert a raw time string from the file into a datetime object.
@param {string} time
@return {!datetime}
'''
def assign_time(time):
    '''
    Convert a raw time string from a data file into a datetime object.

    Two layouts are recognised:
      - any string containing '-' is parsed as 'YYYY-MM-DD HH:MM:SS';
      - otherwise any string containing '/' is parsed as
        'M/D/YY HH:MM' or 'M/D/YYYY HH:MM', where a two-digit year YY
        is read as 20YY.
    Anything else prints 'invalid time format' and yields the
    placeholder 1900-01-01 00:00.

    @param {string} time
    @return {!datetime}
    '''
    if '-' in time:
        return datetime.datetime.strptime(time, '%Y-%m-%d %H:%M:%S')
    if '/' in time:
        date_and_time = time.split(' ')
        date = date_and_time[0].split('/')
        # Only expand two-digit years. Prefixing a four-digit year too
        # would turn 2015 into 202015 and make the parse fail.
        if len(date[2]) == 2:
            date[2] = '20' + date[2]
        normalized = '/'.join(date) + ' ' + date_and_time[1]
        return datetime.datetime.strptime(normalized, '%m/%d/%Y %H:%M')
    # Single-argument print behaves the same on Python 2 and 3.
    print('invalid time format')
    # Same value strptime('', '') produces, without parsing on every call.
    return datetime.datetime(1900, 1, 1)
'''
Convert a raw interval string from the file into a number of minutes.
@param {string} interval
@return {number}
'''
def get_interval_minute(interval):
    '''
    Convert a raw interval string into a number of minutes.

    The last character is the unit: 'h' for hours, 'm' for minutes.
    Everything before it must be an integer. Any other unit, including
    an empty string, prints 'invalid parameter' and yields 0.

    @param {string} interval For example '2h' or '15m'.
    @return {number} The interval length in minutes, or 0 if invalid.
    '''
    # Slicing instead of indexing, so an empty string takes the invalid
    # branch below rather than raising IndexError.
    unit = interval[-1:]
    if unit == 'h':
        return int(interval[:-1]) * 60
    if unit == 'm':
        return int(interval[:-1])
    # Single-argument print behaves the same on Python 2 and 3.
    print('invalid parameter')
    return 0
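'''
Build one parameter object per row of the file table.
@param {!DataFrame} path Table whose .values rows each describe one file
    (presumably a pandas DataFrame; confirm against the caller).
@return {!Array<!fileParam>}
'''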
def read_all_dataset(path):
    '''
    Build one fileParam per row of the given table.
    @param {!Object} path Object exposing a .values sequence of rows; each
        row is handed to fileParam.get_param().
    @return {!Array<!fileParam>} Parameter objects, in row order.
    '''
    def _row_to_param(row):
        # get_param() fills the object in place, so build it first and
        # return it afterwards.
        entry = fileParam()
        entry.get_param(row)
        return entry

    return [_row_to_param(row) for row in path.values]
'''
Read all dataset and save parameters for each file.
@param {string} dataset_path
@param {string} plot_files
@return {!Array<string>}
'''
def set_features(dataset_path, plot_files):
    '''
    Return the feature names found in the header of the first plot file.
    @param {string} dataset_path Prefix joined directly onto the file name;
        no separator is inserted.
    @param {!Array<string>} plot_files File names without the '.csv'
        extension; only the first entry is read.
    @return {!Array<string>} Header cells of that file, minus the first.
    '''
    csv_path = ''.join([dataset_path, plot_files[0], '.csv'])
    # header=None makes the header line come back as the first data row,
    # and nrows=1 stops reading after it.
    header_row = pd.read_csv(csv_path, header=None, nrows=1).values[0]
    # The first cell is dropped; the remaining cells are the feature names.
    return header_row[1:]