-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdata_provider.py
117 lines (107 loc) · 3.57 KB
/
data_provider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from paddle.trainer.PyDataProvider2 import *
import re
import logging
import random
TERM_SIZE = 24
def initialize(settings, num, point, **kwargs):
    """Declare the training input layout on *settings*.

    Registers ``data_0`` .. ``data_{num-1}`` as dense vector sequences of
    length TERM_SIZE, plus ``label_0`` .. ``label_{TERM_SIZE-1}`` as integer
    class labels with 4 classes.  *num* and *point* are stashed on
    *settings* for use by the process() generator.
    """
    settings.point = point
    settings.num = num
    types = {}
    for idx in range(num):
        types['data_%s' % idx] = dense_vector_sequence(TERM_SIZE)
    for idx in range(TERM_SIZE):
        types['label_%s' % idx] = integer_value(4)
    settings.input_types = types
@provider(init_hook=initialize, cache=CacheType.NO_CACHE, should_shuffle=True)
def process(settings, filename):
    """Yield training samples from a CSV speed file.

    Each line of *filename* is one node's comma-separated integer speed
    series.  For every sliding window position a sample is produced:
    per-node input windows of length TERM_SIZE scaled by 1/4, plus
    TERM_SIZE integer labels taken from node 0's following TERM_SIZE
    values, shifted to 0-based classes (0 is treated as class 0).
    """
    data = []
    node_num = settings.num
    # shortest series length over all lines; every window must fit all nodes
    max_len = 100000000000000000
    with open(filename, 'r') as f:
        for line in f:
            # list(...) keeps this working on Python 3, where map() is lazy
            # and does not support len()/indexing
            speed = list(map(int, line.rstrip('\n').split(',')))
            data.append(speed)
            max_len = min(len(speed), max_len)
    for i in range(0, max_len - 2 * TERM_SIZE, 1):
        result = dict()
        for j in range(node_num):
            # scale raw speeds by 1/4 — presumably values are in 1..4,
            # matching integer_value(4) labels; TODO confirm against data
            key = 'data_%s' % j
            result[key] = [[data[j][k] / 4.0 for k in range(i, i + TERM_SIZE)]]
        # labels: node 0's next TERM_SIZE speeds, remapped to classes 0..3
        label = data[0][i + TERM_SIZE:i + 2 * TERM_SIZE]
        for j in range(TERM_SIZE):
            if label[j] == 0:
                # a zero speed is bumped to 1 so the shift below yields class 0
                label[j] = 1
            label[j] -= 1
            label_key = 'label_%s' % j
            result[label_key] = label[j]
        yield result
# def initialize(settings, num, point, **kwargs):
# s = dict()
# settings.point = point
# settings.num = num
# for i in range(num):
# key = 'data_%s' % i
# s[key] = dense_vector_sequence(TERM_SIZE)
# for i in range(TERM_SIZE):
# s['label_%s' % i] = integer_value(4)
# settings.input_types = s
#
#
# @provider(init_hook=initialize,cache=CacheType.CACHE_PASS_IN_MEM)
# def process(settings, filename):
# data = []
#
# max_len = 0
# node_num = settings.num
#
# with open(filename, 'r') as f:
# for line in f.readlines():
# speeds = map(int, line.rstrip('\n').split(','))
# data.append(speeds)
# max_len = len(speeds)
# for i in range(max_len - 2*TERM_SIZE - 1):
# result = dict()
#
# for j in range(node_num):
# key = 'data_%s' % j
# result[key] = [[data[j][k] - 1 for k in range(i, i + TERM_SIZE)]]
# labels = data[0][i+TERM_SIZE:i+TERM_SIZE*2]
# if 0 in labels:
# continue
# for p in range(TERM_SIZE):
# key = 'label_%s' % p
# result[key] = labels[p]-1
# yield result
#
#
def predict_initialize(settings, num, point, **kwargs):
    """Declare the prediction input layout: one dense window per node.

    Registers ``data_0`` .. ``data_{num-1}``, each a dense vector sequence
    of length TERM_SIZE, and stores *num*/*point* on *settings*.
    """
    settings.point = point
    settings.num = num
    settings.input_types = {
        'data_%s' % idx: dense_vector_sequence(TERM_SIZE)
        for idx in range(num)
    }
@provider(init_hook=predict_initialize, cache=CacheType.CACHE_PASS_IN_MEM)
def process_predict(settings, filename):
    """Yield a single prediction sample from the start of each series.

    Reads one comma-separated integer speed series per line and emits, for
    each of the first ``settings.num`` nodes, the first TERM_SIZE values
    scaled by 1/4 — the same scaling used at training time in process().
    """
    with open(filename, 'r') as f:
        data = []
        node_num = settings.num
        result = dict()
        for line in f:
            # list(...) keeps this working on Python 3, where map() is lazy
            # and does not support indexing
            data.append(list(map(int, line.rstrip('\n').split(','))))
        for i in range(node_num):
            key = 'data_%s' % i
            result[key] = [[data[i][k] / 4.0 for k in range(0, TERM_SIZE)]]
        yield result