-
Notifications
You must be signed in to change notification settings - Fork 0
/
jqxml.py
91 lines (70 loc) · 2.6 KB
/
jqxml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from pyquery import PyQuery as pq
import pandas as pd
import argparse
import typing
# from glob import glob
parser = argparse.ArgumentParser(description=None)


def GetArgs(
    parser: argparse.ArgumentParser,
    argv: typing.Optional[typing.Sequence[str]] = None,
) -> argparse.Namespace:
    """Register the positional ``files`` argument on *parser* and parse.

    Args:
        parser: The argument parser to extend. It is mutated in place: a
            ``files`` positional accepting one or more values is added.
        argv: Optional explicit argument list. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, so existing callers
            that pass only *parser* behave exactly as before.

    Returns:
        The namespace produced by ``parser.parse_args``; its ``files``
        attribute is a list with at least one string.
    """
    parser.add_argument("files", nargs="+")
    return parser.parse_args(argv)
# Parse the command line at module level. GetArgs registers a "files"
# positional (one or more values), so the paths are read back below as
# ``fileList.files``.
fileList = GetArgs(parser)
# print(fileList.files)
data = []

# Attribute names looked up on each <nomination> element, in output order.
horseattrs = (
    'race_id', 'id', 'horse', 'number', 'finished', 'age', 'sex',
    'blinkers', 'trainernumber', 'career', 'thistrack', 'firstup',
    'secondup', 'variedweight', 'weight', 'pricestarting',
)

# Attribute names looked up on each <meeting> element.
meetattrs = ('id', 'venue', 'date', 'rail', 'weather', 'trackcondition')

# Attribute names looked up on each <race> element.
raceattrs = (
    'id', 'number', 'shortname', 'stage', 'distance',
    'grade', 'age', 'weightcondition', 'fastesttime', 'sectionaltime',
)

# Attribute names looked up on each <club> element.  The trailing comma is
# required: ``('code')`` is just the string 'code', and iterating that
# yields the characters 'c', 'o', 'd', 'e' instead of one attribute name.
clubattrs = ('code',)

frames = pd.DataFrame([])
noms = []
# For every input file: parse it, tag each nomination with the id of its
# enclosing race, then flatten the meeting / club / race / nomination
# attributes into lists of rows (one row per element, one column per name).
for items in fileList.files:
    d = pq(filename=items)
    meet = d('meeting')
    club = d('club')
    race = d('race')
    res = d('nomination')

    # Stamp every nomination with the id of the race that contains it.
    # ``res`` wraps the same underlying elements, so an attribute set here
    # is visible through ``res`` when the rows are built below.  The name
    # has to be 'race_id' because that is the key ``horseattrs`` looks up.
    # PyQuery.append() is a DOM operation (it inserts nodes into the tree),
    # not list.append(), so it is deliberately not used to collect nodes.
    # ``race`` itself is left untouched so ``racedata`` covers every race.
    for race_node in race.items():
        race_id = race_node.attr('id')
        if race_id is None:
            # Nothing to propagate for a race without an id.
            continue
        for nom_node in race_node.items('nomination'):
            nom_node.attr('race_id', race_id)

    # Guard against ``clubattrs`` being a bare string such as 'code':
    # treat it as a single attribute name instead of iterating characters.
    club_fields = (clubattrs,) if isinstance(clubattrs, str) else clubattrs

    resdata = [[nom.attr(x) for x in horseattrs] for nom in res.items()]
    meetdata = [[m.attr(x) for x in meetattrs] for m in meet.items()]
    racedata = [[r.attr(x) for x in raceattrs] for r in race.items()]
    clubdata = [[c.attr(x) for x in club_fields] for c in club.items()]

    # First column of ``raceattrs`` is 'id', so this is the list of race ids.
    raceid = [row[0] for row in racedata]

    # NOTE(review): the row lists are rebuilt for every file and nothing is
    # accumulated or written out yet, so only the last file's data survives
    # the loop.
    # TODO: build DataFrames from clubdata / meetdata / racedata / resdata,
    # collect them across files and export to CSV (utf-8).