-
Notifications
You must be signed in to change notification settings - Fork 0
/
youtubeTrend.py
181 lines (147 loc) · 6.51 KB
/
youtubeTrend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/python
# -*- coding: utf-8 -*-
from pprint import pprint
import json
import os
import datetime
import cassandra
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
# Connection to the yvideos keyspace.
#
# SECURITY: the client id and secret used to be hard-coded on this line.
# They are now read from the environment so they never live in source
# control. The pair that was committed previously must be treated as leaked
# and rotated.
#
# Required environment variables (a missing one raises KeyError at startup):
#   CASSANDRA_CLIENT_ID      - first argument to PlainTextAuthProvider
#   CASSANDRA_CLIENT_SECRET  - second argument to PlainTextAuthProvider
cloud_config = {
    'secure_connect_bundle': 'secure-connect-teaminferno.zip',
}
auth_provider = PlainTextAuthProvider(
    os.environ['CASSANDRA_CLIENT_ID'],
    os.environ['CASSANDRA_CLIENT_SECRET'],
)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect('yvideos')
# Empty the five target tables before loading; statements run in this order.
for truncate_statement in (
    """truncate table records;""",
    """truncate table snippets;""",
    """truncate table localized;""",
    """truncate table contentdetails;""",
    """truncate table statistics;""",
):
    session.execute(truncate_statement)
# Load every video from videoinfo.json into the records / snippets /
# localized / contentdetails / statistics tables, then bump the matching
# per-tag and per-hour rows in `averages`.
#
# All values are passed to session.execute() as %s parameters instead of
# being concatenated into the CQL text. The driver quotes and escapes them,
# so an apostrophe in a title, channel title, tag, etc. can no longer break
# (or alter) the statement. Column lists are unchanged.
# NOTE(review): 'defeaultlanguage' is spelled exactly as in the original
# statement; presumably it matches the existing table column - verify
# before "fixing" the spelling.
INSERT_RECORD = 'insert into records (id,videoid,etag) values (%s, %s, %s)'
INSERT_SNIPPET = (
    'insert into snippets (id,publishedAt,channelId,title,channelTitle,'
    'categoryid,liveBroadcastContent,defeaultlanguage,description) '
    'values (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
)
INSERT_LOCALIZED = 'insert into localized (id,title) values (%s, %s)'
INSERT_CONTENTDETAILS = (
    'insert into contentdetails(id,duration,definition,caption) '
    'values (%s, %s, %s, %s)'
)
INSERT_STATISTICS = (
    'insert into statistics(id,viewcount,likecount,dislikecount,commentcount) '
    'values (%s, %s, %s, %s, %s)'
)
SELECT_TAG_AVERAGE = (
    'select keyval, count, videoids from averages where keyval = %s'
)
SELECT_TIME_AVERAGE = 'select count, videoids from averages where keyval = %s'
UPDATE_AVERAGE = (
    'update averages set count = %s, videoids = %s where keyval = %s'
)

# Row-id counters; each table gets ids 1, 2, 3, ... in file order.
record_id = 1
snippet_id = 1
localized_id = 1
contentDetails_id = 1
statistics_id = 1

# JSON is UTF-8 by specification; do not depend on the platform default.
with open('videoinfo.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

for v in data:
    snippet = v['snippet']

    # Capture this video's ids, then advance the counters for the next one.
    rec_record_id = record_id
    s_id = snippet_id
    l_id = localized_id
    cont_id = contentDetails_id
    stat_id = statistics_id
    record_id += 1
    snippet_id += 1
    localized_id += 1
    contentDetails_id += 1
    statistics_id += 1

    # Line breaks in the description are flattened to single spaces
    # (each '\n' and each '\r' becomes one space, as before).
    description = snippet['description'].replace('\n', ' ').replace('\r', ' ')

    session.execute(INSERT_RECORD, (rec_record_id, v['id'], v['etag']))

    session.execute(
        INSERT_SNIPPET,
        (
            s_id,
            snippet['publishedAt'],
            snippet['channelId'],
            snippet['title'],
            snippet['channelTitle'],
            snippet['categoryId'],
            snippet['liveBroadcastContent'],
            snippet['defaultLanguage'],
            description,
        ),
    )

    # The localized row stores the snippet title, as the original did.
    session.execute(INSERT_LOCALIZED, (l_id, snippet['title']))

    # NOTE(review): duration/definition/caption and the four counters are
    # read from v['snippet'], exactly as before. If videoinfo.json is raw
    # YouTube Data API output these may live under other top-level keys -
    # confirm against the actual file.
    session.execute(
        INSERT_CONTENTDETAILS,
        (
            cont_id,
            snippet['duration'],
            snippet['definition'],
            snippet['caption'],
        ),
    )
    session.execute(
        INSERT_STATISTICS,
        (
            stat_id,
            snippet['viewCount'],
            snippet['likeCount'],
            snippet['dislikeCount'],
            snippet['commentCount'],
        ),
    )

    # Per-tag aggregates: only rows that already exist in `averages` are
    # updated. The appended id is this video's own snippet id (s_id); the
    # original used snippet_id after it had been incremented, which
    # recorded the id of the *next* video.
    for tag in snippet.get('tags', []):
        tag_key = 'tag: ' + tag
        for row in session.execute(SELECT_TAG_AVERAGE, (tag_key,)):
            new_count = row.count + 1
            video_ids = str(row.videoids) + ', ' + str(s_id)
            print(new_count)
            print(UPDATE_AVERAGE, (new_count, video_ids, tag_key))
            session.execute(UPDATE_AVERAGE, (new_count, video_ids, tag_key))

    # Per-hour aggregate: characters 11-12 of publishedAt (the same two
    # characters the original sliced out of the quoted string with [12:14]).
    timevalue = snippet['publishedAt'][11:13]
    print("timevalue = " + timevalue)
    time_key = 'time: ' + timevalue
    print(SELECT_TIME_AVERAGE, (time_key,))
    # Update only when the row exists, matching the tag handling above; with
    # no matching row the original could build an UPDATE with an empty
    # videoids value, which is not valid CQL.
    for row in session.execute(SELECT_TIME_AVERAGE, (time_key,)):
        new_count = row.count + 1
        video_ids = str(row.videoids) + ', ' + str(s_id)
        print(UPDATE_AVERAGE, (new_count, video_ids, time_key))
        session.execute(UPDATE_AVERAGE, (new_count, video_ids, time_key))

# Loading is finished: release the driver's connections and worker threads.
cluster.shutdown()