Skip to content

Commit

Permalink
Version 0.8 of vk-backup
Browse files Browse the repository at this point in the history
* A number of speed optimizations
* Generated our own app ID that requests more permissions
* Added handling of known API error codes, such as access restrictions
* Enabled json database sort keys
* Fixed copy_history processing
* Added requesting of comments for photos, wall posts, and videos
* Rewrote the media update logic
* Added a user `updated` field to tell whether the user was already updated in this session
  • Loading branch information
rabits committed Nov 4, 2014
1 parent 22b9fb9 commit 0efb216
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 25 deletions.
22 changes: 19 additions & 3 deletions lib/Api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@

c.log('debug', 'Init Api')

# Session start time
_START_TIME = long(time.time())

# Vk application ID
_CLIENT_ID = '2951857'
_CLIENT_ID = '4603710'

# Get token & user_id by login
(_TOKEN, _USER_ID) = vk_auth.auth(c.cfg('user'), c.cfg('password'), _CLIENT_ID, "messages")
(_TOKEN, _USER_ID) = vk_auth.auth(c.cfg('user'), c.cfg('password'), _CLIENT_ID, "messages,audio,docs,video,photos,wall,friends")

# Time of the last API call, used to prevent overloading the service
_LAST_API_CALL = 0
Expand All @@ -39,7 +42,16 @@ def request(method, params):
url = "https://api.vk.com/method/%s?%s" % (method, urlencode(params))
data = json.loads(urllib2.urlopen(url, None, 30).read())
if 'response' not in data:
raise Exception('no correct response while calling api method "%s", data: %s' % (method, data))
if 'error' in data:
c.log('warning', 'Api responded error: %s' % data['error']['error_msg'])
if data['error']['error_code'] in [7, 15, 212]:
return
elif data['error']['error_code'] in [10]:
continue
else:
raise Exception('unknown error code %i, "%s", data: %s' % (data['error']['error_code'], method, data))
else:
raise Exception('no correct response while calling api method "%s", data: %s' % (method, data))
break
except Exception as e:
c.log('warning', 'Retry request %i (3): %s' % (retry, str(e)))
Expand All @@ -55,3 +67,7 @@ def getUserId():
global _USER_ID
return str(_USER_ID)

def getStartTime():
    """Return the module-level session start time (_START_TIME)."""
    # Reading a module-level name needs no `global` declaration.
    return _START_TIME

2 changes: 1 addition & 1 deletion lib/Database.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def store(self):
if not os.path.isdir(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
with codecs.open(path, 'w', 'utf-8') as outfile:
json.dump(self.data[i], outfile, indent=1, ensure_ascii=False)
json.dump(self.data[i], outfile, indent=1, ensure_ascii=False, sort_keys=True)

def load(self, subdir = None):
path = self.path if subdir == None else os.path.join(self.path, subdir)
Expand Down
86 changes: 70 additions & 16 deletions lib/Media.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from Database import Database

import Api

class Media(Database):
class Downloader(threading.Thread):
def __init__(self, queue, report):
Expand Down Expand Up @@ -83,7 +85,7 @@ def download(self):
return self.success

def stopDownloads(self):
c.log('debug', 'Stopping download threads')
c.log('debug', 'Stopping download threads (%i)' % len(self.threads))
for i in self.threads:
i.stop()

Expand Down Expand Up @@ -125,14 +127,15 @@ def loadAttachments(self, data):
if 'attachment' in data:
attachments.append(data['attachment'])
if 'copy_history' in data:
self.loadAttachments(data['copy_history'])
for subdata in data['copy_history']:
self.loadAttachments(subdata)
for attach in attachments:
c.log('debug', 'Processing %s' % attach['type'])
funcname = 'process' + attach['type'].title()
if funcname in dir(self):
getattr(self, funcname)(attach[attach['type']])
else:
c.log('error', ' unable to find attachment processing function "Media.%s"' % funcname)
c.log('error', ' media processing function "Media.%s" is not implemented' % funcname)
c.log('debug', str(attach))

def addDownload(self, url, path = None):
Expand Down Expand Up @@ -165,15 +168,59 @@ def preprocess(self, data, data_type):
path = os.path.join(data_type, str(mydata['id']))

if path in self.data:
return None
return path

self.data[path] = mydata

return path

def requestComments(self, data, data_type, owner_id):
    """Download every comment of a photo, video or wall post into ``data``.

    Nothing is requested unless ``owner_id`` equals the id returned by
    ``Api.getUserId()``. Comments are fetched in pages of 100, stored in
    ``data['comments']`` keyed by ``str(comment['date'])``, and every stored
    comment is handed to ``self.loadAttachments``.

    data      -- dict describing the item; must contain 'id'
    data_type -- 'photo', 'video' or 'wall'; any other value is logged as a
                 warning and ignored
    owner_id  -- id of the item's owner (any value ``str()``/``int()`` accept)

    Returns None. Stops early as soon as ``Api.request`` returns None.
    """
    if str(owner_id) != Api.getUserId():
        return

    c.log('debug', 'Requesting comments for %s %i' % (data_type, data['id']))

    # data_type -> (API method, name of the id parameter that method expects)
    endpoints = {
        'photo': ('photos.getComments', 'photo_id'),
        'video': ('video.getComments', 'video_id'),
        'wall': ('wall.getComments', 'post_id'),
    }
    if data_type not in endpoints:
        c.log('warning', 'Unable to request comments for %s %i - not implemented' % (data_type, data['id']))
        return
    api_method, api_id_name = endpoints[data_type]

    # A missing or non-dict 'comments' value is replaced by an empty dict
    # so the comments can be stored by key below.
    if not isinstance(data.get('comments'), dict):
        data['comments'] = {}

    page_size = 100
    req_data = {'owner_id': int(owner_id), api_id_name: int(data['id']), 'count': page_size, 'offset': 0}

    while True:
        subdata = Api.request(api_method, req_data)
        if subdata is None:
            return
        count = subdata['count']
        for d in subdata['items']:
            # NOTE(review): comments sharing the same 'date' value overwrite
            # each other because the date is the only key.
            data['comments'][str(d['date'])] = d
            self.loadAttachments(d)

        # Advance to the next page until the reported total is covered.
        req_data['offset'] += page_size
        if req_data['offset'] >= count:
            break

def processPhoto(self, data):
c.log('debug', 'Processing photo media')
path = self.preprocess(data, 'photo')
if path != None:
if 'localpath' not in self.data[path]:
url = None
if 'url' in self.data[path]:
url = self.data[path]['url']
size = 0
for key in self.data[path].keys():
if key.startswith('photo_'):
Expand All @@ -188,46 +235,53 @@ def processPhoto(self, data):

self.data[path]['url'] = url
self.data[path]['localpath'] = self.addDownload(self.data[path]['url'])
self.requestComments(self.data[path], 'photo', self.data[path]['owner_id'])

def processDoc(self, data):
c.log('debug', 'Processing doc media')
path = self.preprocess(data, 'doc')
if path != None:
if 'localpath' not in self.data[path]:
self.data[path]['localpath'] = self.addDownload(self.data[path]['url'])

def processAudio(self, data):
c.log('debug', 'Processing audio media')
path = self.preprocess(data, 'audio')
if path != None:
if 'localpath' not in self.data[path]:
self.data[path]['localpath'] = self.addDownload(self.data[path]['url'])

def processWall(self, data):
c.log('debug', 'Processing wall attachments')
c.log('debug', 'Processing wall media')
data['comments'].pop('count', None)
data['comments'].pop('can_post', None)
self.requestComments(data, 'wall', data['from_id'])
self.loadAttachments(data)

def processGeo(self, data):
self.preprocess(data, 'geo')
c.log('debug', 'Skipping geo attachment - no data to download')
c.log('debug', 'Skipping geo media - no data to download')

def processVideo(self, data):
self.preprocess(data, 'video')
c.log('debug', 'Skipping video attachment - size of the file is too big')
path = self.preprocess(data, 'video')
self.requestComments(self.data[path], 'video', self.data[path]['owner_id'])
c.log('debug', 'Skipping video media - size of the file is too big')

def processSticker(self, data):
self.preprocess(data, 'sticker')
c.log('debug', 'Skipping sticker attachment - idiotizm')
c.log('debug', 'Skipping sticker media - idiotizm')

def processLink(self, data):
c.log('debug', 'Skipping link attachment - no data to download')
c.log('debug', 'Skipping link media - no data to download')

def processPoll(self, data):
self.preprocess(data, 'poll')
c.log('debug', 'Skipping poll attachment - no data to download')
c.log('debug', 'Skipping poll media - no data to download')

def processNote(self, data):
self.preprocess(data, 'note')
c.log('debug', 'Skipping note attachment - no data to download')
c.log('debug', 'Skipping note media - no data to download')

def processPresent(self, data):
self.preprocess(data, 'present')
c.log('debug', 'Skipping present attachment - stupid present')
c.log('debug', 'Skipping present media - stupid present')

S = Media()
5 changes: 4 additions & 1 deletion lib/Users.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,15 @@ def addUser(self, newdata):
'data': { long(time.time()): newdata },
}
else:
if self.data[newdata_id]['updated'] > Api.getStartTime():
return
user_data = self.data[newdata_id]['data']
if len(set(user_data[max(user_data)].items()) & set(newdata.items())) != len(newdata):
c.log('debug', 'Adding new data for user %s' % newdata_id)
user_data[long(time.time())] = newdata

self.requestProfilePhotos(newdata_id)
self.data[newdata_id]['updated'] = long(time.time())

def requestProfilePhotos(self, user_id):
c.log('debug', 'Requesting profile photos')
Expand Down Expand Up @@ -102,7 +105,7 @@ def requestBlog(self, user_id):
data = data['items']
for d in data:
self.data[user_id]['blog'][str(d['date'])] = d
Media.loadAttachments(self.data[user_id]['blog'][str(d['date'])])
Media.processWall(self.data[user_id]['blog'][str(d['date'])])

req_data['offset'] += 100
if req_data['offset'] >= count:
Expand Down
7 changes: 3 additions & 4 deletions vk-backup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
'''VK-Backup 0.7.0
'''VK-Backup 0.8.0
Author: Rabit <[email protected]>
License: GPL v3
Expand Down Expand Up @@ -46,7 +46,7 @@ def store(self):
Chats.store()
Media.store()

with open(os.path.join(self.path, 'backup.json'), 'w') as outfile:
with open(os.path.join(self.path, 'backup.id'), 'w') as outfile:
outfile.write(str(Api.getUserId()))

def process(self):
Expand All @@ -69,10 +69,9 @@ def process(self):

# Store data
backup.store()
except Exception as e:
except (Exception, KeyboardInterrupt) as e:
Media.stopDownloads()
c.log('error', 'Exception: %s' % str(e))
raise e

from lib import Api

Expand Down

0 comments on commit 0efb216

Please sign in to comment.