This repository was archived by the owner on May 10, 2019. It is now read-only.

Adding file size checking, skipping files over the configured max file size #59

Open · wants to merge 2 commits into master
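In short, the change reads an optional max_file_size setting from uploadr.ini (defaulting to 1073741824 bytes, i.e. 1 GB) and skips any candidate file whose on-disk size exceeds it. A minimal sketch of the check, reusing the configdict import and the default value from the diff below; the should_skip helper is illustrative only, the PR itself inlines this check in grabNewImages:

    import os
    import logging

    from f2flickr.configuration import configdict

    # Falls back to 1073741824 bytes (1 GB) when max_file_size is absent from uploadr.ini
    MAX_FILE_SIZE = int(configdict.get('max_file_size', '1073741824'))

    def should_skip(filepath):
        """Illustrative helper (not part of the PR): True when the file exceeds the limit."""
        filesize = os.path.getsize(filepath)
        if filesize > MAX_FILE_SIZE:
            logging.info('Skipping %s - %d bytes is over max_file_size', filepath, filesize)
            return True
        return False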
31 changes: 19 additions & 12 deletions f2flickr/flickr.py
@@ -962,7 +962,7 @@ def getPhotos(self, per_page='', page='', **extras):
#for details of each param

#XXX: Could be Photo.search(cls)
def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
def photos_search_with_pages(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort='',\
@@ -989,26 +989,33 @@ def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
photos.append(_parse_photo(photo))
else:
photos = [_parse_photo(data.rsp.photos.photo)]
return photos, int(data.rsp.photos.pages)

def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort='',\
safe_search='', content_type='', **kwargs):
"""Returns a list of Photo objects.

If auth=True then will auth the user. Can see private etc
"""

photos, pages = photos_search_with_pages(**locals())

return photos

def photos_search_pages(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort=''):
license='', per_page='', page='', sort='',\
safe_search='', content_type='', **kwargs):
"""Returns the number of pages for the previous function (photos_search())
"""

method = 'flickr.photos.search'

data = _doget(method, auth=auth, user_id=user_id, tags=tags, text=text,\
min_upload_date=min_upload_date,\
max_upload_date=max_upload_date, \
min_taken_date=min_taken_date, \
max_taken_date=max_taken_date, \
license=license, per_page=per_page,\
page=page, sort=sort)
photos, pages = photos_search_with_pages(**locals())

return data.rsp.photos.pages
return pages

def photos_get_recent(extras='', per_page='', page=''):
"""http://www.flickr.com/services/api/flickr.photos.getRecent.html
142 changes: 141 additions & 1 deletion f2flickr/flickr2history.py
@@ -11,6 +11,7 @@
import sys
import os
import f2flickr.flickr as flickr
from pprint import pprint

def getPhotoIDbyTag(tag, user):
"""
@@ -101,7 +102,102 @@ def convert_format(images, imageDir, historyFile):
(num_images, num_ok, num_not_found, num_converted))
uploaded.close()

def reshelf(images, imageDir, historyFile):
def get_photos_from_flickr():
"""
Get all photos from flickr, keyed by photo id
"""
logging.debug('flickr2history: get_photos_from_flickr')
try:
user = flickr.test_login()
logging.debug(user.id)
except:
logging.error(sys.exc_info()[0])
return None

per_page = 500

logging.debug("Fetching page 1...")
photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=True, per_page=per_page)
photodict = {}
for photo in photos:
photodict[photo.id] = photo

for page in range(2, pages + 1):
logging.debug("Fetching page {}...".format(page))
photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=True, per_page=per_page, page=page)
for photo in photos:
photodict[photo.id] = photo

return photodict

def get_photo_ids_from_database_file(history_file):
"""
Get all photo ids from the history database file
"""
history = shelve.open(history_file)
return get_photo_ids_from_database(history)

def get_photo_ids_from_database(history):
"""
Get all photo ids from the history database
"""
return list(filter(lambda x: not x.startswith('/'), history.keys()))

def get_photo_paths_from_database(history):
"""
Get all photo paths from the history database
"""
return list(filter(lambda x: x.startswith('/'), history.keys()))

def database_compare(images, image_dir, history_file, absolute_path):
history = shelve.open(history_file)
logging.info("Loading photo database from flickr...")
photos = get_photos_from_flickr()
basepath = image_dir if absolute_path else ''

flickr_ids = set(photos.keys())
database_ids = set(get_photo_ids_from_database(history))
database_paths = set(get_photo_paths_from_database(history))
filesystem_paths = set(map(lambda x: '/' + os.path.relpath(x, image_dir), images))

print("%s photos on flickr, %s photos on disk, %s photos in database" % (len(flickr_ids), len(images), len(database_ids)))

notinfilesystem = list(database_paths - filesystem_paths)
notinfilesystem.sort()
print('####################################################')
print("%s photos in the database not on the filesystem..." % len(notinfilesystem))
for path in notinfilesystem:
print("id=%s: path=%s" % (history[path][0], basepath + path))

notindatabase = list(filesystem_paths - database_paths)
notindatabase.sort()
print('####################################################')
print("%s photos on the filesystem not in the database..." % len(notindatabase))
for path in notindatabase:
print("path=%s" % basepath + path)

notinflickr = list(database_ids - flickr_ids)
notinflickr.sort()
print('####################################################')
print("%s photos in database not on flickr..." % len(notinflickr))
for photoid in notinflickr:
print("id=%s: path=%s" % (photoid, basepath + history[photoid]))

notindatabase = list(flickr_ids - database_ids)
notindatabase.sort()
print('####################################################')
print("%s photos on flickr not in database..." % len(notindatabase))
for photoid in notindatabase:
tags = [tag.raw for tag in photos[photoid].tags]
hashtags = list(filter(lambda x: x.startswith('#'), tags))
if len(hashtags) > 0:
path = basepath + hashtags[0][1:].replace('#', ' ')
print('id=%s, path=%s, exists_local=%s' % (photoid, path, os.path.isfile(path)))
else:
print('id=%s, tags=%s' % (photoid, ', '.join(tags)))


def reshelf(images, imageDir, historyFile):
"""
Store image reference in the history file if its not there yet and if we
actually can find it on Flickr.
@@ -146,3 +242,47 @@ def reshelf(images, imageDir, historyFile):
uploaded[ str(image)] = ( str(photo.id), file_mtime, file_size )
uploaded[ str(photo.id) ] =str(image)
uploaded.close()

def delete_photo(filename, image_dir, history_file):
logging.debug('flickr2history: Started delete_photo')
try:
user = flickr.test_login()
logging.debug(user.id)
except:
logging.error(sys.exc_info()[0])
return None

path = '/' + os.path.relpath(filename, image_dir) if filename.startswith(image_dir) else filename
tag = '#' + path.replace(' ', '#')

history = shelve.open(history_file)
found_in_database = history.has_key(path)
photos = flickr.photos_search(user_id=user.id, auth=True, tags=tag, tag_mode='any')

if history.has_key(path) or len(photos) > 0:
if history.has_key(path):
photoid, uploaded, filesize = history[path]
logging.info('Found in database: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize)
if len(photos) > 0:
for photo in photos:
tags = [tag.raw for tag in photo.tags]
hashtags = list(filter(lambda x: x.startswith('#'), tags))
logging.info('Found photo on flickr: id=%s, tags=%s', photo.id, ', '.join(hashtags if len(hashtags) > 0 else tags))

delete_confirm = raw_input('Are you sure you want to delete these items (yes/no)? ')
if delete_confirm.lower() == 'yes' or delete_confirm.lower() == 'y':
if history.has_key(path):
photoid, uploaded, filesize = history[path]
logging.info('Deleting database entry: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize)
del history[path]
if history.has_key(photoid):
logging.info('Deleting database entry: id=%s, path=%s', photoid, history[photoid])
del history[photoid]
if len(photos) > 0:
for photo in photos:
logging.info('Deleting photo: %s', photo.id)
photo.delete()
else:
print('Aborted.')
else:
print('Could not find photo in database or on flickr matching: %s' % path)
60 changes: 58 additions & 2 deletions f2flickr/uploadr.py
@@ -36,11 +36,12 @@
from itertools import groupby
from os.path import dirname
import calendar
import argparse

import f2flickr.flickr as flickr
import f2flickr.tags2set as tags2set
from f2flickr.configuration import configdict
from flickr2history import convert_format
from flickr2history import *
from xml.dom import minidom

#
@@ -79,6 +80,9 @@
mpeg
'''.split())

# Max file size
MAX_FILE_SIZE = int(configdict.get('max_file_size', '1073741824'))

##
## You shouldn't need to modify anything below here
##
@@ -633,14 +637,36 @@ def grabNewImages(dirname):
continue
ext = f.lower().split(".")[-1]
if ext in ALLOWED_EXT and not ignoreMatch(f, ignoreglobs):
images.append(os.path.normpath(os.path.join(dirpath, f)))
filepath = os.path.normpath(os.path.join(dirpath, f))
filesize = os.path.getsize(filepath)
if filesize > MAX_FILE_SIZE:
logging.info('Skipping %s - %d bytes is over max_file_size' % (filepath, filesize))
continue
images.append(filepath)
images.sort()
return images

def list_history(absolutepath):
"""
Print the file entries in the history database
"""
history = shelve.open(HISTORY_FILE)
keys = list(history.keys())
keys.sort()

for key in keys:
if key.startswith('/'):
if absolutepath:
print("{}{}".format(IMAGE_DIR, key))
else:
print("{}".format(key))

def main():
"""
Initial entry point for the uploads
"""
global HISTORY_FILE

logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s',
filename='debug.log',
@@ -655,6 +681,28 @@ def main():
console.setFormatter(logging.Formatter('%(asctime)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s'))
logging.getLogger('').addHandler(console)


parser = argparse.ArgumentParser()
parser.add_argument('-u', '--upload', action='store_true', help='Upload photos to flickr (default option)')
parser.add_argument('-l', '--list', action='store_true', help='List all the file entries in the history database')
parser.add_argument('-a', '--absolutepath', action='store_true', help='Show absolute file path when listing files')
parser.add_argument('-r', '--reshelf', action='store_true', help='Rebuild the history database')
parser.add_argument('-c', '--compare', action='store_true', help='Compare database entries with filesystem and flickr')
parser.add_argument('-f', '--historyfile', action='store', help='Use the specified history file for operations')
parser.add_argument('-d', '--delete', action='store', help='Delete the specified photo (by path) from the database and flickr')
args = parser.parse_args()

if args.historyfile:
HISTORY_FILE = args.historyfile

if args.list:
list_history(args.absolutepath)
sys.exit()

if args.delete:
delete_photo(args.delete, IMAGE_DIR, HISTORY_FILE)
sys.exit()

uploadinstance = Uploadr()
if not uploadinstance.checkToken():
uploadinstance.authenticate()
@@ -663,6 +711,14 @@ def main():
images = grabNewImages(IMAGE_DIR)
logging.info('Found %d images' % len(images))

if args.compare:
database_compare(images, IMAGE_DIR, HISTORY_FILE, args.absolutepath)
sys.exit()

if args.reshelf:
reshelf(images, IMAGE_DIR, HISTORY_FILE)
sys.exit()

# Convert history file to new format, if necessary.
logging.info('Converting existing history file to new format, if needed')
convert_format(images, IMAGE_DIR, HISTORY_FILE)
4 changes: 4 additions & 0 deletions uploadr.ini.sample
@@ -52,3 +52,7 @@ override_dates = 0
#
# secret = 13c314caee8b1f31
# api_key = 91dfde3ed605f6b8b9d9c38886547dcf

#
# Max file size. Any file over max_file_size bytes will be logged and ignored. Defaults to 1 GB (1073741824 bytes).
max_file_size = 1073741824
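
The setting is a plain byte count. For reference, a small illustrative Python snippet (not part of the PR) showing how common limits translate into max_file_size values:

    # Illustrative only: byte values for common max_file_size limits
    MB = 1024 * 1024
    GB = 1024 * MB
    print(100 * MB)  # 104857600  -> max_file_size = 104857600 (100 MB)
    print(1 * GB)    # 1073741824 -> the sample default above (1 GB)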