This repository was archived by the owner on May 10, 2019. It is now read-only.

Adding file size checking, skipping files over the configured max file size #59

Open · wants to merge 2 commits into master
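In short, the change reads an optional max_file_size setting from uploadr.ini (defaulting to 1073741824 bytes, i.e. 1 GB) and skips any candidate file whose on-disk size exceeds it. A minimal sketch of the check, reusing the configdict import and the default value from the diff below; the should_skip helper is illustrative only, the PR itself inlines this check in grabNewImages:

    import os
    import logging

    from f2flickr.configuration import configdict

    # Falls back to 1073741824 bytes (1 GB) when max_file_size is absent from uploadr.ini
    MAX_FILE_SIZE = int(configdict.get('max_file_size', '1073741824'))

    def should_skip(filepath):
        """Illustrative helper (not part of the PR): True when the file exceeds the limit."""
        filesize = os.path.getsize(filepath)
        if filesize > MAX_FILE_SIZE:
            logging.info('Skipping %s - %d bytes is over max_file_size', filepath, filesize)
            return True
        return False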
31 changes: 19 additions & 12 deletions f2flickr/flickr.py
@@ -962,7 +962,7 @@ def getPhotos(self, per_page='', page='', **extras):
#for details of each param

#XXX: Could be Photo.search(cls)
def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
def photos_search_with_pages(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort='',\
@@ -989,26 +989,33 @@ def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
photos.append(_parse_photo(photo))
else:
photos = [_parse_photo(data.rsp.photos.photo)]
return photos, int(data.rsp.photos.pages)

def photos_search(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort='',\
safe_search='', content_type='', **kwargs):
"""Returns a list of Photo objects.

If auth=True then will auth the user. Can see private etc
"""

photos, pages = photos_search_with_pages(**locals())

return photos

def photos_search_pages(user_id='', auth=False, tags='', tag_mode='', text='',\
min_upload_date='', max_upload_date='',\
min_taken_date='', max_taken_date='', \
license='', per_page='', page='', sort=''):
license='', per_page='', page='', sort='',\
safe_search='', content_type='', **kwargs):
"""Returns the number of pages for the previous function (photos_search())
"""

method = 'flickr.photos.search'

data = _doget(method, auth=auth, user_id=user_id, tags=tags, text=text,\
min_upload_date=min_upload_date,\
max_upload_date=max_upload_date, \
min_taken_date=min_taken_date, \
max_taken_date=max_taken_date, \
license=license, per_page=per_page,\
page=page, sort=sort)
photos, pages = photos_search_with_pages(**locals())

return data.rsp.photos.pages
return pages

def photos_get_recent(extras='', per_page='', page=''):
"""http://www.flickr.com/services/api/flickr.photos.getRecent.html
142 changes: 141 additions & 1 deletion f2flickr/flickr2history.py
@@ -11,6 +11,7 @@
import sys
import os
import f2flickr.flickr as flickr
from pprint import pprint

def getPhotoIDbyTag(tag, user):
"""
@@ -101,7 +102,102 @@ def convert_format(images, imageDir, historyFile):
(num_images, num_ok, num_not_found, num_converted))
uploaded.close()

def reshelf(images, imageDir, historyFile):
def get_photos_from_flickr():
"""
Get all photos from flickr, keyed by photo id
"""
logging.debug('flickr2history: get_photos_from_flickr')
try:
user = flickr.test_login()
logging.debug(user.id)
except:
logging.error(sys.exc_info()[0])
return None

per_page = 500

logging.debug("Fetching page 1...")
photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=True, per_page=per_page)
photodict = {}
for photo in photos:
photodict[photo.id] = photo

for page in range(2, pages + 1):
logging.debug("Fetching page {}...".format(page))
photos, pages = flickr.photos_search_with_pages(user_id=user.id, auth=True, per_page=per_page, page=page)
for photo in photos:
photodict[photo.id] = photo

return photodict

def get_photo_ids_from_database_file(history_file):
"""
Get all photo ids from the history database file
"""
history = shelve.open(history_file)
return get_photo_ids_from_database(history)

def get_photo_ids_from_database(history):
"""
Get all photo ids from the history database
"""
return list(filter(lambda x: not x.startswith('/'), history.keys()))

def get_photo_paths_from_database(history):
"""
Get all photo paths from the history database
"""
return list(filter(lambda x: x.startswith('/'), history.keys()))

def database_compare(images, image_dir, history_file, absolute_path):
history = shelve.open(history_file)
logging.info("Loading photo database from flickr...")
photos = get_photos_from_flickr()
basepath = image_dir if absolute_path else ''

flickr_ids = set(photos.keys())
database_ids = set(get_photo_ids_from_database(history))
database_paths = set(get_photo_paths_from_database(history))
filesystem_paths = set(map(lambda x: '/' + os.path.relpath(x, image_dir), images))

print("%s photos on flickr, %s photos on disk, %s photos in database" % (len(flickr_ids), len(images), len(database_ids)))

notinfilesystem = list(database_paths - filesystem_paths)
notinfilesystem.sort()
print('####################################################')
print("%s photos in the database not on the filesystem..." % len(notinfilesystem))
for path in notinfilesystem:
print("id=%s: path=%s" % (history[path][0], basepath + path))

notindatabase = list(filesystem_paths - database_paths)
notindatabase.sort()
print('####################################################')
print("%s photos on the filesystem not in the database..." % len(notindatabase))
for path in notindatabase:
print("path=%s" % basepath + path)

notinflickr = list(database_ids - flickr_ids)
notinflickr.sort()
print('####################################################')
print("%s photos in database not on flickr..." % len(notinflickr))
for photoid in notinflickr:
print("id=%s: path=%s" % (photoid, basepath + history[photoid]))

notindatabase = list(flickr_ids - database_ids)
notindatabase.sort()
print('####################################################')
print("%s photos on flickr not in database..." % len(notindatabase))
for photoid in notindatabase:
tags = [tag.raw for tag in photos[photoid].tags]
hashtags = list(filter(lambda x: x.startswith('#'), tags))
if len(hashtags) > 0:
path = basepath + hashtags[0][1:].replace('#', ' ')
print('id=%s, path=%s, exists_local=%s' % (photoid, path, os.path.isfile(path)))
else:
print('id=%s, tags=%s' % (photoid, ', '.join(tags)))


def reshelf(images, imageDir, historyFile):
"""
Store image reference in the history file if its not there yet and if we
actually can find it on Flickr.
@@ -146,3 +242,47 @@ def reshelf(images, imageDir, historyFile):
uploaded[ str(image)] = ( str(photo.id), file_mtime, file_size )
uploaded[ str(photo.id) ] =str(image)
uploaded.close()

def delete_photo(filename, image_dir, history_file):
logging.debug('flickr2history: Started delete_photo')
try:
user = flickr.test_login()
logging.debug(user.id)
except:
logging.error(sys.exc_info()[0])
return None

path = '/' + os.path.relpath(filename, image_dir) if filename.startswith(image_dir) else filename
tag = '#' + path.replace(' ', '#')

history = shelve.open(history_file)
found_in_database = history.has_key(path)
photos = flickr.photos_search(user_id=user.id, auth=True, tags=tag, tag_mode='any')

if history.has_key(path) or len(photos) > 0:
if history.has_key(path):
photoid, uploaded, filesize = history[path]
logging.info('Found in database: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize)
if len(photos) > 0:
for photo in photos:
tags = [tag.raw for tag in photo.tags]
hashtags = list(filter(lambda x: x.startswith('#'), tags))
logging.info('Found photo on flickr: id=%s, tags=%s', photo.id, ', '.join(hashtags if len(hashtags) > 0 else tags))

delete_confirm = raw_input('Are you sure you want to delete these items (yes/no)? ')
if delete_confirm.lower() == 'yes' or delete_confirm.lower() == 'y':
if history.has_key(path):
photoid, uploaded, filesize = history[path]
logging.info('Deleting database entry: path=%s, id=%s, uploaded=%s, filesize=%s', path, photoid, uploaded, filesize)
del history[path]
if history.has_key(photoid):
logging.info('Deleting database entry: id=%s, path=%s', photoid, history[photoid])
del history[photoid]
if len(photos) > 0:
for photo in photos:
logging.info('Deleting photo: %s', photo.id)
photo.delete()
else:
print('Aborted.')
else:
print('Could not find photo in database or on flickr matching: %s' % path)
60 changes: 58 additions & 2 deletions f2flickr/uploadr.py
@@ -36,11 +36,12 @@
from itertools import groupby
from os.path import dirname
import calendar
import argparse

import f2flickr.flickr as flickr
import f2flickr.tags2set as tags2set
from f2flickr.configuration import configdict
from flickr2history import convert_format
from flickr2history import *
from xml.dom import minidom

#
@@ -79,6 +80,9 @@
mpeg
'''.split())

# Max file size
MAX_FILE_SIZE = int(configdict.get('max_file_size', '1073741824'))

##
## You shouldn't need to modify anything below here
##
@@ -633,14 +637,36 @@ def grabNewImages(dirname):
continue
ext = f.lower().split(".")[-1]
if ext in ALLOWED_EXT and not ignoreMatch(f, ignoreglobs):
images.append(os.path.normpath(os.path.join(dirpath, f)))
filepath = os.path.normpath(os.path.join(dirpath, f))
filesize = os.path.getsize(filepath)
if filesize > MAX_FILE_SIZE:
logging.info('Skipping %s - %d bytes is over max_file_size' % (filepath, filesize))
continue
images.append(filepath)
images.sort()
return images

def list_history(absolutepath):
"""
Print the file entries in the history database
"""
history = shelve.open(HISTORY_FILE)
keys = list(history.keys())
keys.sort()

for key in keys:
if key.startswith('/'):
if absolutepath:
print("{}{}".format(IMAGE_DIR, key))
else:
print("{}".format(key))

def main():
"""
Initial entry point for the uploads
"""
global HISTORY_FILE

logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s',
filename='debug.log',
@@ -655,6 +681,28 @@ def main():
console.setFormatter(logging.Formatter('%(asctime)s %(filename)s:%(lineno)s - %(funcName)20s() %(message)s'))
logging.getLogger('').addHandler(console)


parser = argparse.ArgumentParser()
parser.add_argument('-u', '--upload', action='store_true', help='Upload photos to flickr (default option)')
parser.add_argument('-l', '--list', action='store_true', help='List all the file entries in the history database')
parser.add_argument('-a', '--absolutepath', action='store_true', help='Show absolute file path when listing files')
parser.add_argument('-r', '--reshelf', action='store_true', help='Rebuild the history database')
parser.add_argument('-c', '--compare', action='store_true', help='Compare database entries with filesystem and flickr')
parser.add_argument('-f', '--historyfile', action='store', help='Use the specified history file for operations')
parser.add_argument('-d', '--delete', action='store', help='Delete the specified photo (by path) from the database and flickr')
args = parser.parse_args()

if args.historyfile:
HISTORY_FILE = args.historyfile

if args.list:
list_history(args.absolutepath)
sys.exit()

if args.delete:
delete_photo(args.delete, IMAGE_DIR, HISTORY_FILE)
sys.exit()

uploadinstance = Uploadr()
if not uploadinstance.checkToken():
uploadinstance.authenticate()
@@ -663,6 +711,14 @@ def main():
images = grabNewImages(IMAGE_DIR)
logging.info('Found %d images' % len(images))

if args.compare:
database_compare(images, IMAGE_DIR, HISTORY_FILE, args.absolutepath)
sys.exit()

if args.reshelf:
reshelf(images, IMAGE_DIR, HISTORY_FILE)
sys.exit()

# Convert history file to new format, if necessary.
logging.info('Converting existing history file to new format, if needed')
convert_format(images, IMAGE_DIR, HISTORY_FILE)
4 changes: 4 additions & 0 deletions uploadr.ini.sample
@@ -52,3 +52,7 @@ override_dates = 0
#
# secret = 13c314caee8b1f31
# api_key = 91dfde3ed605f6b8b9d9c38886547dcf

#
# Max file size. Any file over max_file_size bytes will be logged and ignored. Defaults to 1 GB (1073741824 bytes).
max_file_size = 1073741824
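
The setting is a plain byte count. For reference, a small illustrative Python snippet (not part of the PR) showing how common limits translate into max_file_size values:

    # Illustrative only: byte values for common max_file_size limits
    MB = 1024 * 1024
    GB = 1024 * MB
    print(100 * MB)  # 104857600  -> max_file_size = 104857600 (100 MB)
    print(1 * GB)    # 1073741824 -> the sample default above (1 GB)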