-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
112 lines (84 loc) · 3.56 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tabulate import tabulate
from PIL import Image, ExifTags
from pillow_heif import register_heif_opener
from progressbar import ProgressBar, Percentage, Bar, ETA
# Dimensions of the visualizations; both values are passed together as
# ``figsize=[WIDTH, HEIGHT]`` to ``plt.subplots`` by the plotting functions.
WIDTH = 10
HEIGHT = 5
# NOTE(review): TIMEZONE is not referenced anywhere in the visible code --
# confirm whether it is still needed.
TIMEZONE = 'Europe/Berlin'
# File extensions to ignore while scanning; compared against the lower-cased
# file name, so every entry must be lower case.
ignore_ext = ('.md', '.mov', '.mp4', '.cr2', '.raf')
def get_field(exif, field):
    """Return the value of the EXIF entry whose tag name equals *field*.

    Args:
        exif: Mapping of numeric EXIF tag ids to values (anything that
            provides ``.items()``), e.g. the result of ``Image.getexif()``.
        field: Tag name as listed in ``ExifTags.TAGS``, e.g. ``'Model'``.

    Returns:
        The value of the first entry whose id maps to *field*, or ``None``
        when no entry matches.
    """
    # EXIF data is keyed by numeric id, so every id is translated to its
    # name and compared against the requested one.
    return next(
        (value for tag_id, value in exif.items()
         if ExifTags.TAGS.get(tag_id) == field),
        None,
    )
def _iter_photo_paths(folder):
    """Yield ``(path, filename)`` for every candidate photo file below *folder*."""
    for subdir, _dirs, files in os.walk(folder):
        for filename in files:
            # ignores hidden files and files in ignore list
            if filename.startswith('.') or filename.lower().endswith(ignore_ext):
                continue
            path = os.path.join(subdir, filename)
            if os.path.isfile(path):
                yield path, filename


def _read_photo_tags(path, filename):
    """Return ``(capture_date, camera)`` for one image, or None if it must be skipped."""
    try:
        # verify() leaves the image object unusable, so the file is opened a
        # second time before the EXIF data is read.
        with Image.open(path) as img:
            img.verify()
        with Image.open(path) as img:
            img_exif = img.getexif()
            # getexif() yields an empty mapping (not None) when there is no EXIF
            if not img_exif:
                print("no EXIF data for " + filename)
                return None
            raw_date = get_field(img_exif, 'DateTime')
            camera = get_field(img_exif, 'Model')
    except (OSError, SyntaxError) as err:
        # not an image, or a broken one: report it and keep scanning
        print("skipping " + filename + ": " + str(err))
        return None
    # malformed timestamps become NaT instead of aborting the whole scan
    capture_date = pd.to_datetime(raw_date, format='%Y:%m:%d %H:%M:%S', errors='coerce')
    return capture_date, camera


def create_df(folder):
    """Scan *folder* recursively and collect EXIF information of the photos found.

    Hidden files and files whose extension is listed in ``ignore_ext`` are
    not considered. Files that cannot be opened or verified as images, and
    images without any EXIF data, are reported on stdout and left out.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A pandas DataFrame with the columns ``photo`` (file name),
        ``capture_date`` (parsed EXIF ``DateTime`` tag, NaT when missing or
        malformed) and ``camera`` (EXIF ``Model`` tag), one row per image.
    """
    columns = ['photo', 'capture_date', 'camera']
    # the candidates are collected up front so the progress bar knows the
    # exact number of files it is going to process
    photos = list(_iter_photo_paths(folder))
    if not photos:
        return pd.DataFrame(columns=columns)

    rows = []
    pbar = ProgressBar(widgets=["Reading photos", Percentage(), Bar(), ETA()], maxval=len(photos)).start()
    for done, (path, filename) in enumerate(photos, start=1):
        tags = _read_photo_tags(path, filename)
        if tags is not None:
            rows.append((filename, *tags))
        # progress counts every processed file, including skipped ones
        pbar.update(done)
    pbar.finish()
    # building the frame once avoids growing it row by row
    return pd.DataFrame(rows, columns=columns)
def show_cam(data_frame):
    """Draw a bar chart with the number of photos per camera.

    Rows without a camera value are not counted. When no row has a camera
    value, a message is printed and no figure is created.

    Args:
        data_frame: DataFrame with a ``camera`` column, one row per photo.
    """
    if not data_frame['camera'].notna().any():
        print("no camera information available to plot")
        return
    # number of photos per camera, most used camera first
    per_camera = data_frame.groupby(['camera']).size().sort_values(ascending=False)
    _fig, axs_cam = plt.subplots(figsize=[WIDTH, HEIGHT])
    cam = per_camera.plot.bar(ax=axs_cam, title="Photos by Camera", rot=0)
    # writes the count on top of each bar
    cam.bar_label(cam.containers[0])
def show_date(data_frame):
    """Draw a line chart with the number of photos per day, one line per camera.

    Rows without a capture date or without a camera value are not counted.
    When no usable row remains, a message is printed and no figure is
    created. The passed DataFrame is not modified.

    Args:
        data_frame: DataFrame with ``capture_date`` and ``camera`` columns,
            one row per photo.
    """
    # work on a private frame so the caller's DataFrame keeps its columns
    per_photo = pd.DataFrame({
        'date': pd.to_datetime(data_frame['capture_date']),
        'camera': data_frame['camera'],
    }).dropna()
    if per_photo.empty:
        print("no photos with capture date and camera available to plot")
        return
    # strip the time of day so that photos are grouped per calendar day
    per_photo = per_photo.assign(date=per_photo['date'].dt.normalize())
    per_day = per_photo.groupby(['date', 'camera']).size().unstack()
    # fill empty dates with 0
    date_range = pd.date_range(per_day.index.min(), per_day.index.max())
    per_day = per_day.reindex(date_range).fillna(0)
    _fig, axs_date = plt.subplots(figsize=[WIDTH, HEIGHT])
    per_day.plot(kind='line', ax=axs_date)
def main():
    """Parse the command line, scan the photo folder and show the requested charts."""
    # instantiates argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument("folder_path", help="path to photos directory which should be scanned")
    parser.add_argument("--cam", help="output number of photos by camera", action="store_true")
    parser.add_argument("--date", help="output photos taken by camera per day", action="store_true")
    args = parser.parse_args()

    # lets Pillow open HEIF files
    register_heif_opener()

    # creates pandas DataFrame
    df = create_df(args.folder_path)

    if args.cam:
        show_cam(df)
    if args.date:
        show_date(df)

    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print()
    df.info(memory_usage='deep')

    plt.show()


# only run the scan when executed as a script, not when imported
if __name__ == "__main__":
    main()