-
Notifications
You must be signed in to change notification settings - Fork 0
/
process-video
101 lines (83 loc) · 2.61 KB
/
process-video
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/bash
#
# Usage: $0 [filename] [chamber]
#
#   [filename]  Path to an MP4 video, given WITHOUT its ".mp4" extension.
#   [chamber]   "house" or "senate". When omitted, it is guessed from the
#               video's directory or from the first character of [filename].

# Print the usage message on stderr and stop with a failure status, so that
# callers can tell a usage error apart from a successful run.
usage() {
  echo "usage: $0 [filename] [chamber], omitting .mp4 extension" >&2
  exit 1
}

# If the filename is missing, explain that it's required.
if [[ -z "$1" ]]; then
  usage
fi

# If the chamber hasn't been specified, try to guess it.
if [[ -z "$2" ]]; then
  # Absolute path of the directory that contains the video. Every expansion
  # is quoted so that paths containing whitespace are handled as one word.
  # If the directory cannot be entered, FILE_PATH is left empty and the guess
  # falls through to the first-character checks below.
  FILE_PATH=$(cd "$(dirname -- "$1.mp4")" >/dev/null && pwd)

  if [[ "$FILE_PATH" == *"house"* ]]; then
    CHAMBER="house"
  elif [[ "$FILE_PATH" == *"senate"* ]]; then
    CHAMBER="senate"
  # NOTE(review): these two checks look at the first character of the
  # argument exactly as typed, so a leading directory component (e.g. "./")
  # defeats the guess -- confirm whether the basename was intended.
  elif [[ ${1:0:1} == "h" ]]; then
    CHAMBER="house"
  elif [[ ${1:0:1} == "s" ]]; then
    CHAMBER="senate"
  else
    usage
  fi
fi
# Reassign command-line variables to named variables.
SRC="$1"

# An explicitly passed chamber always wins. Otherwise keep whatever CHAMBER
# already holds (the value guessed earlier in the script when $2 was omitted).
CHAMBER="${2:-$CHAMBER}"

# The video must exist before any processing starts.
if [[ ! -f "$SRC.mp4" ]]; then
  echo "Error: $SRC.mp4 does not exist" >&2
  exit 1
fi

# Choose the frame step handed to mplayer for this chamber. Anything other
# than the two known chambers is an error and must exit non-zero.
case "$CHAMBER" in
  house)
    FRAMESTEP=150
    ;;
  senate)
    FRAMESTEP=60
    ;;
  *)
    echo "The chamber must be either 'house' or 'senate'." >&2
    exit 1
    ;;
esac
# Have mplayer create a folder full of screenshots. The framestep filter is
# given $FRAMESTEP, audio is disabled, and the jpeg video output driver writes
# its files into a directory named after $SRC.
#
# The exit status is captured immediately after the command: inside the
# `then` branch of `if ! cmd`, `$?` is the status of the negated pipeline
# (always 0), so exiting with "$?" there would report success on failure.
mplayer -vf framestep="$FRAMESTEP" -framedrop -nosound "$SRC".mp4 -speed 100 -vo jpeg:outdir="$SRC"
mplayer_status=$?
if (( mplayer_status != 0 )); then
  exit "$mplayer_status"
fi

echo "Extracting names and bill numbers from each frame"

# Work inside the screenshot directory from here on. A bare `exit` after a
# failed cd propagates cd's non-zero status.
cd "$SRC" || exit

# Standardize screenshot dimensions
mogrify -resize 640x480 ./*
resize_status=$?
if (( resize_status != 0 )); then
  echo "Couldn't resize all images" >&2
  exit "$resize_status"
fi
# Crop geometries (WIDTHxHEIGHT+X+Y) for the two regions cut out of every
# screenshot. The house layout has its own values; any other chamber uses the
# second set.
case "$CHAMBER" in
  house)
    NAME_CROP="333x60+120+380"
    BILL_CROP="129x27+465+42"
    ;;
  *)
    NAME_CROP="345x60+172+340"
    BILL_CROP="172x27+0+40"
    ;;
esac

# Crop one region out of every screenshot whose name ends in a digit.
# Arguments: $1 - crop geometry, $2 - label inserted into the output name.
# Each result is written next to its source as "<screenshot>.<label>.jpg",
# negated and recombined with the -fx channel expression (presumably to give
# the OCR step cleaner input -- confirm).
crop_frames() {
  local geometry=$1
  local label=$2
  local frame
  for frame in *[0-9].jpg; do
    convert "$frame" -crop "$geometry" +repage -negate -fx '.8*r+.8*g+0*b' \
      -compress none -depth 8 "$frame"."$label".jpg
  done
}

# Names first, then bill numbers.
crop_frames "$NAME_CROP" name
crop_frames "$BILL_CROP" bill
echo OCRing names and bill numbers

# OCR the cropped images, names first and then bill numbers. find hands the
# files to tesseract one at a time, so the number of images never runs into a
# command-line length limit. The image path doubles as tesseract's output
# base, so the text is expected to land beside the image (see the *.txt chmod
# at the end of this section).
for kind in name bill; do
  find . -type f -name "*.${kind}.jpg" -exec tesseract {} {} \;
done

# Give the "web" group write access to everything here, so that the files can
# be deleted later, during the processing stage (after this script is run).
chgrp web ./*
chmod g+w ./*

# The cropped images have served their purpose once OCRed; remove them.
for kind in name bill; do
  find . -type f -name "*.${kind}.jpg" -exec rm {} \;
done

# The text files are made world-writable so that the web server can delete
# them post-import.
chmod 777 ./*.txt
# Duplicate every JPEG in the current directory with a -150 suffix
# (e.g. 00000001.jpg -> 00000001-150.jpg).
#
# The shell glob is used directly instead of parsing `ls` output: splitting
# "./name.jpg" on "." yields an empty first field, which left the previous
# loop with nothing to iterate over, so no copies were ever made.
# Files that already carry the -150 suffix are skipped, which keeps a second
# run from producing "-150-150" copies.
duplicate_frames() {
  local frame
  for frame in ./*.jpg; do
    # An unmatched glob stays literal; skip it (and anything not a file).
    [[ -f "$frame" ]] || continue
    [[ "$frame" == *-150.jpg ]] && continue
    cp -- "$frame" "${frame%.jpg}-150.jpg"
  done
}

# Shrink all *-150.jpg copies to thumbnail size.
# Returns non-zero (after a message on stderr) when there is nothing to
# resize, otherwise mogrify's own exit status.
make_thumbnails() {
  local -a thumbs=(./*-150.jpg)
  if [[ ! -e "${thumbs[0]}" ]]; then
    echo "No screenshots found to thumbnail" >&2
    return 1
  fi
  mogrify -resize 150x112 "${thumbs[@]}"
}

duplicate_frames

# Create thumbnails of all *150.jpg screenshots.
echo Creating thumbnails of screenshots
make_thumbnails