-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpdf_validation_and_deletionv2.sh
67 lines (56 loc) · 2.21 KB
/
pdf_validation_and_deletionv2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
#
# Validate every "*.pdf" file found recursively under a directory with
# `pdfinfo` and delete each file for which pdfinfo exits non-zero.
#
# Usage: pdf_validation_and_deletionv2.sh [input_directory [output_parent]]
#   input_directory  directory to scan
#                    (default: /mnt/k/place-your-target-dir-here)
#   output_parent    directory in which the timestamped output directory
#                    "PDF_Validation_Output_<timestamp>" is created
#                    (default: /mnt/k)
#
# Progress is printed and logged every $interval files, each deletion is
# printed and logged as it happens, and a final summary is printed and logged.
# WARNING: deletions are permanent.

interval=1000 # Output progress every 1000 files

#######################################
# Print a message to stdout and append the same line to the log file.
# Globals:   log_path (read; set by main and visible here through bash's
#            dynamic scoping)
# Arguments: $1 - message text
#######################################
report() {
  printf '%s\n' "$1"
  printf '%s\n' "$1" >> "$log_path"
}

#######################################
# Scan, validate and delete invalid PDFs, reporting progress along the way.
# Globals:   interval (read)
# Arguments: $1 - input directory (optional, see default below)
#            $2 - parent of the output directory (optional, see default below)
# Outputs:   progress and summary on stdout, errors on stderr, log file in
#            the output directory
# Returns:   0 when the scan ran to completion, 1 on a setup error
#######################################
main() {
  local input_directory="${1:-/mnt/k/place-your-target-dir-here}" # Replace with the path to your input directory
  local output_parent="${2:-/mnt/k}"
  local timestamp output_dir log_file log_path
  local count=0 total_invalid=0 deleted_count=0 total_files=0
  local start_time validation_start current_time end_time
  local elapsed_time estimated_completion
  local pdf_file
  local -a pdf_files=()

  # If pdfinfo is missing, every invocation fails and every PDF would be
  # treated as invalid and deleted. Refuse to run instead.
  if ! command -v pdfinfo > /dev/null 2>&1; then
    echo "Error: pdfinfo not found (e.g. install poppler-utils)." >&2
    return 1
  fi

  if [[ ! -d "$input_directory" ]]; then
    echo "Error: input directory not found: $input_directory" >&2
    return 1
  fi

  timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
  output_dir="${output_parent}/PDF_Validation_Output_${timestamp}"
  log_file="Validation_Log_${timestamp}.txt"
  log_path="$output_dir/$log_file"

  if ! mkdir -p "$output_dir"; then # Create the output directory
    echo "Error: cannot create output directory: $output_dir" >&2
    return 1
  fi

  echo "Scanning directory: $input_directory"
  echo "Logging details to: $log_path"
  echo

  start_time=$(date +%s)

  # Collect the file list first. Reading from process substitution (not a
  # pipeline) keeps the loop in the current shell so the counters survive,
  # NUL delimiters keep unusual file names intact, and knowing the total up
  # front is what makes the time estimate possible.
  while IFS= read -r -d '' pdf_file; do
    pdf_files+=("$pdf_file")
  done < <(find "$input_directory" -type f -name "*.pdf" -print0)
  total_files=${#pdf_files[@]}

  # The estimate is based on validation time only, not on the listing time.
  validation_start=$(date +%s)

  for pdf_file in "${pdf_files[@]}"; do
    if ((count % interval == 0)); then
      report "Processed: $count files"
      report "Invalid PDFs found: $total_invalid"
      report "Files deleted: $deleted_count"
      report "Last processed file: $pdf_file"
      if ((count > 0)); then
        current_time=$(date +%s)
        elapsed_time=$((current_time - validation_start))
        # Average time per processed file multiplied by the files remaining.
        estimated_completion=$((elapsed_time * (total_files - count) / count))
        report "Estimated time to completion: $estimated_completion seconds"
      fi
      echo
    fi

    # Any non-zero pdfinfo status is treated as "corrupted or invalid".
    if ! pdfinfo "$pdf_file" &> /dev/null; then
      total_invalid=$((total_invalid + 1))
      # "--" protects against paths that start with a dash; only count the
      # file as deleted when rm actually succeeded.
      if rm -f -- "$pdf_file"; then
        deleted_count=$((deleted_count + 1))
        report "Deleted corrupted or invalid PDF: $pdf_file"
      else
        report "Failed to delete corrupted or invalid PDF: $pdf_file" >&2
      fi
    fi

    count=$((count + 1))
  done

  end_time=$(date +%s)

  echo
  report "Scan complete."
  report "Processed: $count files"
  report "Invalid PDFs found: $total_invalid"
  report "Files deleted: $deleted_count"
  report "Time taken: $((end_time - start_time)) seconds"
  echo "Output saved to: $output_dir"
}

# Kept as the last command so the script's exit status is main's status.
main "$@"