Skip to content

Commit 58e74e4

Browse files
Search zip ecr's for keywords (#1392)
* working gui search * added selection for doc types * Update zip search * Remove unneeded comment --------- Co-authored-by: Lina Roth <[email protected]> Co-authored-by: Lina Roth <[email protected]>
1 parent 396cb92 commit 58e74e4

File tree

4 files changed

+168
-6
lines changed

4 files changed

+168
-6
lines changed

package-lock.json

Lines changed: 0 additions & 6 deletions
This file was deleted.

utils/package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utils/package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"name": "utils",
3+
"scripts": {
4+
"zip-search": "python zip-search/zip-search-gui.py"
5+
}
6+
}

utils/zip-search/zip-search-gui.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
import glob
2+
import os
3+
import shutil
4+
import tkinter as tk
5+
import zipfile
6+
from tkinter import filedialog
7+
8+
9+
class ZipSearcher(tk.Tk):
    """Tkinter window that searches documents inside zip files for a term.

    The user picks a source directory and an output directory, types a search
    term, and lists the names of the documents to inspect inside each zip.
    Every ``*.zip`` directly inside the source directory whose listed
    documents contain the term is copied to the output directory.
    """

    def __init__(self):
        """Create the main window, reset the search state and build the UI."""
        super().__init__()
        self.title("Zip File Searcher")
        self.geometry("500x500")

        # Search state; filled in by the directory pickers and start_search().
        self.source_dir = None
        self.output_dir = None
        self.search_term = None

        # UI components
        self.create_widgets()

    def create_widgets(self):
        """Build and pack every widget of the window, top to bottom."""
        # Source directory selection button
        self.source_button = tk.Button(
            self, text="Select Source Directory", command=self.select_source_dir
        )
        self.source_button.pack(pady=10)

        # Output directory selection button
        self.output_button = tk.Button(
            self, text="Select Output Directory", command=self.select_output_dir
        )
        self.output_button.pack(pady=10)

        # Search text
        self.search_entry_label = tk.Label(self, text="Search Term")
        self.search_entry_label.pack(pady=(10, 0))
        self.search_entry = tk.Entry(self)
        self.search_entry.pack(pady=(0, 10))

        # Names of the documents to look for inside each zip. The list must
        # exist before init_entries() runs because add_entry() appends to it.
        self.file_type_label = tk.Label(self, text="Document names")
        self.file_type_label.pack(pady=(10, 0))
        self.entries = []
        self.init_entries()

        self.add_button = tk.Button(self, text="Add New Field", command=self.add_entry)
        self.add_button.pack(pady=10)

        # Button to start the search
        self.search_button = tk.Button(
            self, text="Start Search", command=self.start_search
        )
        self.search_button.pack(pady=10)

        # Status labels for the selected directories, search term and results.
        # NOTE(review): fg="white" presumably targets a dark theme; on a light
        # default background this text would be hard to read -- confirm.
        self.source_dir_label = tk.Label(
            self, text="No source directory selected", fg="white"
        )
        self.source_dir_label.pack()

        self.output_dir_label = tk.Label(
            self, text="No output directory selected", fg="white"
        )
        self.output_dir_label.pack()

        self.search_term_label = tk.Label(self, text="")
        self.search_term_label.pack(pady=(0, 0))

        self.output_results_label = tk.Label(self, text="", fg="white")
        self.output_results_label.pack(pady=10)

    def init_entries(self):
        """Add one pre-filled document-name field per default document."""
        initial_values = ["CDA_eICR.xml", "CDA_RR.xml"]
        for value in initial_values:
            self.add_entry(predefined_value=value)

    def add_entry(self, predefined_value=""):
        """Append a document-name field with its own "Remove" button.

        Args:
            predefined_value: Text the new field starts with (empty by default).
        """
        # Frame to hold the entry and its remove button on one row
        entry_frame = tk.Frame(self)
        entry_frame.pack(pady=5)

        # Entry widget
        entry = tk.Entry(entry_frame, width=40)
        entry.insert(0, predefined_value)  # Prepopulate with a value if provided
        entry.pack(side=tk.LEFT, padx=5)
        self.entries.append(entry)  # Keep track of this entry

        # Button to remove the entry; the lambda captures this call's widgets.
        remove_button = tk.Button(
            entry_frame,
            text="Remove",
            command=lambda: self.remove_entry(entry_frame, entry),
        )
        remove_button.pack(side=tk.LEFT, padx=5)

    def remove_entry(self, frame, entry):
        """Stop tracking ``entry`` and destroy the ``frame`` that holds it."""
        self.entries.remove(entry)
        frame.destroy()

    def select_source_dir(self):
        """Ask for the directory containing the zips and show the choice."""
        selected = filedialog.askdirectory()
        if not selected:
            # Dialog was cancelled: keep whatever was selected before.
            return
        self.source_dir = selected
        print("Selected source directory:", self.source_dir)
        self.source_dir_label.config(text=f"Source Directory: {self.source_dir}")

    def select_output_dir(self):
        """Ask for the directory matching zips are copied to and show it."""
        selected = filedialog.askdirectory()
        if not selected:
            # Dialog was cancelled: keep whatever was selected before.
            return
        self.output_dir = selected
        print("Selected output directory:", self.output_dir)
        self.output_dir_label.config(text=f"Output Directory: {self.output_dir}")

    def start_search(self):
        """Read the search term and run the search if all inputs are present.

        When a directory or the search term is missing, the reason is printed
        and also shown in the results label so the GUI user can see it.
        """
        self.search_term = self.search_entry.get()
        print("search term:")
        print(self.search_term)
        self.search_term_label.config(text=f"Searching for: {self.search_term}")
        if self.source_dir and self.output_dir and self.search_term:
            self.search_in_zips()
        else:
            message = (
                "Please select both source and output directories "
                "and enter a search term."
            )
            print(message)
            self.output_results_label.config(text=message)

    def search_in_zips(self):
        """Copy every zip whose listed documents contain the search term.

        Uses ``self.source_dir``, ``self.output_dir``, ``self.search_term`` and
        the document names currently typed into ``self.entries``. Each matching
        zip is copied and counted once, however many of its documents match.
        The number of matching zips is printed and shown in the results label.
        """
        # Blank fields cannot name a document, so leave them out of the lookup.
        doc_names = [
            name for name in (entry.get().strip() for entry in self.entries) if name
        ]

        results_count = 0
        for filename in glob.glob(os.path.join(self.source_dir, "*.zip")):
            try:
                with zipfile.ZipFile(filename) as zip_ref:
                    matched = self._zip_contains_term(zip_ref, doc_names)
            except zipfile.BadZipFile:
                # One corrupt archive must not abort the whole search.
                print(f"Skipping unreadable zip: {filename}")
                continue
            if not matched:
                continue

            destination = os.path.join(self.output_dir, os.path.basename(filename))
            try:
                shutil.copy(filename, destination)
            except shutil.SameFileError:
                # Source and output directory are the same: already in place.
                pass
            results_count += 1

        print("done searching")
        print(f"Found {results_count} result(s)")
        self.output_results_label.config(text=f"Found {results_count} result(s)")

    def _zip_contains_term(self, zip_ref, doc_names):
        """Return True if any named document in ``zip_ref`` contains the term.

        Documents missing from the archive are skipped individually, so one
        absent document does not prevent the others from being searched.
        """
        for doc_name in doc_names:
            try:
                raw = zip_ref.read(doc_name)
            except KeyError:
                print("No eICR/RR here")
                continue
            # errors="replace" keeps a badly encoded document searchable
            # instead of raising UnicodeDecodeError mid-search.
            if self.search_term in raw.decode("utf-8", errors="replace"):
                return True
        return False
148+
149+
150+
if __name__ == "__main__":
    # Run as a script: build the window and block in Tk's event loop until
    # the user closes it.
    ZipSearcher().mainloop()

0 commit comments

Comments
 (0)