-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_mk_selected_fields_issues.py
29 lines (24 loc) · 1.17 KB
/
2_mk_selected_fields_issues.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
# Second script in the data-preparation pipeline.
# It reads the filtered issues JSONL file and writes a new JSONL file that
# keeps only the fields required to build the search engine, dropping every
# other column.

# Path to the filtered JSONL file
input_file = 'filtered_issues.jsonl'

# Path to the output JSONL file with selected fields
output_file = 'selected_fields_issues.jsonl'


def select_fields(data: dict) -> dict:
    """Return a new dict holding only the fields kept for the search engine.

    Args:
        data: One issue record decoded from a JSONL line.

    Returns:
        A dict with the keys ``html_url``, ``title``, ``comments``, ``body``
        and ``number``, in that order.
    """
    # NOTE: the ``.get`` defaults apply only when a key is absent; a key that
    # is present with a JSON null is passed through unchanged as None.
    return {
        'html_url': data.get('html_url', ''),
        'title': data.get('title', ''),
        'comments': data.get('comments', 0),
        'body': data.get('body', ''),
        'number': data.get('number', 0),
    }


def main(input_path: str = input_file, output_path: str = output_file) -> int:
    """Copy ``input_path`` to ``output_path`` keeping only the selected fields.

    Each non-blank input line is parsed as one JSON object and written back
    as one JSON line. Blank or whitespace-only lines are skipped instead of
    being handed to ``json.loads``, which would raise on them and abort the
    run with a partially written output file.

    Args:
        input_path: JSONL file to read (UTF-8).
        output_path: JSONL file to create or overwrite (UTF-8).

    Returns:
        The number of records written.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    written = 0
    with open(input_path, 'r', encoding='utf-8') as infile, \
            open(output_path, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Tolerate empty separator lines and trailing blank lines.
            if not line.strip():
                continue
            record = select_fields(json.loads(line))
            outfile.write(json.dumps(record) + '\n')
            written += 1

    # The result is saved locally at output_path.
    print(f'Created {output_path} with selected fields.')
    return written


if __name__ == '__main__':
    main()