-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgpt-test.py
89 lines (74 loc) · 3.16 KB
/
gpt-test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import openai
import os
import json
import time
# Read the API key from the OPENAI_API_KEY environment variable so the secret
# never has to be pasted into (and committed with) this file. When the
# variable is unset the value falls back to the original empty string.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` of the one ``user`` message.
        model: Model name forwarded to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature forwarded to the same call.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    reply = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    # Only the first returned choice is used.
    first_choice = reply.choices[0]
    return first_choice.message["content"]
# Template of the JSON structure the model is asked to imitate. Every scalar
# field is an empty string and each list-of-records section holds one sample
# entry. Key order matters because the dict is interpolated into the prompt.
json_sample = {
    # Top-level contact fields.
    **dict.fromkeys(
        ("name", "email", "phone", "address", "linkedin URL", "github URL"),
        "",
    ),
    # One sample education record.
    "education": [
        dict.fromkeys(
            (
                "education level",
                "specialization",
                "university name",
                "duration",
                "marks/percentage/cgpa obtained",
                "additional information",
            ),
            "",
        ),
    ],
    # One sample work-experience record.
    "work_experience": [
        dict.fromkeys(
            ("title", "company", "start_date", "end_date", "description"),
            "",
        ),
    ],
    # Free-form list sections; each gets its own (unshared) empty list.
    "skills": [],
    "certifications": [],
    "projects": [],
    "extracurricular_activities": [],
}
def main(input_directory='#text files directory', output_directory='#output directory'):
    """Convert every ``.txt`` resume in a directory into a JSON file.

    For each ``.txt`` file in *input_directory* the file's text is embedded
    in an extraction prompt together with ``json_sample``, the prompt is sent
    through ``get_completion``, and the returned text is written verbatim to
    ``<stem>_processed.json`` in *output_directory*. The response is not
    parsed or validated as JSON before it is saved.

    Args:
        input_directory: Directory containing the resume text files. The
            default is the original placeholder value and must be replaced
            with a real path.
        output_directory: Directory that receives the generated files. It is
            created if it does not exist yet.

    Raises:
        OSError: If *input_directory* cannot be listed or a file cannot be
            read or written (for example ``FileNotFoundError``).

    Any exception raised by ``get_completion`` propagates and stops the batch.
    """
    # Sort so the processing order (and the progress output) is deterministic;
    # os.listdir returns entries in arbitrary order.
    txt_files = sorted(
        name for name in os.listdir(input_directory) if name.endswith('.txt')
    )

    # Created only after the input listing succeeded, so a bad input path
    # still fails before anything is written to disk.
    os.makedirs(output_directory, exist_ok=True)

    total_files = len(txt_files)
    for file_count, txt_file in enumerate(txt_files, start=1):
        print(f"Processing File {file_count}/{total_files} - {txt_file}")

        txt_file_path = os.path.join(input_directory, txt_file)
        with open(txt_file_path, 'r', encoding='utf-8') as f:
            resume_text = f.read()

        # json_sample is interpolated with its Python dict representation;
        # the resume text goes between the angle brackets.
        prompt = (
            "Extract details from the text of resume delimited by angle brackets into a JSON. For any details that are not found, use the word 'Unknown'.\n"
            "The JSON should have a similar structure to the keys delimited by triple backticks but it can have multiple entries of details in each section.\n"
            "Education section must capture granular details where education level refers to the degree level for example Bachelors, Masters or PhD or such synonymous acronyms and specialisation refers to the major or course name.\n"
            f"```{json_sample}``` <{resume_text}>"
        )

        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments during the request.
        start_time = time.perf_counter()
        response = get_completion(prompt)
        execution_time = time.perf_counter() - start_time
        print(f"Response received in {execution_time:.2f} seconds")

        file_name = os.path.splitext(txt_file)[0] + '_processed.json'
        json_file_path = os.path.join(output_directory, file_name)
        with open(json_file_path, 'w', encoding='utf-8') as f:
            f.write(response)
        print("prompt executed, json saved")
# Run the batch only when this file is executed as a script, so importing
# the module does not trigger any processing.
if __name__ == "__main__":
    main()