-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
164 lines (134 loc) · 5.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import os
import shutil
from typing import Dict
import pandas as pd
import evadb
# Default path of a complaints text file. Nothing in the visible code reads
# this value yet.
DEFAULT_FILE_PATH = "data/user_complaints.txt"

# Complaint text used by receive_user_input() when the user enters nothing.
DEFAULT_QUERY = "The keyboard on my laptop is typing the wrong letters and it's driving me crazy!"

# One row per field to extract. Each row appears to be
# [field name, description, expected value format]; the visible code does not
# read this list, so confirm the layout against whatever consumes it.
DEFAULT_STRUCTURE_FORMAT = [
    ["Issue Category", "What category the issue belongs to", "One of (hardware, software)"],
    ["Raw Issue String", "Raw String containing the exact input given by the user", "string"],
    ["Issue Component", "Component that is causing the issue", "string"],
]
def receive_user_input() -> Dict:
    """Collect the session configuration interactively.

    Prints a welcome banner, then asks for the complaint text; an empty answer
    selects ``DEFAULT_QUERY``. When the ``OPENAI_KEY`` environment variable is
    not set, the key is requested as well and stored in ``os.environ`` for the
    rest of the process.

    Returns:
        Dict: a dictionary with the single key ``"unstructured_query"``.
    """
    print(
        "🔮 Welcome to EvaDB-STRUCTURE-GPT! This app lets you provide an unstructured text file and it will generate a structured dataframe for you using that file"
    )

    complaint = input("Enter the user complaint query you want to structure: ")
    # An empty answer is falsy, so `or` substitutes the default query.
    settings = {"unstructured_query": complaint or DEFAULT_QUERY}

    # Add input for prompt, right now not needed as such

    # Ask for the OpenAI key only when the environment does not provide one.
    if "OPENAI_KEY" not in os.environ:
        os.environ["OPENAI_KEY"] = input("🔑 Enter your OpenAI key: ")

    return settings
def create_prompt(extra_prompt_line, user_input):
    """Build the field-extraction prompt handed to the LLM.

    Args:
        extra_prompt_line (str): extra context appended verbatim after the
            base instructions; an empty string leaves them unchanged.
        user_input (dict): session configuration; not used by this function.

    Returns:
        str: the base instructions followed by ``extra_prompt_line``.
    """
    # TODO :- Enable storing historical context
    instructions = """
You are given a user query extracted from a customer support chatbot. Your task is to extract the following fields from the text and return the result in json format.
"Issue Category, What category the issue belongs to, Issue category HAS TO BE one of (hardware, software)"
"Raw Issue String, Raw String containing the exact input given by the user, string"
"Issue Component, Component that is causing the issue, string"
\n
The query is as follows:
"""
    # Appending an empty string yields the same text, so no special case.
    return instructions + extra_prompt_line
def generate_response(cursor, prompt):
    """Run the ChatGPT function over the stored complaint and return its reply.

    The reply is also printed to stdout before it is returned.

    Args:
        cursor (EVADBCursor): evadb api cursor.
        prompt (str): instructions passed as the first ChatGPT argument.

    Returns:
        str: entry ``0`` of the ``chatgpt.response`` result column.
    """
    # NOTE(review): `prompt` is placed between single quotes without escaping,
    # so a prompt containing a single quote presumably breaks the expression;
    # confirm against the EvaDB parser.
    projection = f"ChatGPT('{prompt}', text)"
    result_frame = cursor.table("unstructuredtable").select(projection).df()
    answer = result_frame["chatgpt.response"][0]
    print(answer)
    return answer
def cleanup():
    """Delete the ``evadb_data`` path in the working directory if it exists."""
    data_dir = "evadb_data"
    if not os.path.exists(data_dir):
        # Nothing was created (or it is already gone); nothing to remove.
        return
    shutil.rmtree(data_dir)
def main():
    """Run one interactive structuring session from the command line.

    Steps:
      1. Collect the complaint text (and the OpenAI key if it is missing).
      2. Recreate the ``unstructuredtable`` table and insert the complaint.
      3. Repeatedly ask for extra prompt context and print the LLM reply,
         until the user types ``exit``.
      4. Remove the on-disk EvaDB data and report how the session ended.

    Cleanup sits in a ``finally`` clause so it also runs when the session is
    aborted by something that is not an ``Exception`` subclass, such as
    Ctrl-C (``KeyboardInterrupt``) at the prompt. Those still propagate to
    the caller after cleanup.
    """
    # receive input from user
    user_input = receive_user_input()

    try:
        try:
            # establish evadb api cursor
            cursor = evadb.connect().cursor()

            # Start from an empty table on every run.
            cursor.drop_table("unstructuredtable", if_exists=True).execute()
            cursor.query(
                """CREATE TABLE IF NOT EXISTS unstructuredtable (text TEXT(150));"""
            ).execute()

            # TODO : Add back when lifting stuff from a file
            # NOTE(review): the complaint is interpolated between double quotes
            # without escaping, so input containing a double quote presumably
            # breaks this statement; confirm against the EvaDB parser.
            cursor.query(
                """INSERT INTO unstructuredtable (text) VALUES ("{}");""".format(
                    user_input["unstructured_query"]
                )
            ).execute()
            # Add something about getting a file and read it

            print("===========================================")
            while True:
                extra_prompt_line = input(
                    "If not satisfied by the query add more context. This will be appended to the end of the query (enter 'exit' to exit): "
                )
                # Check for exit first so no prompt is built for nothing.
                if extra_prompt_line.lower() == "exit":
                    break

                prompt = create_prompt(extra_prompt_line, user_input)

                # Generate response with chatgpt udf
                print("⏳ Generating response (may take a while)...")
                response = generate_response(cursor, prompt)
                print("+--------------------------------------------------+")
                print("✅ Answer:")
                print(response)
                print("+--------------------------------------------------+")
        finally:
            # Runs before either status message below, and on Ctrl-C too.
            cleanup()
    except Exception as e:
        print("❗️ Session ended with an error.")
        print(e)
        print("===========================================")
    else:
        print("✅ Session ended.")
        print("===========================================")


if __name__ == "__main__":
    main()