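"""Q&A over Apple visionOS documentation.

Loads precomputed document embeddings into a Pinecone index, retrieves the
sections most related to a query, and asks GPT to answer using only those
sections.
"""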
import os
import openai
import pinecone
import tiktoken
import ast
import pandas as pd
from dotenv import load_dotenv

# Configuration
load_dotenv()  # Load default environment variables (.env)

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
assert OPENAI_API_KEY, "OPENAI_API_KEY environment variable is missing from .env"
openai.api_key = OPENAI_API_KEY

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
assert PINECONE_API_KEY, "PINECONE_API_KEY environment variable is missing from .env"

PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "")
assert (
    PINECONE_ENVIRONMENT
), "PINECONE_ENVIRONMENT environment variable is missing from .env"

# Models
EMBEDDING_MODEL = "text-embedding-ada-002"
GPT_MODEL = "gpt-3.5-turbo"

# Initialize Pinecone
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)

table_name = "visionos-docs-2023-07-10"
dimension = 1536  # output dimension of text-embedding-ada-002
metric = "cosine"
pod_type = "p1"

# Create the Pinecone index if it does not already exist
if table_name not in pinecone.list_indexes():
    print("Creating pinecone index")
    pinecone.create_index(
        table_name, dimension=dimension, metric=metric, pod_type=pod_type
    )

# Connect to the index
index = pinecone.Index(table_name)
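
# Optional sanity check (commented out): confirm the index is reachable and
# see how many vectors it currently holds.
# print(index.describe_index_stats())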

def load_vectors():
    """Read precomputed embeddings from a CSV and upsert them into Pinecone."""
    print("loading CSV file")
    embeddings_path = "./visionos_docs_2023_07_10_embedding.csv"
    df = pd.read_csv(embeddings_path)
    # Embeddings are stored in the CSV as strings; convert them back to lists
    print("converting it to list type")
    df["embedding"] = df["embedding"].apply(ast.literal_eval)
    print("writing vectors")
    vectors = [(str(row["id"]), row["embedding"]) for _, row in df.iterrows()]
    # Upsert in batches rather than one request per vector
    batch_size = 100
    for i in range(0, len(vectors), batch_size):
        index.upsert(vectors[i : i + batch_size])
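
# The embeddings CSV is assumed to contain at least the two columns used above
# (the sample row is illustrative only):
#   id,embedding
#   0,"[0.0023, -0.0091, ...]"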

# Search function
def strings_ranked_by_relatedness(query: str, top_n: int = 100) -> object:
    """Return Pinecone query results (matches with scores), most related first."""
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]
    results = index.query(vector=query_embedding, top_k=top_n, include_metadata=True)
    return results

# Example (commented out): inspect the top match scores for a query
# results = strings_ranked_by_relatedness("immersive space", top_n=5)
# for match in results["matches"]:
#     print(match["score"])

def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Return the number of tokens in a string for the given model."""
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))
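
# Quick illustration (commented out): this count is what gates how many
# document sections query_message() can pack under the token budget.
# print(num_tokens("How many tokens is this sentence?"))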

def query_message(query: str, model: str, token_budget: int) -> str:
    """Return a message for GPT, with relevant source texts pulled from a CSV."""
    results = strings_ranked_by_relatedness(query)
    introduction = (
        "Use the below articles on Apple visionOS to answer the subsequent question. "
        'If the answer cannot be found in the articles, write "I could not find an answer."'
    )
    question = f"\n\nQuestion: {query}"
    message = introduction
    # Look up the full text for each match by id
    texts_path = "./visionos_docs_2023_07_10_text.csv"
    df = pd.read_csv(texts_path)
    for match in results["matches"]:
        match_id = int(match["id"])
        string = df[df["id"] == match_id]["text"].values[0]
        next_article = f'\n\nvisionOS document section:\n"""\n{string}\n"""'
        # Stop adding sections once the prompt would exceed the token budget
        if num_tokens(message + next_article + question, model=model) > token_budget:
            break
        else:
            message += next_article
    return message + question
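
# The assembled prompt has roughly this shape (sketch, contents elided):
#   Use the below articles on Apple visionOS to answer ...
#
#   visionOS document section:
#   """
#   <retrieved text>
#   """
#   ... (more sections until the token budget is reached)
#
#   Question: <query>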

def ask(
    query: str,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
) -> str:
    """Answer a query with GPT, using relevant texts retrieved via Pinecone."""
    message = query_message(query, model=model, token_budget=token_budget)
    if print_message:
        print(message)
    messages = [
        {"role": "system", "content": "You answer questions about Apple visionOS."},
        {"role": "user", "content": message},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,  # deterministic output for factual Q&A
    )
    response_message = response["choices"][0]["message"]["content"]
    return response_message

# Build the index from the precomputed embeddings
load_vectors()

# Ask a question
query = "How to add a button to open full immersive space using SwiftUI with visionOS?"
res = ask(query, print_message=False)
print(f"Q: {query}\nA: {res}")