-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathInvestorGPT.py
349 lines (291 loc) · 13.1 KB
/
InvestorGPT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
PERPLEXITY_API = ""
import json
import time
from googlesearch import search
from bs4 import BeautifulSoup
import re
import requests
import yfinance as yf
from datetime import date, timedelta
from stock_data_loader import *
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
################################################################################################
# Helper funcs
################################################################################################
def goog_query_str(company_name):
    """Search Google for fresh "stock dropped" news and return the scraped text.

    Builds the query ``<company_name> stock dropped after:<yesterday>``,
    requests up to five results, downloads every selected page and
    concatenates the text of its ``<p>`` elements.

    Args:
        company_name: Company name used in the query and to filter results.

    Returns:
        The text of all successfully scraped pages, joined with a period and
        a newline. An empty string when the search fails or no page could be
        scraped.
    """
    yesterday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    query = f"{company_name} stock dropped after:{yesterday}"
    print(query)

    # Both lists exist before the search runs, so a failed search leaves
    # them empty instead of leaving the names unbound.
    matching_links = []
    all_links = []
    try:
        for sr in search(query, num_results=5, advanced=True):
            all_links.append(sr.url)
            title = sr.title or ""
            description = sr.description or ""
            if company_name in title or company_name in description:
                matching_links.append(sr.url)
    except Exception as e:
        print(f"Google1 failed: {e}")

    # Prefer results that mention the company; if the name filter matches
    # nothing (e.g. "Apple Inc." vs. "Apple"), fall back to every result URL.
    top_links = matching_links or all_links

    scraped_texts = []
    for link in top_links:
        print(link)
        try:
            # requests has no default timeout; bound it so one dead host
            # cannot hang the whole run.
            page = requests.get(link, timeout=10)
            soup = BeautifulSoup(page.content, 'html.parser')
            text = ' '.join(p.get_text() for p in soup.find_all('p'))
        except Exception as e:
            print(f"Failed to scrape the link: {link}\nError: {e}")
            # Skip this link: there is no text to append for it.
            continue
        scraped_texts.append(text)

    return '.\n'.join(scraped_texts)
def get_company_name(ticker):
    """Ask the Perplexity chat-completions API for the company behind a ticker.

    Args:
        ticker: Stock ticker symbol to resolve.

    Returns:
        The model's answer with surrounding whitespace removed. The prompt
        asks for the bare company name, but the content is not validated.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    user_prompt = f"""
Answer only with the company name and nothing else.
Do not answer anything except company name, no commas, periods, punctuation or explanation.
Answer only with the company name.
Question: What is the company name that has {ticker} stock ticker?
Company name:
"""
    url = "https://api.perplexity.ai/chat/completions"
    payload = {
        "model": "sonar-small-online",
        "temperature": 0,
        "messages": [
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + PERPLEXITY_API,
    }
    # requests has no default timeout; without one a stalled API call
    # blocks the script forever.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    # Fail with the HTTP status rather than an opaque KeyError on "choices".
    response.raise_for_status()
    company_name = response.json()["choices"][0]["message"]["content"]
    # The name is reused in a search query and a substring filter, so stray
    # whitespace or newlines from the model would break both.
    return company_name.strip()
def llm_call(prompt):
    """Send ``prompt`` to the Perplexity chat-completions API and return the reply.

    The request uses the ``sonar-medium-chat`` model at temperature 0 with a
    system message asking for precise, concise answers.

    Args:
        prompt: User message content.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    url = "https://api.perplexity.ai/chat/completions"
    payload = {
        "model": "sonar-medium-chat",
        "temperature": 0,
        "messages": [
            {
                "role": "system",
                "content": "Be precise and concise.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + PERPLEXITY_API,
    }
    # requests has no default timeout; bound the call so it cannot hang.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    # Surface HTTP errors directly instead of failing later on a missing key.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
################################################################################################
# Full Step 1 is this func: get losers ticker and percentage drop
################################################################################################
def get_losers():
    """Return today's losing stocks as ``(ticker, percentage_drop)`` tuples.

    Tries the Yahoo Finance losers page first. If that scrape raises or
    yields no usable rows, falls back to stockanalysis.com.

    Returns:
        A list of ``(ticker, percentage_drop)`` tuples, where
        ``percentage_drop`` is a positive float (sign and ``%`` removed).
        May be empty if the fallback page has no usable rows.

    Raises:
        Exception: Whatever the fallback scrape raises (network error,
            HTTP error status, or a missing table).
    """
    try:
        data = _scrape_losers_table(
            "https://finance.yahoo.com/losers", 'W(100%)', 0, 4
        )
        if data:
            return data
        print("Yahoo losers table had no usable rows; trying fallback source")
    except Exception as e:
        # Report why the primary source failed instead of hiding it.
        print(f"Yahoo losers scrape failed: {e}; trying fallback source")
    return _scrape_losers_table(
        "https://stockanalysis.com/markets/losers/", 'main-table', 1, 3
    )


def _scrape_losers_table(url, table_class, ticker_col, pct_col):
    """Scrape ``(ticker, percentage_drop)`` pairs from one HTML losers table.

    Args:
        url: Page to download.
        table_class: Value of the ``class`` attribute identifying the table.
        ticker_col: Index of the ``<td>`` holding the ticker.
        pct_col: Index of the ``<td>`` holding the percentage change.

    Returns:
        A list of ``(ticker, percentage_drop)`` tuples; rows with too few
        cells or an unparsable percentage are skipped.

    Raises:
        ValueError: If the page contains no table with ``table_class``.
        requests.RequestException: On network failure or HTTP error status.
    """
    response = requests.get(url, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': table_class})
    if table is None:
        raise ValueError(f"no table with class {table_class!r} found at {url}")

    needed_cells = max(ticker_col, pct_col) + 1
    data = []
    # The first <tr> is the header row.
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < needed_cells:
            continue
        ticker = cells[ticker_col].text.strip()
        raw_pct = cells[pct_col].text.strip()
        try:
            percentage_drop = float(
                raw_pct.replace("-", "").replace("%", "").replace(",", "")
            )
        except ValueError:
            # A single malformed cell should not discard the whole table.
            continue
        data.append((ticker, percentage_drop))
    return data
################################################################################################
# Full Step 2 is this func: scrape the top Google results for "{company_name} stock dropped" and ask the LLM why the stock fell
################################################################################################
def why_stock_fell(company_name):
    """Explain why ``company_name``'s stock dropped, based on scraped news.

    Fetches news text with ``goog_query_str`` and passes it as context to
    ``llm_call`` together with a question asking for the reasons of the drop.

    Args:
        company_name: Company name used for the search and in the question.

    Returns:
        The answer returned by ``llm_call``.
    """
    news_text = goog_query_str(company_name)
    return llm_call(f"""
Context: {news_text}\n
Question: Based on the Context, answer with bullet points and give detailed analysis the precise reasons why {company_name} stock price dropped. Why did {company_name} stock price fell?
Answer:
""")
################################################################################################
# Full Step 3: Get 10k and 10q reports and vectorize them (rag pipeline)
################################################################################################
# Download 10-Ks and 10-Qs
# Vectorize them
# Ask 20+ questions to them using semantic search + LLM
# summarize the answer into coherent and not so long text
################################################################################################
# Full Step 4: Analyze fundamentals
################################################################################################
# Use some kind of a tool to download and analyze 3 Financial Reports
# Get their main metrics and ratios
# Compare them to competitors
def get_book_value(ticker):
    """Return total assets minus total liabilities from the balance sheet.

    Args:
        ticker: Stock ticker; anything from the first ``.`` onward is dropped
            before the lookup.

    Returns:
        ``Total Assets`` minus ``Total Liabilities Net Minority Interest``,
        both read from the first column of the yfinance balance sheet.

    Raises:
        KeyError: If either row is missing (or NaN) in that column.
    """
    # split() returns the whole string when there is no ".", so no guard
    # is needed.
    ticker = ticker.split(".")[0]
    company = yf.Ticker(ticker)

    # Keep only the first column (presumably the most recent period --
    # TODO confirm the yfinance column order), then drop NaN rows. Dropping
    # before selecting would discard rows that are only missing in other
    # periods.
    latest = company.balance_sheet.iloc[:, :1].dropna(how="any")

    # .iloc[0] is explicit positional access; plain [0] on a label-indexed
    # Series is deprecated in pandas.
    total_assets = latest.loc["Total Assets"].iloc[0]
    total_liabilities = latest.loc["Total Liabilities Net Minority Interest"].iloc[0]
    return total_assets - total_liabilities
def get_market_cap(ticker):
    """Return the ``Total Capitalization`` row of the balance sheet.

    NOTE(review): despite the function name, this reads the balance-sheet
    line "Total Capitalization", not a share-price-based market
    capitalization. Confirm this is the figure callers want.

    Args:
        ticker: Stock ticker; anything from the first ``.`` onward is dropped
            before the lookup.

    Returns:
        The ``Total Capitalization`` value from the first column of the
        yfinance balance sheet.

    Raises:
        KeyError: If the row is missing (or NaN) in that column.
    """
    # split() returns the whole string when there is no ".", so no guard
    # is needed.
    ticker = ticker.split(".")[0]
    company = yf.Ticker(ticker)

    # Keep only the first column (presumably the most recent period --
    # TODO confirm the yfinance column order), then drop NaN rows, so gaps
    # in other periods cannot remove the row we need.
    latest = company.balance_sheet.iloc[:, :1].dropna(how="any")

    # .iloc[0] is explicit positional access; plain [0] on a label-indexed
    # Series is deprecated in pandas.
    market_cap = latest.loc["Total Capitalization"].iloc[0]
    return market_cap
def get_net_value(ticker):
    """Return ``get_book_value(ticker)`` minus ``get_market_cap(ticker)``.

    Args:
        ticker: Stock ticker passed unchanged to both helpers.

    Returns:
        The difference between the two helper results.
    """
    return get_book_value(ticker) - get_market_cap(ticker)
def get_stock_numeric_rating(ticker, csv_file_name):
    """Look up the "Overall Rating" of ``ticker`` in a ratings CSV file.

    Args:
        ticker: Value to match exactly against the ``Ticker`` column.
        csv_file_name: Path of a CSV file with ``Ticker`` and
            ``Overall Rating`` columns.

    Returns:
        The ``Overall Rating`` value of the first row whose ``Ticker`` equals
        ``ticker``.

    Raises:
        IndexError: If no row matches ``ticker``.
        KeyError: If a required column is missing from the file.
    """
    df = pd.read_csv(csv_file_name)
    matches = df[df['Ticker'] == ticker]
    if matches.empty:
        # Same exception type the bare .iloc[0] would raise, but with a
        # message that names the ticker and the file.
        raise IndexError(f"Ticker {ticker!r} not found in {csv_file_name!r}")
    # If a ticker appears more than once, the first matching row wins.
    return matches.iloc[0]['Overall Rating']
#def get_stock_txt_rating(ten_k,ten_q):
def get_stock_txt_rating(company_name):
    """Ask the Perplexity API for a free-text view of a company's financial health.

    Sends the single question "What is overall financial health of
    <company_name>?" to the ``sonar-small-online`` model. No filings are
    downloaded or embedded here.

    Args:
        company_name: Company name inserted into the question.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    prompt = f"What is overall financial health of {company_name}?"
    url = "https://api.perplexity.ai/chat/completions"
    payload = {
        "model": "sonar-small-online",
        "temperature": 0,
        "messages": [
            {
                "role": "user",
                "content": prompt,
            },
        ],
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": "Bearer " + PERPLEXITY_API,
    }
    # requests has no default timeout; bound the call so it cannot hang.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    # Surface HTTP errors directly instead of failing later on a missing key.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
################################################################################################
# Full Step 5: Estimate a chance for a stock to fix the problem
################################################################################################
# using monte carlo simulation and bayes rule
# based on the stock fundamentals
# reasons why stock fell
# company's unique advantages
# company's health compared to competitors
# general market trends
# General idea:
# 1. Probability of recovering stock price due to health of fundamentals (0-1)
# p(H) = probability to recover stock price from bad news (general case)
# p(E) = probability of a company having an E financial health (health is the value from 0 to 1)
# P(E|H) = probability of a company to have an E financial health after recovering stock price from bad news
# General formula :
### P(H|E) = ( P(E|H) * P(H) ) / P(E)
#
# 2. Probability of recovering stock price based on how bad the news is
# 3. average?
# NOT USED: the function below is disabled by wrapping it in a string literal; main() builds the same prompt inline.
'''
def chance_to_recover(company_name,ticker):
why_fell = why_stock_fell(ticker)
stock_n_rating = get_stock_numeric_rating(ticker, "StockRatings.csv")
print(f"{company_name} Overall Financial score: {stock_n_rating}")
stock_txt_rating = get_stock_txt_rating(company_name)
print(f"{company_name} Overall Financial health: {stock_txt_rating}")
prompt = f"""You are the greatest and most competent financial analyst that can understand and predict company's future.
Read this context about the company:\n
Reason {company_name} stock fell last 24 hours: {why_fell}.\n
{company_name} overall description: {stock_txt_rating}\n
{company_name} overall financial rating from world class analytical experts: {stock_n_rating} out of 100.\n
Question: What is the chance of a company to recover its stock price based on the reason the stock fell, company description, company financial health?
Answer from a financial expert:
"""
result = llm_call(prompt)
print(f"{company_name} chances to recover stock price: {result}")
return result
'''
################################################################################################
# Full Step 6: Calculate end value of a company
################################################################################################
def main():
    """Report on the three biggest losers of the day.

    Fetches the losers list, keeps the three largest percentage drops and
    prints an analysis for each. A failure while analysing one ticker is
    reported and does not stop the remaining tickers.
    """
    losers_data = get_losers()
    print("losers gathered")
    top_3_losers = sorted(losers_data, key=lambda x: x[1], reverse=True)[:3]
    for ticker, percentage_drop in top_3_losers:
        print("###########################################################################\n")
        try:
            _report_loser(ticker, percentage_drop)
        except Exception as e:
            # Top-level boundary: one bad ticker (missing rating row, missing
            # balance-sheet line, API error) must not abort the whole report.
            print(f"Skipping {ticker}: analysis failed: {e}")


def _report_loser(ticker, percentage_drop):
    """Print drop reasons, net value, rating and recovery estimate for one ticker."""
    company_name = get_company_name(ticker)
    print(f"{company_name} ({ticker}) -{percentage_drop}%")

    why_fell = why_stock_fell(company_name)
    print(f"{company_name} Stock drop reasons: {why_fell}")

    net_value = get_net_value(ticker)
    print(f"{company_name} Net Value (Book - MarketCap): {net_value}")

    stock_n_rating = get_stock_numeric_rating(ticker, "StockRatings.csv")
    print(f"{company_name} Overall Funamental Financials health score: {stock_n_rating}")

    # Fed into the prompt below; not printed on its own.
    stock_txt_rating = get_stock_txt_rating(company_name)

    prompt = f"""You are the greatest and most competent financial analyst that can understand and predict company's future.
Read this context about the company:\n
Reason {company_name} stock fell last 24 hours: {why_fell}.\n
{company_name} overall description: {stock_txt_rating}\n
{company_name} overall financial rating from world class analytical experts: {stock_n_rating} out of 100.\n
Question: What is the chance of a company to recover its stock price based on the reason the stock fell, company description, company financial health?
Answer from a financial expert:
"""
    answer = llm_call(prompt)
    print(f"{company_name} chances to recover: {answer}\n")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()