Skip to content

Commit 31ac6ab

Browse files
Init commit
1 parent c833b83 commit 31ac6ab

13 files changed

+6383
-0
lines changed

backtesting.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Preprocessing
2+
import numpy as np
3+
import pandas as pd
4+
from sklearn.model_selection import train_test_split
5+
from sklearn.ensemble import RandomForestClassifier
6+
from sklearn.metrics import precision_score
7+
from utils import status_calc
8+
9+
10+
def backtest():
    """Train a random forest on keystats.csv and report a simple backtest.

    Reads ``keystats.csv`` (indexed by ``Date``), drops every row containing
    a missing value, and uses all columns from the seventh onwards as
    features. Labels come from ``status_calc`` applied to the stock and
    S&P 500 percentage changes with ``outperformance=10`` (presumably a
    boolean "stock beat the index by at least 10" flag -- confirm against
    ``utils.status_calc``).

    The data is split 80/20, a 100-tree ``RandomForestClassifier`` is fitted
    on the training part, and accuracy/precision on the test part are
    printed. For every test row predicted positive, the stock return and the
    index return over the same row are averaged and compared.

    Returns:
        None. All results are printed to stdout.
    """
    data_df = pd.read_csv("keystats.csv", index_col='Date')
    data_df.dropna(axis=0, how='any', inplace=True)

    features = data_df.columns[6:]
    X = data_df[features].values

    y = list(status_calc(data_df["stock_p_change"],
                         data_df["SP500_p_change"],
                         outperformance=10))

    # z keeps the raw percentage changes aligned with X/y so the realised
    # returns of the test rows can be looked up after the split:
    # column 0 is the stock change, column 1 is the S&P 500 change.
    z = np.array(data_df[["stock_p_change", "SP500_p_change"]])

    X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(
        X, y, z, test_size=0.2)

    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    print("Classifier performance\n", "=" * 20)
    print(f"Accuracy score: {clf.score(X_test, y_test): .2f}")
    print(f"Precision score: {precision_score(y_test, y_pred): .2f}")

    # Force a boolean mask: if the labels ever come back as 0/1 integers,
    # z_test[y_pred, 0] would silently become fancy indexing of rows 0 and 1
    # instead of selecting the predicted rows.
    picked = np.asarray(y_pred, dtype=bool)
    num_positive_predictions = int(picked.sum())
    if num_positive_predictions == 0:
        # A count can never be negative, so the check has to be "== 0"; and
        # we must stop here, otherwise the averages below divide by zero.
        print("No stocks predicted!")
        return

    stock_returns = 1 + z_test[picked, 0] / 100
    market_returns = 1 + z_test[picked, 1] / 100

    avg_predicted_stock_growth = sum(stock_returns) / num_positive_predictions
    index_growth = sum(market_returns) / num_positive_predictions
    percentage_stock_returns = 100 * (avg_predicted_stock_growth - 1)
    percentage_market_returns = 100 * (index_growth - 1)
    total_outperformance = percentage_stock_returns - percentage_market_returns

    print("\n Stock prediction performance report \n", "=" * 40)
    print("Total Trades:", num_positive_predictions)
    print(
        f"Average return for stock predictions: {percentage_stock_returns: .1f} %")
    print(
        f"Average market return in the same period: {percentage_market_returns: .1f}% ")
    print(
        f"Compared to the index, our strategy earns {total_outperformance: .1f} percentage points more")
55+
56+
57+
# Script entry point: only run the backtest when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    backtest()

current_data.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import pandas as pd
2+
import os
3+
import re
4+
import time
5+
import requests
6+
import numpy as np
7+
from tqdm import tqdm
8+
from utils import data_string_to_float
9+
10+
# Directory whose entries are listed to obtain the ticker names.
statspath = "intraQuarter/_KeyStats/"

# Labels searched for in each downloaded key-statistics page, in the order
# they become columns of the parsed output.
features = [
    # Valuation measures
    'Market Cap',
    'Enterprise Value',
    'Trailing P/E',
    'Forward P/E',
    'PEG Ratio',
    'Price/Sales',
    'Price/Book',
    'Enterprise Value/Revenue',
    'Enterprise Value/EBITDA',
    # Financials
    'Profit Margin',
    'Operating Margin',
    'Return on Assets',
    'Return on Equity',
    'Revenue',
    'Revenue Per Share',
    'Quarterly Revenue Growth',
    'Gross Profit',
    'EBITDA',
    'Net Income Avi to Common',
    'Diluted EPS',
    'Quarterly Earnings Growth',
    'Total Cash',
    'Total Cash Per Share',
    'Total Debt',
    'Total Debt/Equity',
    'Current Ratio',
    'Book Value Per Share',
    'Operating Cash Flow',
    'Levered Free Cash Flow',
    # Trading information
    'Beta',
    '50-Day Moving Average',
    '200-Day Moving Average',
    'Avg Vol (3 month)',
    'Shares Outstanding',
    'Float',
    '% Held by Insiders',
    '% Held by Institutions',
    'Shares Short',
    'Short Ratio',
    'Short % of Float',
    # NOTE(review): the missing closing parenthesis looks deliberate (the
    # label is used as a regex prefix) -- confirm before "fixing" it.
    'Shares Short (prior month',
]
56+
57+
58+
def check_yahoo(timeout=30):
    """Download the Yahoo Finance key-statistics page for every ticker.

    Ticker names are the entries of ``statspath`` (``.DS_Store`` is skipped).
    Each page is saved as ``forward/<ticker>.html``; the ``forward/``
    directory is created if it does not exist. A failure for one ticker is
    printed and does not abort the remaining downloads.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a single hung connection would block the
            whole download loop indefinitely.

    Returns:
        None.
    """
    os.makedirs('forward/', exist_ok=True)

    ticker_list = os.listdir(statspath)

    # fix .ds_store issue on mac
    if '.DS_Store' in ticker_list:
        ticker_list.remove('.DS_Store')

    for ticker in tqdm(ticker_list, desc="Download progress:", unit="tickers"):
        try:
            link = f"http://finance.yahoo.com/quote/{ticker.upper()}/key-statistics"
            resp = requests.get(link, timeout=timeout)

            save = f"forward/{ticker}.html"
            with open(save, 'w') as file:
                file.write(resp.text)

        except Exception as e:
            # Best effort: report the failing ticker, pause briefly, and
            # carry on with the next one.
            print(f"{ticker}: {str(e)}\n")
            time.sleep(2)
80+
81+
82+
def forward():
    """Parse the saved pages in ``forward/`` into a DataFrame of features.

    For every file in ``forward/`` (``.DS_Store`` is skipped) the ticker is
    taken from the file name, all commas are stripped from the page source,
    and each label in ``features`` is looked up with a regular expression.
    Matched text is converted with ``data_string_to_float``; labels that
    cannot be found are recorded as missing.

    Returns:
        pandas.DataFrame: one row per file with the columns ``Date``,
        ``Unix``, ``Ticker``, ``Price``, ``stock_p_change``, ``SP500``,
        ``SP500_p_change`` followed by ``features``. All non-feature columns
        except ``Ticker`` are filled with 0, and every ``'N/A'`` value is
        replaced by NaN.
    """
    df_columns = ['Date',
                  'Unix',
                  'Ticker',
                  'Price',
                  'stock_p_change',
                  'SP500',
                  'SP500_p_change'] + features

    tickerfile_list = os.listdir('forward/')

    # fix .ds_store issue on mac
    if '.DS_Store' in tickerfile_list:
        tickerfile_list.remove('.DS_Store')

    # The patterns depend only on the feature labels, so compile them once
    # instead of rebuilding them for every file.
    patterns = [
        re.compile(
            r'>' + re.escape(variable) + r'.*?(\-?\d+\.*\d*K?M?B?|N/A[\\n|\s]*|>0|NaN)%?'
            r'(</td>|</span>)',
            flags=re.DOTALL)
        for variable in features
    ]

    rows = []
    for tickerfile in tqdm(tickerfile_list, desc="Parsing progress:", unit="tickers"):
        ticker = tickerfile.split('.html')[0].upper()
        # Use a context manager so the file handle is closed promptly.
        with open(f"forward/{tickerfile}") as file:
            source = file.read()
        source = source.replace(',', '')

        value_list = []
        for pattern in patterns:
            try:
                value = pattern.search(source).group(1)
                value_list.append(data_string_to_float(value))
            except AttributeError:
                # search() returned None: the label is not on this page.
                value_list.append('N/A')

        rows.append([0, 0, ticker,
                     0, 0, 0, 0] + value_list)

    # Build the frame once: DataFrame.append copied the whole frame on every
    # call and no longer exists in pandas >= 2.0.
    df = pd.DataFrame(rows, columns=df_columns)

    return df.replace('N/A', np.nan)
122+
123+
124+
# Script entry point: download the current pages, parse them, and write the
# parsed table to forward_sample.csv without the index column.
if __name__ == '__main__':
    check_yahoo()
    forward().to_csv('forward_sample.csv', index=False)

download_historical_prices.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import os
2+
from pandas_datareader import data as pdr
3+
import pandas as pd
4+
import fix_yahoo_finance as yf
5+
yf.pdr_override()
6+
7+
8+
# Default date range used by the dataset builders in this module.
START_DATE = "2003-08-01"
END_DATE = "2015-01-01"
10+
11+
12+
def build_stock_dataset(start=START_DATE, end=END_DATE):
    """Fetch adjusted closes for all tickers and write stock_prices.csv.

    Ticker names are the entries of ``intraQuarter/_KeyStats/``. A
    ``.DS_Store`` entry, if present, is deleted from disk and ignored.
    Columns with no data at all are dropped, the tickers that ended up
    without a column are printed, and remaining gaps are forward-filled
    before the table is saved.

    Args:
        start: Start of the requested date range.
        end: End of the requested date range.

    Returns:
        None.
    """
    statspath = "intraQuarter/_KeyStats/"
    tickers = os.listdir(statspath)

    # fix .ds_store issue on mac
    if '.DS_Store' in tickers:
        os.remove(f"{statspath}/.DS_Store")
        tickers.remove('.DS_Store')

    prices = pdr.get_data_yahoo(tickers, start, end)['Adj Close']

    # Remove tickers for which nothing was returned.
    prices.dropna(how='all', axis=1, inplace=True)

    missing_tickers = []
    for name in tickers:
        if name.upper() not in prices.columns:
            missing_tickers.append(name)
    print(f"{len(missing_tickers)} tickers are missing: \n {missing_tickers} ")

    prices.ffill(inplace=True)
    prices.to_csv('stock_prices.csv')
30+
31+
32+
def build_sp500_dataset(start=START_DATE, end=END_DATE):
    """Fetch price data for SPY and write it to sp500_index.csv.

    Args:
        start: Start of the requested date range.
        end: End of the requested date range.

    Returns:
        None.
    """
    # Pass the caller's range through; previously the module constants were
    # used unconditionally, so the arguments had no effect.
    index_data = pdr.get_data_yahoo('SPY', start=start, end=end)
    index_data.to_csv("sp500_index.csv")
35+
36+
37+
def build_dataset_iteratively(idx_start, idx_end, date_start=START_DATE, date_end=END_DATE):
    """Fetch adjusted closes one ticker at a time for a batch of tickers.

    Alternative to requesting every ticker in a single call: each ticker is
    downloaded separately, which also makes it possible to work in batches.
    Ticker names are the sorted entries of ``intraQuarter/_KeyStats/``
    (``.DS_Store`` is skipped), and only ``tickers[idx_start:idx_end]`` are
    processed. Tickers that return no data are reported and skipped.

    Note that the result is always written to ``stock_prices.csv``, so each
    call overwrites the output of the previous one.

    Args:
        idx_start: Index of the first ticker in the batch (slice start).
        idx_end: Index one past the last ticker in the batch (slice end).
        date_start: Start of the requested date range.
        date_end: End of the requested date range.

    Returns:
        None.
    """
    statspath = "intraQuarter/_KeyStats/"
    # Sort so that batch indices refer to the same tickers on every call;
    # os.listdir gives no ordering guarantee.
    ticker_list = sorted(
        name for name in os.listdir(statspath) if name != '.DS_Store')

    columns = []
    for ticker in ticker_list[idx_start:idx_end]:
        ticker = ticker.upper()

        stock_ohlc = pdr.get_data_yahoo(
            ticker, start=date_start, end=date_end)
        if stock_ohlc.empty:
            print(f"No data for {ticker}")
            continue
        columns.append(stock_ohlc['Adj Close'].rename(ticker))

    # Join all series in one go rather than re-copying the frame on every
    # iteration; pd.concat rejects an empty list, hence the fallback.
    df = pd.concat(columns, axis=1) if columns else pd.DataFrame()
    df.to_csv('stock_prices.csv')
54+
55+
56+
# Script entry point: build the per-stock price table first, then the
# index table.
if __name__ == "__main__":
    build_stock_dataset()
    build_sp500_dataset()

0 commit comments

Comments
 (0)