Skip to content

Commit 9b56612

Browse files
authored
Add files via upload
1 parent 22e50f3 commit 9b56612

File tree

1 file changed

+221
-0
lines changed

1 file changed

+221
-0
lines changed

svm_simulation.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
import pandas as pd
2+
from sklearn.model_selection import train_test_split, GridSearchCV
3+
from sklearn.svm import SVC
4+
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
5+
from sklearn.preprocessing import StandardScaler
6+
from sklearn.pipeline import Pipeline
7+
import ta
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
11+
# Locations of the three input datasets: daily sentiment, Bitcoin prices, trading volume
sentiment_file_path = '/Users/umuteyidogan/Desktop/IGP_Project/Daily_Sentiment_Analysis_Lem_Headline.xlsx'
bitcoin_file_path = '/Users/umuteyidogan/Desktop/IGP_Project/bitcoin_price_with_5_labels_2.csv'
trading_volume_file_path = '/Users/umuteyidogan/Desktop/IGP_Project/trading_volume_with_labels.csv'

# Read each dataset into its own DataFrame (the sentiment file is an Excel workbook)
sentiment_data = pd.read_excel(sentiment_file_path)
bitcoin_data = pd.read_csv(bitcoin_file_path)
trading_volume_data = pd.read_csv(trading_volume_file_path)

# Parse every date column as datetime so the later merges compare like with like
for frame, date_column in (
    (sentiment_data, 'Published date'),
    (bitcoin_data, 'Date'),
    (trading_volume_data, 'Date'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])
27+
28+
# Merge the sentiment data with the Bitcoin price data on the date
merged_data = pd.merge(sentiment_data, bitcoin_data, left_on='Published date', right_on='Date', how='inner')

# Merge the resulting data with the trading volume data
final_data = pd.merge(merged_data, trading_volume_data, on='Date', how='inner')

# Rolling indicators (SMA/EMA/RSI/MACD/Bollinger) are only meaningful on a
# chronologically ordered series, so enforce the order before computing them.
# mergesort is stable: rows sharing a date keep their merged order.
final_data = final_data.sort_values('Date', kind='mergesort').reset_index(drop=True)

# Calculate technical indicators from the closing price
final_data['SMA_7'] = ta.trend.sma_indicator(final_data['Close'], window=7)
final_data['EMA_14'] = ta.trend.ema_indicator(final_data['Close'], window=14)
final_data['RSI'] = ta.momentum.rsi(final_data['Close'], window=14)
final_data['MACD'] = ta.trend.macd(final_data['Close'])
final_data['MACD_Signal'] = ta.trend.macd_signal(final_data['Close'])
final_data['Bollinger_High'] = ta.volatility.bollinger_hband(final_data['Close'])
final_data['Bollinger_Low'] = ta.volatility.bollinger_lband(final_data['Close'])

# Drop rows with NaN values caused by the indicators calculation
final_data = final_data.dropna()

# Select relevant columns for the final dataset
feature_columns = ['Positive_Percentage', 'Negative_Percentage', 'Neutral_Percentage', 'Volume',
                   'SMA_7', 'EMA_14', 'RSI', 'MACD', 'MACD_Signal', 'Bollinger_High',
                   'Bollinger_Low']
final_data = final_data[['Published date'] + feature_columns + ['Close', 'Label']]

# Keep the rows in chronological order (no shuffling). The trading simulation
# further down computes returns between consecutive entries of the test-set
# prices, which only makes sense if those entries are consecutive in time.
final_data = final_data.reset_index(drop=True)

# Define the features and target variable
features = final_data[feature_columns]
target = final_data['Label']

# Chronological split: the most recent 20% of rows form the test set.
# shuffle=False keeps close_test a contiguous price series and prevents rows
# whose indicator windows overlap from landing on both sides of the split.
X_train, X_test, y_train, y_test, close_train, close_test = train_test_split(
    features, target, final_data['Close'], test_size=0.2, shuffle=False)

# Define a pipeline with a scaler and SVM classifier
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardize features
    ('svc', SVC(probability=True))  # SVM Classifier with probability estimates
])

# Parameter grid for GridSearchCV. gamma is ignored by the linear kernel, so it
# is only searched for the RBF kernel; this avoids fitting duplicate candidates.
param_grid = [
    {'svc__kernel': ['linear'], 'svc__C': [0.1, 1, 10, 100]},
    {'svc__kernel': ['rbf'], 'svc__C': [0.1, 1, 10, 100], 'svc__gamma': [1, 0.1, 0.01, 0.001]},
]

# Initialize GridSearchCV with 5-fold cross-validation
# NOTE(review): these folds are not time-aware; consider a forward-chaining
# splitter if the cross-validated score is reported anywhere.
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)

# Fit GridSearchCV
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# With the default refit=True, best_estimator_ has already been refit on the
# whole training set using the best parameters; no second fit is needed.
best_model = grid_search.best_estimator_

# Predict the class probabilities on the test set
y_pred_prob = best_model.predict_proba(X_test)

# Predict the class labels on the test set
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Display the results
print(f"Best parameters: {best_params}")
print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

# Verify that the confusion matrix sums up to the total number of test instances
print(f"Total number of test instances: {len(y_test)}")
print(f"Sum of confusion matrix values: {conf_matrix.sum()}")
111+
112+
# Trading simulation setup
initial_capital = 10000.0  # Starting capital of every simulated trader (float)
num_people = 100  # Number of simulated traders per strategy
trade_amount = 1000  # Notional value committed to each trade
num_trades = len(y_test)  # One trade per test-set row

# Every model-following trader makes the same decisions: repeat the predicted
# labels once per trader and store them as integers
model_trades = np.tile(y_pred, (num_people, 1)).astype(int)

# Random traders draw one of the five labels uniformly for every trade
# (seeded so the draw is reproducible)
np.random.seed(42)
label_choices = [0, 1, 2, 3, 4]
random_trades = np.random.choice(label_choices, size=(num_people, num_trades)).astype(int)
125+
126+
# Function to simulate trades with new labels
127+
def simulate_trades(trades, prices, starting_capital=None, notional=None):
    """Return each trader's capital after replaying all trades.

    Column ``i`` (for ``i >= 1``) of ``trades`` is applied to the relative
    price change between ``prices[i - 1]`` and ``prices[i]``; column 0 is
    never traded because it has no previous price. Labels map to exposure
    as follows: 0 = strong sell (-2x), 1 = sell (-1x), 2 = hold (0),
    3 = buy (+1x), 4 = strong buy (+2x). Any other label is treated as hold.

    Args:
        trades: 2-D array of labels with shape (traders, steps).
        prices: Sequence with at least ``steps`` prices.
        starting_capital: Capital each trader starts with. Defaults to the
            module-level ``initial_capital``.
        notional: Amount committed per 1x trade. Defaults to the
            module-level ``trade_amount``.

    Returns:
        1-D float64 array with the final capital of every trader.

    Raises:
        IndexError: If ``prices`` has fewer entries than ``trades`` has columns.
    """
    # Fall back to the script-level settings so existing two-argument calls
    # keep working unchanged.
    if starting_capital is None:
        starting_capital = initial_capital
    if notional is None:
        notional = trade_amount

    trades = np.asarray(trades)
    n_traders, n_steps = trades.shape
    capital = np.full(n_traders, starting_capital, dtype=np.float64)
    if n_steps < 2:
        # Fewer than two columns means there is no price change to trade on.
        return capital

    prices = np.asarray(prices, dtype=np.float64)
    if prices.shape[0] < n_steps:
        raise IndexError("prices must contain at least one entry per trade column")
    prices = prices[:n_steps]

    # Relative change from each price to the next one.
    pct_change = np.diff(prices) / prices[:-1]

    # label - 2 is exactly the exposure multiplier for the labels 0..4;
    # everything else contributes nothing.
    step_trades = trades[:, 1:]
    exposure = np.where(np.isin(step_trades, (0, 1, 2, 3, 4)), step_trades - 2, 0)

    capital += (exposure * (notional * pct_change)).sum(axis=1)
    return capital
146+
147+
# Use the Close prices from the test set
prices = close_test.values

# Final capital of every trader under the random and the model-based strategy
random_capital_end = simulate_trades(random_trades, prices)
model_capital_end = simulate_trades(model_trades, prices)

# Calculate average ending capital for both strategies
random_average_end_capital = np.mean(random_capital_end)
model_average_end_capital = np.mean(model_capital_end)

# Display the results
print(f"Average ending capital for random strategy: ${random_average_end_capital:.2f}")
print(f"Average ending capital for model-based strategy: ${model_average_end_capital:.2f}")

# Additional debugging info
print(f"Random strategy capital range: {random_capital_end.min()} to {random_capital_end.max()}")
print(f"Model-based strategy capital range: {model_capital_end.min()} to {model_capital_end.max()}")

# Plotting the results
plt.figure(figsize=(12, 6))

# Derive the bin edges from the observed capital range. A fixed range that is
# much wider than the data would put nearly every sample into a single bar.
capital_low = min(random_capital_end.min(), model_capital_end.min())
capital_high = max(random_capital_end.max(), model_capital_end.max())
if capital_low == capital_high:
    # All traders ended with the same capital: widen the range so the bin
    # edges are still strictly increasing.
    capital_low, capital_high = capital_low - 0.5, capital_high + 0.5
bins = np.linspace(capital_low, capital_high, 100)

plt.hist(random_capital_end, bins=bins, alpha=0.7, label='Random Strategy')
plt.hist(model_capital_end, bins=bins, alpha=0.7, label='Model-Based Strategy')

# Mean markers use the first two colours of the default cycle so each dashed
# line matches the histogram it belongs to.
plt.axvline(random_average_end_capital, color='C0', linestyle='dashed', linewidth=1)
plt.axvline(model_average_end_capital, color='C1', linestyle='dashed', linewidth=1)

plt.xlabel('Ending Capital')
plt.ylabel('Frequency')
plt.legend()
plt.title('Distribution of Ending Capital for Random and Model-Based Strategies')
plt.show()
185+
186+
# Variant of simulate_trades that returns the capital after every trade instead of only the final capital
187+
def simulate_trades_over_time(trades, prices, starting_capital=None, notional=None):
    """Return each trader's running capital after every trade.

    Column ``i`` (for ``i >= 1``) of ``trades`` is applied to the relative
    price change between ``prices[i - 1]`` and ``prices[i]``. Labels map to
    exposure as follows: 0 = strong sell (-2x), 1 = sell (-1x), 2 = hold (0),
    3 = buy (+1x), 4 = strong buy (+2x). Any other label is treated as hold,
    i.e. the previous capital is carried forward.

    Args:
        trades: 2-D array of labels with shape (traders, steps).
        prices: Sequence with at least ``steps`` prices.
        starting_capital: Capital each trader starts with. Defaults to the
            module-level ``initial_capital``.
        notional: Amount committed per 1x trade. Defaults to the
            module-level ``trade_amount``.

    Returns:
        float64 array of shape (traders, steps); column 0 holds the starting
        capital and column ``i`` the capital after trade ``i``.

    Raises:
        IndexError: If ``prices`` has fewer entries than ``trades`` has columns.
    """
    # Fall back to the script-level settings so existing two-argument calls
    # keep working unchanged.
    if starting_capital is None:
        starting_capital = initial_capital
    if notional is None:
        notional = trade_amount

    trades = np.asarray(trades)
    n_traders, n_steps = trades.shape
    capital = np.full((n_traders, n_steps), starting_capital, dtype=np.float64)
    if n_steps < 2:
        # Fewer than two columns means there is no price change to trade on.
        return capital

    prices = np.asarray(prices, dtype=np.float64)
    if prices.shape[0] < n_steps:
        raise IndexError("prices must contain at least one entry per trade column")
    prices = prices[:n_steps]

    # Relative change from each price to the next one.
    pct_change = np.diff(prices) / prices[:-1]

    # label - 2 is exactly the exposure multiplier for the labels 0..4;
    # everything else contributes nothing (hold).
    step_trades = trades[:, 1:]
    exposure = np.where(np.isin(step_trades, (0, 1, 2, 3, 4)), step_trades - 2, 0)

    # Column 0 keeps the starting capital, the remaining columns receive the
    # per-trade profit or loss; a left-to-right cumulative sum then yields the
    # running capital.
    capital[:, 1:] = exposure * (notional * pct_change)
    return np.cumsum(capital, axis=1)
203+
204+
# Capital trajectory of every simulated trader under each strategy
random_capital_over_time = simulate_trades_over_time(random_trades, prices)
model_capital_over_time = simulate_trades_over_time(model_trades, prices)

# Average across traders (axis 0) to get one capital curve per strategy
random_average_capital_over_time = np.mean(random_capital_over_time, axis=0)
model_average_capital_over_time = np.mean(model_capital_over_time, axis=0)

# Draw both average curves on one figure: random strategy dashed red,
# model-based strategy solid blue
plt.figure(figsize=(12, 6))
for curve, line_options in (
    (random_average_capital_over_time, {'label': 'Random Strategy', 'color': 'red', 'linestyle': '--'}),
    (model_average_capital_over_time, {'label': 'Model-based Strategy', 'color': 'blue'}),
):
    plt.plot(curve, **line_options)
plt.title('Trading Simulation: Capital Over Time')
plt.xlabel('Trade Number')
plt.ylabel('Average Capital ($)')
plt.legend()
plt.grid(True)
plt.show()

0 commit comments

Comments
 (0)