
Commit 75dc15c

Add files via upload
1 parent 69e2954 commit 75dc15c

16 files changed: +25359 -0 lines changed

Crypto_Sentiment_Analysis_VADER.xlsx

1.4 MB
Binary file not shown.
1.39 MB
Binary file not shown.

Daily_Sentiment_Analysis_VADER.csv

Lines changed: 1983 additions & 0 deletions
Large diffs are not rendered by default.
70.7 KB
Binary file not shown.

EDA2.ipynb

Lines changed: 597 additions & 0 deletions
Large diffs are not rendered by default.

News cleaning script.ipynb

Lines changed: 874 additions & 0 deletions
Large diffs are not rendered by default.

Project_Gantt_Chart.pdf

129 KB
Binary file not shown.

Sentiment analysis words.ipynb

Lines changed: 1028 additions & 0 deletions
Large diffs are not rendered by default.

Textblob_latest.py

Lines changed: 208 additions & 0 deletions
@@ -0,0 +1,208 @@
import pandas as pd
from textblob import TextBlob

# Path to the scraped crypto headline CSV
notebook_path = "/Users/umuteyidogan/Desktop/IGP_Project/crypto_headline_data.csv"

df = pd.read_csv(notebook_path)

# Custom sentiment weights for crypto-specific terms (positive > 0, negative < 0)
crypto_lexicon = {
    'bullish': 2.0,
    'bearish': -2.0,
    'hodl': 1.5,
    'fomo': -1.5,
    'pump': 1.5,
    'dump': -1.5,
    'moon': 2.0,
    'whale': 0.5,
    'altcoin': 0.5,
    'scam': -2.5,
    'rugpull': -2.0,
    'pump and dump': -2.0,
    'moonshot': 2.0,
    'to the moon': 2.0,
    'bear market': -2.0,
    'bull market': 2.0,
    'crypto': 0.5,
    'blockchain': 0.5,
    'yield farming': 1.0,
    'staking': 1.0,
    'token': 0.5,
    'gas fee': -0.5,
    'mining': 0.5,
    'hashrate': 0.5,
    'volatile': -1.5,
    'regulation': -0.5,
    'adoption': 1.5,
    'innovation': 2.0,
    'security': 1.0,
    'fraud': -2.5,
    'hack': -2.0,
    'partnership': 1.5,
    'investment': 1.5,
    'exchange': 0.5,
    'wallet': 0.5,
    'halving': 1.5,
    'funding': 1.0,
    'launch': 1.5,
    'collapse': -2.5,
    'lawsuit': -2.0,
    'profit': 2.0,
    'loss': -2.0,
    'growth': 1.5,
    'decline': -1.5,
    'risk': -1.0,
    'opportunity': 2.0,
    'recovery': 1.5,
    'crash': -2.5,
    'surge': 2.0,
    'plummet': -2.0,
    'rebound': 1.5,
    'stable': 1.0,
    'plunge': -2.0,
    'airdrop': 1.0,
    'bull': 1.5,
    'bear': -1.5,
    'fud': -2.0,
    'rekt': -2.0,
    'satoshi': 0.5,
    'burn': 1.0,
    'mint': 1.0,
    'whitelist': 1.0,
    'blacklist': -1.0,
    'staking reward': 1.0,
    'buy the dip': 2.0,
    'sell the news': -1.0,
    'short squeeze': 1.5,
    'margin call': -1.5,
    'paper hands': -1.5,
    'diamond hands': 1.5,
    'moonboy': 1.5,
    'bagholder': -1.5,
    'bear trap': -1.5,
    'bull trap': -1.5,
    'dead cat bounce': -1.5,
    'double top': -1.5,
    'double bottom': 1.5,
    'cup and handle': 1.5,
    'head and shoulders': -1.5,
    'golden cross': 1.5,
    'death cross': -1.5,
    'consolidation': 0.5,
    'take profit': 1.5,
    'breakout': 1.5,
    'breakdown': -1.5,
    'bull flag': 1.5,
    'bear flag': -1.5,
    'buy wall': 1.5,
    'sell wall': -1.5,
    'stop-loss': 0.5,
    'recession': -1.0,
    'economic downturn': -2.0,
    'market correction': -1.5,
    'bull run': 2.0,
    'bear run': -2.0,
    'alt season': 1.5,
    'defi': 0.5,
    'dao': 1.0,
    'dapp': 1.0,
    'nft': 0.5,
    'smart contract': 0.5,
    'ico': 1.5,
    'ath': 2.0,
    'atl': -2.0,
    'btc': 0.5,
    'xbt': 0.5,
    'confirmation': 0.5,
    'cold storage': 1.0,
    'consensus': 0.5,
    'cross-chain': 0.5,
    'cryptography': 0.5,
    'decryption': 0.5,
    'dominance': 0.5,
    'double spending': -2.0,
    'dusting attack': -2.0,
    'emission': 0.5,
    'encryption': 0.5,
    'impermanent loss': -1.5,
    'memecoin': -1.0,
    'node': 0.5,
    'order book': 0.5,
    'parachains': 0.5,
    'peer-to-peer (P2P)': 0.5,
    'phishing attack': -2.0
}


def get_custom_sentiment(text, lexicon):
    """Average per-word sentiment: the lexicon value if a word is in the
    custom lexicon, otherwise TextBlob polarity as a fallback.
    Note: multi-word lexicon entries (e.g. 'bull market') are not matched
    by this word-by-word loop."""
    if isinstance(text, str):
        words = text.lower().split()  # lowercase so headline words match the lowercase lexicon keys
        sentiment_score = 0.0
        for word in words:
            if word in lexicon:
                sentiment_score += lexicon[word]
            else:
                analysis = TextBlob(word).sentiment
                sentiment_score += analysis.polarity
        return sentiment_score / len(words) if words else 0
    return 0

df['Sentiment'] = df['Headline'].apply(lambda x: get_custom_sentiment(x, crypto_lexicon))

# Assign sentiment labels: 1 = positive, -1 = negative, 0 = neutral
def assign_label(score):
    if score > 0:
        return 1
    elif score < 0:
        return -1
    else:
        return 0

df['Sentiment_Label'] = df['Sentiment'].apply(assign_label)

print(df)

# Aggregate sentiment counts per publication day
df['Published date'] = pd.to_datetime(df['Published date']).dt.date  # keep only the calendar date so headlines group by day
daily_sentiment = df.groupby('Published date').agg(
    Positive_Count=('Sentiment_Label', lambda x: (x == 1).sum()),
    Negative_Count=('Sentiment_Label', lambda x: (x == -1).sum()),
    Neutral_Count=('Sentiment_Label', lambda x: (x == 0).sum())
).reset_index()

# Calculate the share of positive, negative and neutral headlines per day
daily_sentiment['Total'] = daily_sentiment['Positive_Count'] + daily_sentiment['Negative_Count'] + daily_sentiment['Neutral_Count']
daily_sentiment['Positive_Percentage'] = (daily_sentiment['Positive_Count'] / daily_sentiment['Total']) * 100
daily_sentiment['Negative_Percentage'] = (daily_sentiment['Negative_Count'] / daily_sentiment['Total']) * 100
daily_sentiment['Neutral_Percentage'] = (daily_sentiment['Neutral_Count'] / daily_sentiment['Total']) * 100

print(daily_sentiment)

# Save the per-headline results
output_path = "/Users/umuteyidogan/Desktop/IGP_Project/Crypto_Sentiment_Analysis.xlsx"
df.to_excel(output_path, index=False)

print(f"Analysis results have been saved to {output_path}")

# Save daily sentiment to a new file
daily_sentiment_output_path = "/Users/umuteyidogan/Desktop/IGP_Project/Daily_Sentiment_Analysis.xlsx"
daily_sentiment.to_excel(daily_sentiment_output_path, index=False)
print(f"Daily sentiment results have been saved to {daily_sentiment_output_path}")

# Optional: histogram of the headline sentiment distribution
'''
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.hist(df['Sentiment'], bins=20, edgecolor='k', alpha=0.7)
plt.title('Sentiment Analysis Distribution')
plt.xlabel('Sentiment Polarity')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
'''
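Because get_custom_sentiment splits each headline into single words, multi-word lexicon entries such as 'bull market', 'buy the dip' or 'dead cat bounce' never contribute to the score. The snippet below is a minimal sketch of a phrase-aware variant; it is not part of the committed script, and the helper name score_with_phrases and the example headline are illustrative only.

from textblob import TextBlob

def score_with_phrases(text, lexicon):
    # Illustrative helper, not part of Textblob_latest.py.
    if not isinstance(text, str) or not text.strip():
        return 0.0
    text_lower = text.lower()
    score = 0.0
    matched_phrases = 0
    # Credit multi-word lexicon entries first (longest phrases first) and
    # blank them out so their words are not counted again below.
    for phrase in sorted((k for k in lexicon if ' ' in k), key=len, reverse=True):
        if phrase in text_lower:
            score += lexicon[phrase]
            matched_phrases += 1
            text_lower = text_lower.replace(phrase, ' ')
    # Score the remaining single words the same way as get_custom_sentiment.
    words = text_lower.split()
    for word in words:
        if word in lexicon:
            score += lexicon[word]
        else:
            score += TextBlob(word).sentiment.polarity
    total_units = matched_phrases + len(words)
    return score / total_units if total_units else 0.0

# Example (illustrative headline):
# print(score_with_phrases("Bull market hopes fade after exchange hack", crypto_lexicon))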
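The commit also uploads Crypto_Sentiment_Analysis_VADER.xlsx and Daily_Sentiment_Analysis_VADER.csv, but the VADER script itself does not appear in this diff. As a rough sketch only (an assumption about how the same lexicon could be reused, not the project's actual VADER code), the crypto_lexicon defined above can be merged into VADER's built-in lexicon with the vaderSentiment package:

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Hypothetical sketch: feed the custom crypto terms to VADER.
analyzer = SentimentIntensityAnalyzer()
analyzer.lexicon.update(crypto_lexicon)  # crypto_lexicon as defined in Textblob_latest.py

def vader_label(headline):
    # compound score lies in [-1, 1]; the commonly used cut-offs are +/-0.05
    compound = analyzer.polarity_scores(headline)['compound']
    if compound >= 0.05:
        return 1
    elif compound <= -0.05:
        return -1
    return 0

# df['Sentiment_Label'] = df['Headline'].apply(vader_label)

VADER's own lexicon weights run roughly from -4 to +4, so the custom values above stay within that range.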
