import pandas as pd
from textblob import TextBlob

# Path to the scraped crypto headline dataset
notebook_path = "/Users/umuteyidogan/Desktop/IGP_Project/crypto_headline_data.csv"

df = pd.read_csv(notebook_path)

# Custom crypto lexicon: hand-assigned sentiment weights for crypto-specific terms
crypto_lexicon = {
    'bullish': 2.0,
    'bearish': -2.0,
    'hodl': 1.5,
    'fomo': -1.5,
    'pump': 1.5,
    'dump': -1.5,
    'moon': 2.0,
    'whale': 0.5,
    'altcoin': 0.5,
    'scam': -2.5,
    'rugpull': -2.0,
    'pump and dump': -2.0,
    'moonshot': 2.0,
    'to the moon': 2.0,
    'bear market': -2.0,
    'bull market': 2.0,
    'crypto': 0.5,
    'blockchain': 0.5,
    'yield farming': 1.0,
    'staking': 1.0,
    'token': 0.5,
    'gas fee': -0.5,
    'mining': 0.5,
    'hashrate': 0.5,
    'volatile': -1.5,
    'regulation': -0.5,
    'adoption': 1.5,
    'innovation': 2.0,
    'security': 1.0,
    'fraud': -2.5,
    'hack': -2.0,
    'partnership': 1.5,
    'investment': 1.5,
    'exchange': 0.5,
    'wallet': 0.5,
    'halving': 1.5,
    'funding': 1.0,
    'launch': 1.5,
    'collapse': -2.5,
    'lawsuit': -2.0,
    'profit': 2.0,
    'loss': -2.0,
    'growth': 1.5,
    'decline': -1.5,
    'risk': -1.0,
    'opportunity': 2.0,
    'recovery': 1.5,
    'crash': -2.5,
    'surge': 2.0,
    'plummet': -2.0,
    'rebound': 1.5,
    'stable': 1.0,
    'plunge': -2.0,
    'airdrop': 1.0,
    'bull': 1.5,
    'bear': -1.5,
    'fud': -2.0,
    'rekt': -2.0,
    'satoshi': 0.5,
    'burn': 1.0,
    'mint': 1.0,
    'whitelist': 1.0,
    'blacklist': -1.0,
    'staking reward': 1.0,
    'buy the dip': 2.0,
    'sell the news': -1.0,
    'short squeeze': 1.5,
    'margin call': -1.5,
    'paper hands': -1.5,
    'diamond hands': 1.5,
    'moonboy': 1.5,
    'bagholder': -1.5,
    'bear trap': -1.5,
    'bull trap': -1.5,
    'dead cat bounce': -1.5,
    'double top': -1.5,
    'double bottom': 1.5,
    'cup and handle': 1.5,
    'head and shoulders': -1.5,
    'golden cross': 1.5,
    'death cross': -1.5,
    'consolidation': 0.5,
    'take profit': 1.5,
    'breakout': 1.5,
    'breakdown': -1.5,
    'bull flag': 1.5,
    'bear flag': -1.5,
    'buy wall': 1.5,
    'sell wall': -1.5,
    'stop-loss': 0.5,
    'recession': -1.0,
    'economic downturn': -2.0,
    'market correction': -1.5,
    'bull run': 2.0,
    'bear run': -2.0,
    'alt season': 1.5,
    'defi': 0.5,
    'dao': 1.0,
    'dapp': 1.0,
    'nft': 0.5,
    'smart contract': 0.5,
    'ico': 1.5,
    'ath': 2.0,
    'atl': -2.0,
    'btc': 0.5,
    'xbt': 0.5,
    'confirmation': 0.5,
    'cold storage': 1.0,
    'consensus': 0.5,
    'cross-chain': 0.5,
    'cryptography': 0.5,
    'decryption': 0.5,
    'dominance': 0.5,
    'double spending': -2.0,
    'dusting attack': -2.0,
    'emission': 0.5,
    'encryption': 0.5,
    'impermanent loss': -1.5,
    'memecoin': -1.0,
    'node': 0.5,
    'order book': 0.5,
    'parachains': 0.5,
    'peer-to-peer (P2P)': 0.5,
    'phishing attack': -2.0
}

def get_custom_sentiment(text, lexicon):
    """Score text word by word: use the custom crypto lexicon when a word matches,
    otherwise fall back to TextBlob's polarity; return the average over all words."""
    if isinstance(text, str):
        words = text.split()
        sentiment_score = 0.0
        for word in words:
            if word in lexicon:
                sentiment_score += lexicon[word]
            else:
                analysis = TextBlob(word).sentiment
                sentiment_score += analysis.polarity
        return sentiment_score / len(words) if words else 0
    return 0
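
# Note: text.split() only matches single, case-sensitive tokens, so multi-word lexicon
# entries ('bull market', 'buy the dip', ...) and capitalised words ('Bullish') never hit
# the custom weights. A minimal phrase-aware variant is sketched below under the
# assumption that lowercase substring matching is acceptable; it is not wired into the
# pipeline and the helper name is illustrative.
def get_phrase_sentiment(text, lexicon):
    if not isinstance(text, str):
        return 0
    lowered = text.lower()
    score = 0.0
    matched = 0
    for phrase, weight in lexicon.items():
        if phrase in lowered:  # crude substring match, but it does catch multi-word terms
            score += weight
            matched += 1
    # Fall back to TextBlob on the whole headline when no lexicon term matched
    if matched == 0:
        return TextBlob(text).sentiment.polarity
    return score / matched

# e.g. get_phrase_sentiment("Bull market hopes rise after ETF approval", crypto_lexicon)
# picks up 'bull market' (+2.0), which the word-level scorer above misses.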

df['Sentiment'] = df['Headline'].apply(lambda x: get_custom_sentiment(x, crypto_lexicon))

# Assign sentiment labels: 1 = positive, -1 = negative, 0 = neutral
def assign_label(score):
    if score > 0:
        return 1
    elif score < 0:
        return -1
    else:
        return 0

df['Sentiment_Label'] = df['Sentiment'].apply(assign_label)

print(df)

# Aggregate daily sentiment
df['Published date'] = pd.to_datetime(df['Published date']).dt.date  # Collapse timestamps to calendar dates
daily_sentiment = df.groupby('Published date').agg(
    Positive_Count=('Sentiment_Label', lambda x: (x == 1).sum()),
    Negative_Count=('Sentiment_Label', lambda x: (x == -1).sum()),
    Neutral_Count=('Sentiment_Label', lambda x: (x == 0).sum())
).reset_index()

# Calculate percentages
daily_sentiment['Total'] = daily_sentiment['Positive_Count'] + daily_sentiment['Negative_Count'] + daily_sentiment['Neutral_Count']
daily_sentiment['Positive_Percentage'] = (daily_sentiment['Positive_Count'] / daily_sentiment['Total']) * 100
daily_sentiment['Negative_Percentage'] = (daily_sentiment['Negative_Count'] / daily_sentiment['Total']) * 100
daily_sentiment['Neutral_Percentage'] = (daily_sentiment['Neutral_Count'] / daily_sentiment['Total']) * 100
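
# Alternative sketch (not part of the original pipeline): pd.crosstab can produce the same
# per-day percentage breakdown in a single call, normalised across each row:
#   pd.crosstab(df['Published date'], df['Sentiment_Label'], normalize='index') * 100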

print(daily_sentiment)

# Save the results
output_path = "/Users/umuteyidogan/Desktop/IGP_Project/Crypto_Sentiment_Analysis.xlsx"
df.to_excel(output_path, index=False)

print(f"Analysis results have been saved to {output_path}")

# Save daily sentiment to a new file
daily_sentiment_output_path = "/Users/umuteyidogan/Desktop/IGP_Project/Daily_Sentiment_Analysis.xlsx"
daily_sentiment.to_excel(daily_sentiment_output_path, index=False)
print(f"Daily sentiment results have been saved to {daily_sentiment_output_path}")

# Optional: plot the distribution of sentiment scores
'''
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.hist(df['Sentiment'], bins=20, edgecolor='k', alpha=0.7)
plt.title('Sentiment Analysis Distribution')
plt.xlabel('Sentiment Polarity')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
'''