-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcontract_comp_viz.py
151 lines (112 loc) · 3.65 KB
/
contract_comp_viz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""Summary
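
Joins NBA team, salary and player-efficiency tables and plots player
efficiency (PER) against guaranteed contract money, once with seaborn and
once as an interactive plotly scatter plot.

Attributes:
contract_ranking (list): Not defined in the visible module; stale entry
kept from the generated docstring template.
df (pandas.DataFrame): Merged team / salary / efficiency table that both
plots read from.
per_cols (list): Column names selected from the efficiency table.
salary_cols (list): Column names selected from the salary table.
standardize (callable): Lowercases a player string and drops everything
from the first backslash onwards.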
"""
import os
import re
#import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.offline as pltly
import plotly.graph_objs as go
from data import SALARY_DATA_20_21, PER_DATA_20_21, TM_DATA_20_21, TM_DATA_17_18
# Alias the 2020-21 tables so the rest of the script does not hard-code a season.
SALARY_DATA, PER_DATA, TM_DATA = SALARY_DATA_20_21, PER_DATA_20_21, TM_DATA_20_21

# Build a lookup from full team name (with any '*' removed) to team
# abbreviation, using the 2017-18 team table's 'Team' and 'Tm' columns.
team = TM_DATA_17_18['Team']
tm = TM_DATA_17_18['Tm']
team_tm_map = dict(zip((name.replace('*', '') for name in team), tm))
"""
Resources:
https://www.basketball-reference.com/contracts/players.html
https://www.reddit.com/r/nba/comments/6dvmr1/best_advanced_stat_to_measure_how_good_a_player_is/
https://fansided.com/2017/01/31/nylon-calculus-reinventing-per/
Looks like the best all-in-one stats are:
PER (Player Efficiency Rating)
WS (Win Shares)
RPM (Real Plus Minus)
BPM (Boxscore Plus Minus)
"""
# Keep only the text to the left of the backslash,
# i.e. "LeBron James\jamesle01" becomes "lebron james".
def standardize(f):
    """Return the lowercased part of ``f`` that precedes its first backslash."""
    name, _, _ = f.partition('\\')
    return name.lower()
# Look up a team's abbreviation from its full name.
def get_tm_abbreviation(f):
    """Return the ``team_tm_map`` entry for ``f`` after removing every '*'.

    Raises:
        KeyError: if the cleaned name is not a key of ``team_tm_map``.
    """
    cleaned_name = f.replace("*", "")
    return team_tm_map[cleaned_name]
# make money a float not a string
def monify(m):
    """Convert a money string such as "$1,234,567" to a float.

    Args:
        m (str): money string; every '$' and ',' is stripped before the
            conversion.

    Returns:
        float: the numeric value. When ``m`` is not a string (e.g. a missing
        value) or the stripped text is not a valid number, ``m`` is returned
        unchanged.
    """
    try:
        return float(re.sub(r'[$,]', '', m))
    except (TypeError, ValueError):
        # Deliberate best effort: pass through anything that cannot be
        # parsed, but no longer swallow unrelated exceptions such as
        # KeyboardInterrupt.
        return m
# Normalize the salary table: comparable player names, numeric guarantees.
SALARY_DATA['Player'] = SALARY_DATA['Player'].map(standardize)
SALARY_DATA['Guaranteed'] = SALARY_DATA['Guaranteed'].map(monify)

# Only include players whose MP value is greater than 30. Take an explicit
# copy of the filtered rows so the column assignment below writes to an
# independent frame instead of a slice (avoids SettingWithCopyWarning).
PER_DATA = PER_DATA.loc[PER_DATA.MP > 30].copy()
PER_DATA['Player'] = PER_DATA['Player'].map(standardize)

TM_DATA['Win Percentage'] = TM_DATA['W'] / (TM_DATA['W'] + TM_DATA['L'])
# add Team abbreviations (they stopped being available in later data pulls)
TM_DATA['Tm'] = TM_DATA['Team'].map(get_tm_abbreviation)

salary_cols = ['Player', 'Tm', 'Signed Using', 'Guaranteed']
per_cols = ['Player', 'PER', 'BPM']
tm_cols = ['Tm', 'Win Percentage']

# Join salary rows to efficiency rows on player name, then attach each
# team's win percentage via the team abbreviation.
df = TM_DATA[tm_cols].merge(
    SALARY_DATA[salary_cols].merge(
        PER_DATA[per_cols],
        on='Player'),
    on='Tm')
# Keep a single row per (player, team) pair.
df = df.drop_duplicates(subset=['Player', 'Tm'])
print(df)
"""
Visualization
"""
# Static seaborn scatter plot: PER on x, guaranteed money on y, colored by team.
sns.set(style="whitegrid")
f, ax = plt.subplots(figsize=(6.5, 6.5))
# NOTE(review): `sizes` is passed without a `size` variable; confirm whether
# it has any effect on the markers.
sns.scatterplot(x="PER", y="Guaranteed",
                hue="Tm",
                sizes=(1, 8), linewidth=0,
                data=df, ax=ax)
# Show plain numbers on the y axis rather than scientific notation.
ax.get_yaxis().get_major_formatter().set_scientific(False)
# Raw string: the backslash before '$' is kept literally, so the title text is
# unchanged while the invalid "\$" escape sequence warning goes away.
plt.suptitle(r"Player Efficiency vs. Guaranteed $\$ by Team")
# plt.show()
# Interactive plotly scatter plot: one marker per player, colored by the
# team's win percentage.
tracePER = go.Scatter(
    x=df['PER'],
    y=df['Guaranteed'],
    mode='markers',
    name='PER',
    # Label of the form "player (Tm)"; built by Series concatenation because
    # string formatting didn't work here.
    text=df['Player'] + " (" + df['Tm'] + ")",
    marker=dict(
        size=16,
        color=df['Win Percentage'],  # set color equal to a variable
        colorbar=dict(title='Team Win Percentage (%)'),
        colorscale='Viridis',
        showscale=True,
    ),
)

# Axis title fonts use the nested `title=dict(text=..., font=...)` form; the
# older `titlefont` attribute is deprecated and rejected by newer plotly
# releases.
layout = go.Layout(
    title='Good Contract, Bad Contract',
    xaxis=dict(
        title=dict(
            text='Player Efficiency Rating (PER)',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f',
            ),
        ),
    ),
    yaxis=dict(
        title=dict(
            text='Contract Guarantee ($)',
            font=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f',
            ),
        ),
    ),
)

fig = go.Figure(data=[tracePER], layout=layout)
# data = [tracePER]
# Writes the figure to an HTML file and opens it (plotly offline mode).
pltly.plot(fig)
# pltly.plot([traceBPM])