|
"""Reshape wide per-team tracking CSVs into one long-format CSV.

Each input file is read with ``pandas.read_csv``, thinned to every
``SAMPLE_STEP``-th row, and unpivoted so that every (x, y) column pair
becomes its own set of rows tagged with the pair's column header
(``jersey_number``) and a team label. The Away and Home results are then
combined, ordered by frame, and written to ``data/<name entered by user>``.
"""

from typing import Optional

import numpy as np
import pandas as pd

DATA_DIR = "data"
AWAY_FILE = f"{DATA_DIR}/Sample_Game_2_RawTrackingData_Away_Team.csv"
HOME_FILE = f"{DATA_DIR}/Sample_Game_2_RawTrackingData_Home_Team.csv"

# Keep one row out of every SAMPLE_STEP rows of the raw files.
SAMPLE_STEP = 7

# The first three raw columns are renamed to these; (x, y) pairs follow.
LONG_COLUMNS = ["half", "frame", "time", "x", "y"]
FIRST_XY_COLUMN = 3

# Number of (x, y) column pairs unpivoted per file. These were chosen by
# hand for the sample files and must be adjusted if the column count differs.
# NOTE(review): Away appears to include the ball pair and Home to leave it
# out so the ball is exported only once -- confirm against the raw files.
AWAY_XY_PAIRS = 13
HOME_XY_PAIRS = 12


def load_sampled(path: str, step: int = SAMPLE_STEP) -> pd.DataFrame:
    """Read the CSV at *path* and keep every *step*-th row."""
    return pd.read_csv(path).iloc[::step, :]


def to_long_format(
    raw: pd.DataFrame,
    n_pairs: int,
    team: str,
    *,
    ball_jersey: Optional[str] = None,
    fill_missing: Optional[float] = None,
    skip_leading_rows: int = 1,
) -> pd.DataFrame:
    """Unpivot the first *n_pairs* (x, y) column pairs of *raw* into rows.

    Args:
        raw: Wide frame whose columns 0-2 hold half/frame/time and whose
            remaining columns come in consecutive (x, y) pairs.
        n_pairs: How many (x, y) pairs to unpivot, starting at column 3.
        team: Value written to the ``team`` column.
        ball_jersey: If given, rows whose ``jersey_number`` equals this
            value get ``team`` set to ``"Ball"``.
        fill_missing: If given, missing x and y values are replaced by it
            and every row is kept; otherwise rows with a missing x are
            dropped.
        skip_leading_rows: Number of rows discarded from the top of the
            cleaned result (see the note in the body).

    Returns:
        Frame with columns half, frame, time, x, y, jersey_number, team,
        where ``frame`` has been converted to numbers (unparseable -> NaN).

    Raises:
        ValueError: If *raw* has fewer than ``3 + 2 * n_pairs`` columns.
    """
    result_columns = LONG_COLUMNS + ["jersey_number", "team"]
    if n_pairs <= 0:
        return pd.DataFrame(columns=result_columns)

    available = (raw.shape[1] - FIRST_XY_COLUMN) // 2
    if n_pairs > available:
        raise ValueError(
            f"requested {n_pairs} (x, y) column pairs but the input only "
            f"has {available}"
        )

    # Collect the per-pair slices and concatenate once, rather than growing
    # a frame inside the loop.
    pieces = []
    for pair in range(n_pairs):
        x_col = FIRST_XY_COLUMN + 2 * pair
        piece = raw.iloc[:, [0, 1, 2, x_col, x_col + 1]].copy()
        piece.columns = LONG_COLUMNS
        piece["jersey_number"] = raw.columns[x_col]
        pieces.append(piece)
    long_df = pd.concat(pieces, ignore_index=True)

    long_df["team"] = team
    if ball_jersey is not None:
        long_df.loc[long_df["jersey_number"] == ball_jersey, "team"] = "Ball"

    if fill_missing is None:
        # Only x decides whether a row is kept; a missing y alone stays.
        long_df = long_df[long_df["x"].notna()]
    else:
        for axis in ("x", "y"):
            long_df.loc[long_df[axis].isna(), axis] = fill_missing

    # Remove header rows that were read as data.
    long_df = long_df[long_df["half"] != "Period"]

    # NOTE(review): the first remaining row of each team is discarded,
    # presumably a leftover header row -- confirm against the raw files.
    long_df = long_df.iloc[skip_leading_rows:].copy()
    long_df["frame"] = pd.to_numeric(long_df["frame"], errors="coerce")
    return long_df


def build_export_frame(away: pd.DataFrame, home: pd.DataFrame) -> pd.DataFrame:
    """Stack both teams, order by frame, and convert ``time`` to time / 60.

    The sort is stable, so rows that share a frame keep a reproducible
    order: Away rows before Home rows, each in column-pair order.
    """
    combined = pd.concat([away, home], ignore_index=True)
    combined = combined.sort_values(by="frame", kind="mergesort")
    seconds = pd.to_numeric(combined["time"], errors="coerce")
    combined["time"] = seconds.div(60).round(4)
    return combined


def resolve_export_path(file_name: str, data_dir: str = DATA_DIR) -> str:
    """Return ``<data_dir>/<file_name>``, rejecting an empty name.

    Raises:
        ValueError: If *file_name* is empty or only whitespace.
    """
    cleaned = file_name.strip()
    if not cleaned:
        raise ValueError("export file name must not be empty")
    return f"{data_dir}/{cleaned}"


def main() -> None:
    """Run the conversion on the sample files and prompt for the output name."""
    away = to_long_format(
        load_sampled(AWAY_FILE), AWAY_XY_PAIRS, "Away", ball_jersey="0"
    )
    # NOTE(review): Home gaps are filled with 0.5 instead of being dropped,
    # presumably the pitch centre in normalised coordinates -- confirm.
    home = to_long_format(
        load_sampled(HOME_FILE), HOME_XY_PAIRS, "Home", fill_missing=0.5
    )
    df_export = build_export_frame(away, home)

    export_file_name = input(
        "Please enter a name for the file to be exported (ending with .csv): "
    )
    df_export.to_csv(resolve_export_path(export_file_name), index=False)


if __name__ == "__main__":
    main()
0 commit comments