-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloader.py
84 lines (61 loc) · 2.42 KB
/
loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import pandas as pd
import re
from config import DATA_DIR, CONDITIONS, SENSOR_TYPES
# Lookup table from the column headers found in the raw sensor files to the
# normalized column names; it is applied by clean_col_names() via
# DataFrame.rename(), so headers that are not listed here are left unchanged.
COLUMN_MAPPING = {
    # ACC
    "Phone timestamp": "phone_datetime",
    "sensor timestamp [ns]": "sensor_clock[ns]",
    "X [mg]": "acc_x[mg]",
    "Y [mg]": "acc_y[mg]",
    "Z [mg]": "acc_z[mg]",
    # PPG
    "channel 0": "ppg_ch0",
    "channel 1": "ppg_ch1",
    "channel 2": "ppg_ch2",
    "ambient": "ppg_amb",
    # HR
    "HR [bpm]": "heart_rate[bpm]",
}
def clean_col_names(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize the column labels of a sensor DataFrame.

    Surrounding whitespace is trimmed from every label first, then labels
    present in COLUMN_MAPPING are replaced by their mapped names.

    Args:
        df: pd.DataFrame - frame whose columns are normalized; it is modified in place
    Returns:
        The same DataFrame object that was passed in.
    """
    # Trim first so that headers with stray spaces still hit the mapping keys
    trimmed_labels = df.columns.str.strip()
    df.columns = trimmed_labels

    # Labels without an entry in the mapping are kept as they are
    df.rename(columns=COLUMN_MAPPING, inplace=True)
    return df
def load_data_for_participant(participant_dir: str) -> dict:
    """
    Loads and categorizes data for a given subject.

    For every condition in CONDITIONS the directory
    DATA_DIR/<participant_dir>/<condition> is scanned. A file is read only
    when its name matches at least one regex in SENSOR_TYPES; sub-directories
    and files that match no pattern are skipped instead of being parsed.
    Files are processed in sorted name order so the row order of the result
    is reproducible. A file matching several patterns is added to each of
    the matching sensor types.

    Args:
        participant_dir: str - name of the participant's directory inside DATA_DIR
    Returns:
        dict[condition][sensor_type] -> pd.DataFrame holding the row-wise
        concatenation of all matching files (an empty DataFrame when the
        condition directory is missing or no file matched).
    Raises:
        Whatever pd.read_csv raises for a matching file that cannot be parsed.
    """
    # Collect the frames first and concatenate once per sensor type at the
    # end: concatenating inside the loop re-copies all rows on every file.
    collected = {category: {key: [] for key in SENSOR_TYPES} for category in CONDITIONS}

    for category in CONDITIONS:
        category_path = os.path.join(DATA_DIR, participant_dir, category)
        # isdir (not exists): a plain file with that name cannot be listed
        if not os.path.isdir(category_path):
            print(f"Missing category: {category} for {participant_dir}")
            continue

        # os.listdir returns entries in arbitrary order; sort for determinism
        for filename in sorted(os.listdir(category_path)):
            file_path = os.path.join(category_path, filename)

            # Debugging: Print the detected files
            print(f"Checking file: {filename} in {category}")

            # Nested directories cannot be read as CSV
            if not os.path.isfile(file_path):
                continue

            # Decide which sensor types the file belongs to before parsing it
            matched_keys = [
                key for key, pattern in SENSOR_TYPES.items() if re.search(pattern, filename)
            ]
            if not matched_keys:
                continue

            df = pd.read_csv(file_path, delimiter=";", header="infer")
            df = clean_col_names(df)

            for key in matched_keys:
                print(f"File matched pattern {key}: {filename}")
                collected[category][key].append(df)

    # Original row indices are kept (no ignore_index), as before
    return {
        category: {
            key: pd.concat(frames, axis=0) if frames else pd.DataFrame()
            for key, frames in per_sensor.items()
        }
        for category, per_sensor in collected.items()
    }
def load_all_participants() -> dict:
    """
    Load the data of every participant found in DATA_DIR.

    Each sub-directory of DATA_DIR is treated as one participant; entries
    that are not directories are ignored.

    Returns:
        dict[participant][condition][sensor_type] -> pd.DataFrame, keyed by
        the participant's directory name.
    """
    dataset = {}
    for entry in os.listdir(DATA_DIR):
        # Only directories represent participants
        if not os.path.isdir(os.path.join(DATA_DIR, entry)):
            continue
        print(f"Loading data for: {entry}")
        dataset[entry] = load_data_for_participant(entry)
    return dataset