-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathutils.py
66 lines (49 loc) · 2.14 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import re
import datetime
import pandas as pd
from typing import Union
def convert_dt(timestamp_string, postfix=' 00:00:00'):
    """Normalize a date or timestamp to a ``'YYYY-MM-DD HH:MM:SS'`` string.

    Args:
        timestamp_string: A ``datetime.datetime``, a ``datetime.date`` or a
            string beginning with ``YYYY-MM-DD``, optionally followed by a
            single separator character and ``HH:MM:SS``. In strings, every
            ``'Z'`` is removed and every ``'T'`` is replaced by a space, and
            only the first 19 characters are considered (so fractional
            seconds are dropped).
        postfix: Text appended when the input carries no full time part.

    Returns:
        The normalized timestamp string. For date-only input this is the
        (truncated) input followed by ``postfix``.

    Raises:
        ValueError: If the input does not start with a ``YYYY-MM-DD`` date.
    """
    # datetime.datetime is a subclass of datetime.date, so it is tested first.
    if isinstance(timestamp_string, datetime.datetime):
        timestamp_string = timestamp_string.strftime('%Y-%m-%d %H:%M:%S')
    elif isinstance(timestamp_string, datetime.date):
        timestamp_string = timestamp_string.strftime('%Y-%m-%d')

    timestamp_string = timestamp_string.replace('Z', '').replace('T', ' ')
    # Keep at most 'YYYY-MM-DD HH:MM:SS' (19 characters).
    timestamp_string = timestamp_string[:19]

    if re.match(r'\d\d\d\d-\d\d-\d\d.\d\d:\d\d:\d\d', timestamp_string):
        # Whatever separator sat between date and time becomes a single space.
        return timestamp_string[:10] + ' ' + timestamp_string[11:]
    if re.match(r'\d\d\d\d-\d\d-\d\d', timestamp_string):
        return timestamp_string + postfix
    raise ValueError(f"Unknown format: {timestamp_string} !")
def str_to_ts(x):
    """Return ``x`` as a ``datetime.datetime``.

    A ``datetime.datetime`` instance is handed back unchanged; any other
    value is first normalized by ``convert_dt`` and then parsed with the
    ``'%Y-%m-%d %H:%M:%S'`` format.
    """
    needs_parsing = not isinstance(x, datetime.datetime)
    if needs_parsing:
        normalized = convert_dt(x)
        x = datetime.datetime.strptime(normalized, '%Y-%m-%d %H:%M:%S')
    return x
# Components must appear in the order days, hours, minutes, seconds; each one
# is optional. Compiled once at import time instead of on every call.
_TIMEDELTA_PATTERN = re.compile(
    r'((?P<days>\d+?)d)?((?P<hours>\d+?)h)?((?P<minutes>\d+?)m)?((?P<seconds>\d+?)s)?'
)


def parse_str_to_timedelta(time_str):
    """Parse a string such as ``'1d2h30m15s'`` into a ``datetime.timedelta``.

    The input is lower-cased and matched from its start. Text after the last
    recognized component is ignored, so ``'5min'`` is read as five minutes.

    Args:
        time_str: Interval description built from ``<n>d``, ``<n>h``,
            ``<n>m`` and ``<n>s`` components, in that order.

    Returns:
        The parsed ``datetime.timedelta``, or ``None`` when the string does
        not start with any recognized component.
    """
    # Every component is optional, so the pattern always matches (possibly
    # the empty string); whether anything was parsed is decided by looking
    # at the captured groups rather than at the match object.
    parts = _TIMEDELTA_PATTERN.match(time_str.lower())
    time_params = {
        name: int(value)
        for name, value in parts.groupdict().items()
        if value
    }
    if not time_params:
        return None
    return datetime.timedelta(**time_params)
def resample_dataframe(source_df: pd.DataFrame,
                       resample_interval: Union[str, datetime.timedelta],
                       values_column_name: str,
                       grouping_column_name: Union[str, None] = None,
                       resample_function: str = 'pad'
                       ) -> Union[pd.DataFrame, None]:
    """Resample one column of ``source_df`` to a fixed interval.

    ``resample`` is called without ``on=``, so it operates on the index of
    ``source_df``; pandas requires that index to be datetime-like.

    Args:
        source_df: Frame to resample. It is not modified.
        resample_interval: Target interval, either a ``datetime.timedelta``
            or a string understood by ``parse_str_to_timedelta``.
        values_column_name: Column whose values are resampled.
        grouping_column_name: Optional column to group by before resampling;
            when given, it is returned as a regular column of the result.
        resample_function: Name of the resampler method to apply. The legacy
            names ``'pad'`` and ``'backfill'`` are accepted and mapped to
            ``'ffill'`` and ``'bfill'``.

    Returns:
        A new ``DataFrame`` with the resampled values, or ``None`` when
        ``resample_interval`` cannot be resolved to a ``datetime.timedelta``.
    """
    if isinstance(resample_interval, str):
        resample_interval = parse_str_to_timedelta(resample_interval)
    if not isinstance(resample_interval, datetime.timedelta):
        return None

    # Resampler.pad / Resampler.backfill were removed in pandas 2.0; ffill and
    # bfill are the equivalent methods and exist in older versions as well.
    legacy_aliases = {'pad': 'ffill', 'backfill': 'bfill'}
    method_name = legacy_aliases.get(resample_function, resample_function)

    # Selecting and resampling always builds new objects, so the source frame
    # does not need to be copied first.
    data = source_df.groupby(grouping_column_name) if grouping_column_name else source_df
    resampler = data[values_column_name].resample(resample_interval)
    result = pd.DataFrame(getattr(resampler, method_name)())

    if grouping_column_name:
        # Move the group key out of the index and back into a column.
        result = result.reset_index(grouping_column_name)
    return result