Commit a188d69

Folder restructure
1 parent 6d13bf3 commit a188d69

34 files changed: +257 −129 lines changed

setup.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 from setuptools import setup, find_packages
-import vanpy
+from src import vanpy

 # with open('requirements.txt') as f:
 #     required = f.read().splitlines()
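With vanpy now living under src/, a conventional setuptools "src layout" configuration would typically accompany such a restructure. The sketch below is not taken from this commit; the metadata values are placeholders and only package_dir/find_packages reflect the src/ move.

# Minimal src-layout sketch (hypothetical values, not from this commit)
from setuptools import setup, find_packages

setup(
    name='vanpy',                          # placeholder metadata
    version='0.0.0',                       # placeholder metadata
    package_dir={'': 'src'},               # map the package root to src/
    packages=find_packages(where='src'),   # discover vanpy and its subpackages under src/
)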
File renamed without changes.

run.py renamed to src/run.py

Lines changed: 6 additions & 7 deletions
@@ -1,11 +1,10 @@
-from vanpy.core.ClassificationPipline import ClassificationPipeline
-from vanpy.core.FeatureExtractionPipline import FeatureExtractionPipeline
-from vanpy.core.PreprocessPipline import PreprocessPipeline
-from vanpy.core.CombinedPipeline import CombinedPipeline
+from src.vanpy.core.FeatureExtractionPipline import FeatureExtractionPipeline
+from src.vanpy.core.PreprocessPipline import PreprocessPipeline
+from src.vanpy.core.CombinedPipeline import CombinedPipeline
 import logging
-from vanpy.utils.utils import load_config
-from vanpy.core.ComponentPayload import ComponentPayload
-import pandas as pd
+from src.vanpy.utils.utils import load_config
+
+
 # import asyncio

File renamed without changes.

vanpy/core/BasePipline.py renamed to src/vanpy/core/BasePipline.py

Lines changed: 2 additions & 3 deletions
@@ -4,10 +4,9 @@
 from logging import Logger
 from typing import Dict, List
 from yaml import YAMLObject
-import inspect

-from vanpy.core.ComponentPayload import ComponentPayload
-from vanpy.core.PiplineComponent import PipelineComponent
+from src.vanpy.core.ComponentPayload import ComponentPayload
+from src.vanpy.core.PiplineComponent import PipelineComponent


 @dataclass

vanpy/core/ClassificationPipline.py renamed to src/vanpy/core/ClassificationPipline.py

Lines changed: 5 additions & 5 deletions
@@ -1,10 +1,10 @@
 from typing import List
 from yaml import YAMLObject
-from vanpy.core.BasePipline import BasePipeline
-from vanpy.core.segment_classification_components.CVAgeClassifier import CVAgeClassifier
-from vanpy.core.segment_classification_components.CVGenderClassifier import CVGenderClassifier
-from vanpy.core.segment_classification_components.IEMOCAPEmotionClassifier import IEMOCAPEmotionClassifier
-from vanpy.core.segment_classification_components.Wav2Vec2STT import Wav2Vec2STT
+from src.vanpy.core.BasePipline import BasePipeline
+from src.vanpy.core.segment_classification_components.CVAgeClassifier import CVAgeClassifier
+from src.vanpy.core.segment_classification_components.CVGenderClassifier import CVGenderClassifier
+from src.vanpy.core.segment_classification_components.IEMOCAPEmotionClassifier import IEMOCAPEmotionClassifier
+from src.vanpy.core.segment_classification_components.Wav2Vec2STT import Wav2Vec2STT


 class ClassificationPipeline(BasePipeline):

vanpy/core/CombinedPipeline.py renamed to src/vanpy/core/CombinedPipeline.py

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
 import logging
 from typing import List

-from vanpy.core.BasePipline import BasePipeline
-from vanpy.core.PiplineComponent import ComponentPayload
+from src.vanpy.core.BasePipline import BasePipeline
+from src.vanpy.core.PiplineComponent import ComponentPayload
 from yaml import YAMLObject


src/vanpy/core/ComponentPayload.py

Lines changed: 137 additions & 0 deletions
from dataclasses import dataclass
from typing import Dict, Tuple, List
import pandas as pd


@dataclass
class ComponentPayload:
    """
    A class that represents a container for payload (dataframe and metadata) passed between pipline components.
    """
    metadata: dict
    df: pd.DataFrame

    def __init__(self, input_path: str = '', metadata: Dict = None, df: pd.DataFrame = None):
        """
        Initializes the ComponentPayload class with the given input_path, metadata and dataframe.

        :param input_path: the input path of the data
        :type input_path: str
        :param metadata: the metadata of the data
        :type metadata: Dict
        :param df: the dataframe containing the data
        :type df: pd.DataFrame
        """
        self.metadata = metadata
        self.df = df
        if not self.metadata:
            self.metadata = {'input_path': '', 'paths_column': '', 'all_paths_columns': [],
                             'meta_columns': [], 'feature_columns': [], 'classification_columns': []}
        if input_path:
            self.metadata['input_path'] = input_path
        if ('input_path' not in self.metadata or self.metadata['input_path'] == '') and \
                ('paths_column' not in self.metadata or self.metadata['paths_column'] == ''):
            raise AttributeError(
                "You must supply at least input_path or metadata['paths_column'] when initializing ComponentPayload")
        for col in ['all_paths_columns', 'meta_columns', 'feature_columns', 'classification_columns']:
            if col not in self.metadata:
                self.metadata[col] = []
        if 'paths_column' in self.metadata and not self.metadata['all_paths_columns']:
            self.metadata['all_paths_columns'].append(self.metadata['paths_column'])
        if self.df is None:
            self.df = pd.DataFrame()

    def unpack(self) -> Tuple[Dict, pd.DataFrame]:
        """
        Returns a tuple of payload's metadata and the dataframe.

        :return: tuple of metadata and the dataframe
        :rtype: Tuple[Dict, pd.DataFrame]
        """
        return self.metadata, self.df

    def get_columns(self, all_paths_columns=False, meta_columns=False):
        """
        Returns the list of column names stored in metadata, filtered based on the input parameters.

        :param all_paths_columns: whether to include all paths columns in the returned list
        :type all_paths_columns: bool
        :param meta_columns: whether to include meta columns in the returned list
        :type meta_columns: bool
        :return: list of column names
        :rtype: List[str]
        """
        if not all_paths_columns:
            columns = [self.metadata['paths_column']]
        else:
            columns = self.metadata['all_paths_columns']
        if meta_columns:
            columns.extend(self.metadata['meta_columns'])
        return columns

    def get_declared_columns(self, ext_columns: List[str], all_paths_columns=False, meta_columns=False):
        """
        Returns a payload's dataframe containing the specified columns.

        :param ext_columns: the list of columns to include in the returned dataframe
        :type ext_columns: List[str]
        :param all_paths_columns: whether to include all paths columns in the returned dataframe
        :type all_paths_columns: bool
        :param meta_columns: whether to include meta columns in the returned dataframe
        :type meta_columns: bool
        :return: a dataframe containing the specified columns
        :rtype: pd.DataFrame
        """
        columns = self.get_columns(all_paths_columns, meta_columns)
        for cols in ext_columns:
            columns.extend(self.metadata[cols])
        columns = list(set(columns) & set(self.df.columns))
        return self.df[columns]

    def get_features_df(self, all_paths_columns=False, meta_columns=False):
        """
        Returns a dataframe containing the feature columns of the payload.

        :param all_paths_columns: whether to include all paths columns in the returned dataframe
        :type all_paths_columns: bool
        :param meta_columns: whether to include meta columns in the returned dataframe
        :type meta_columns: bool
        :return: a dataframe containing the feature columns
        :rtype: pd.DataFrame
        """
        return self.get_declared_columns(['feature_columns'], all_paths_columns, meta_columns)

    def get_classification_df(self, all_paths_columns=False, meta_columns=False):
        """
        Returns a dataframe containing the classification columns of the payload.

        :param all_paths_columns: whether to include all paths columns in the returned dataframe
        :type all_paths_columns: bool
        :param meta_columns: whether to include meta columns in the returned dataframe
        :type meta_columns: bool
        :return: a dataframe containing the classification columns
        :rtype: pd.DataFrame
        """
        return self.get_declared_columns(['classification_columns'], all_paths_columns, meta_columns)

    def get_full_df(self, all_paths_columns=False, meta_columns=False):
        """
        Returns a dataframe containing the feature and classification columns of the payload.

        :param all_paths_columns: whether to include all paths columns in the returned dataframe
        :type all_paths_columns: bool
        :param meta_columns: whether to include meta columns in the returned dataframe
        :type meta_columns: bool
        :return: a dataframe containing the feature and classification columns
        :rtype: pd.DataFrame
        """
        return self.get_declared_columns(['feature_columns', 'classification_columns'], all_paths_columns, meta_columns)

    def remove_redundant_index_columns(self):
        """
        Removes any columns from the payload's dataframe that have a name that starts with "Unnamed" or is an empty string.
        """
        for c in self.df.columns:
            if c.startswith('Unnamed') or c == '':
                self.df.drop([c], axis=1, inplace=True)
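For orientation, a small usage sketch of the ComponentPayload class added above. The dataframe, column names, and values are invented for illustration; only the constructor arguments and method names come from the file itself.

import pandas as pd
from src.vanpy.core.ComponentPayload import ComponentPayload

# Hypothetical dataframe; the column names below are illustrative only.
df = pd.DataFrame({'paths': ['a.wav', 'b.wav'],
                   'mfcc_0': [0.1, 0.2],
                   'gender': ['m', 'f']})
metadata = {'paths_column': 'paths',
            'feature_columns': ['mfcc_0'],
            'classification_columns': ['gender']}

payload = ComponentPayload(metadata=metadata, df=df)
metadata, df = payload.unpack()            # the (metadata, dataframe) pair
features = payload.get_features_df()       # columns 'paths' and 'mfcc_0' (order not guaranteed)
classes = payload.get_classification_df()  # columns 'paths' and 'gender' (order not guaranteed)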
