Open
Description
Environment Details
Please indicate the following details about the environment in which you found the bug:
- SDMetrics version: 0.3.2
- Python version: 3.7
Error Description
The metrics `TSFClassifierEfficacy` and `TSFCDetection` crash when they are computed on time series of variable length. This is because `sktime.classification.compose.TimeSeriesForestClassifier` does not handle time series of variable length. We get the following error:
ValueError: Tabularization failed, it's possible that not all series were of equal length
We could either pad or truncate the time series to handle this situation.
Steps to reproduce
import pandas as pd
from sdmetrics.timeseries import TSFCDetection, TSFClassifierEfficacy
real = pd.DataFrame({
"seq_index": [1, 1, 2, 2, 2],
"dim_0": [0, 0, 0, 0, 1],
"dim_1": [3, 4, 3, 3, 3]
})
synth = pd.DataFrame({
"seq_index": [1, 1, 2, 2, 2],
"dim_0": [1, 1, 0, 0, 1],
"dim_1": [4, 4, 3, 3, 3]
})
TSFClassifierEfficacy.compute(real, synth, entity_columns=['seq_index'], target='dim_0')
Full trace
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-29d0a52ef600> in <module>()
----> 1 TSFClassifierEfficacy.compute(real, synth, entity_columns=['seq_index'], target='dim_0')
/usr/local/lib/python3.7/dist-packages/sdmetrics/timeseries/efficacy/base.py in compute(cls, real_data, synthetic_data, metadata, entity_columns, target)
107 real_data, synthetic_data, metadata, entity_columns, target)
108
--> 109 return cls._compute_score(real_data, synthetic_data, entity_columns, target)
/usr/local/lib/python3.7/dist-packages/sdmetrics/timeseries/efficacy/base.py in _compute_score(cls, real_data, synthetic_data, entity_columns, target)
78
79 real_acc = cls._scorer(real_x_train, real_x_test, real_y_train, real_y_test)
---> 80 synt_acc = cls._scorer(synt_x, real_x_test, synt_y, real_y_test)
81
82 return synt_acc / real_acc
/usr/local/lib/python3.7/dist-packages/sdmetrics/timeseries/ml_scorers.py in tsf_classifier(X_train, X_test, y_train, y_test)
19 ]
20 clf = Pipeline(steps)
---> 21 clf.fit(X_train, y_train)
22 return clf.score(X_test, y_test)
23
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
339 """
340 fit_params_steps = self._check_fit_params(**fit_params)
--> 341 Xt = self._fit(X, y, **fit_params_steps)
342 with _print_elapsed_time('Pipeline',
343 self._log_message(len(self.steps) - 1)):
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
305 message_clsname='Pipeline',
306 message=self._log_message(step_idx),
--> 307 **fit_params_steps[name])
308 # Replace the transformer of the step with the fitted
309 # transformer. This is necessary when loading the transformer
/usr/local/lib/python3.7/dist-packages/joblib/memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
/usr/local/lib/python3.7/dist-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
752 with _print_elapsed_time(message_clsname, message):
753 if hasattr(transformer, 'fit_transform'):
--> 754 res = transformer.fit_transform(X, y, **fit_params)
755 else:
756 res = transformer.fit(X, y, **fit_params).transform(X)
/usr/local/lib/python3.7/dist-packages/sktime/transformations/base.py in fit_transform(self, Z, X)
89 else:
90 # Fit method of arity 2 (supervised transformation)
---> 91 return self.fit(Z, X).transform(Z)
92
93 # def inverse_transform(self, Z, X=None):
/usr/local/lib/python3.7/dist-packages/sktime/transformations/panel/compose.py in transform(self, X, y)
217 # them into a single column
218 if isinstance(X, pd.DataFrame):
--> 219 Xt = from_nested_to_2d_array(X)
220 else:
221 Xt = from_3d_numpy_to_2d_array(X)
/usr/local/lib/python3.7/dist-packages/sktime/utils/data_processing.py in from_nested_to_2d_array(X, return_numpy)
178 if Xt.ndim != 2:
179 raise ValueError(
--> 180 "Tabularization failed, it's possible that not "
181 "all series were of equal length"
182 )
ValueError: Tabularization failed, it's possible that not all series were of equal length