-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinference_data_preprocessor.py
39 lines (30 loc) · 1.22 KB
/
inference_data_preprocessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# {% include 'template/license_header' %}
from typing_extensions import Annotated
import pandas as pd
from sklearn.pipeline import Pipeline
from zenml import step
@step
def inference_data_preprocessor(
    dataset_inf: pd.DataFrame,
    preprocess_pipeline: Pipeline,
    target: str,
) -> Annotated[pd.DataFrame, "inference_dataset"]:
    """Data preprocessor step.

    This is an example of a data processor step that prepares the data so that
    it is suitable for model inference. It takes in a dataset as an input step
    artifact and runs it through a pretrained preprocessing pipeline.

    A placeholder column named `target` is added before the transform and
    removed again afterwards, so the returned dataframe never contains it.
    The input dataframe is not modified; the step works on a copy.

    Args:
        dataset_inf: The inference dataset.
        preprocess_pipeline: Pretrained `Pipeline` to process dataset.
        target: Name of the target column the pipeline expects in the dataset.

    Returns:
        The processed dataframe without the `target` column.
    """
    ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
    # Work on a copy so the caller's dataframe does not gain a stray column.
    dataset_inf = dataset_inf.copy()
    # Artificially add the `target` column to avoid Pipeline issues. A scalar
    # is broadcast to every row regardless of the dataframe's index, whereas a
    # freshly built Series would be index-aligned and could introduce NaNs.
    dataset_inf[target] = 1
    # NOTE(review): assumes `transform` returns a DataFrame that still carries
    # the `target` column under the same name - confirm against the pipeline.
    dataset_inf = preprocess_pipeline.transform(dataset_inf)
    # Drop the placeholder by its actual name rather than a hard-coded
    # "target", so the step works for any target column name.
    dataset_inf = dataset_inf.drop(columns=[target])
    ### YOUR CODE ENDS HERE ###
    return dataset_inf