From 7c62f6baecc55a93dcab6119c21553e56cab840a Mon Sep 17 00:00:00 2001 From: jsuper Date: Thu, 15 Jun 2023 17:52:01 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=BADataTransform=E7=BB=84=E4=BB=B6?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9B=B4=E7=BB=86=E7=B2=92=E5=BA=A6=E7=9A=84?= =?UTF-8?q?=E7=BC=BA=E5=A4=B1=E5=80=BC=E5=A1=AB=E5=85=85=E5=8F=82=E6=95=B0?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DataTransform组件使用了FeatureImputer去做缺失值填充,但是只能设置统一的填充方法,不能为不同的列设置不同的填充方式。 因此增加一个参数,能够支持细粒度的控制不同列的填充方法。 Signed-off-by: jsuper --- python/federatedml/util/data_transform.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/federatedml/util/data_transform.py b/python/federatedml/util/data_transform.py index 3ce3674d35..b0c549a322 100644 --- a/python/federatedml/util/data_transform.py +++ b/python/federatedml/util/data_transform.py @@ -80,6 +80,8 @@ def __init__(self, data_transform_param): else: self.exclusive_data_type = None + self.col_missing_fill_method = data_transform_param.col_missing_fill_method + def _update_param(self, schema): meta = schema["meta"] self.delimitor = meta.get("delimiter", ",") @@ -229,7 +231,8 @@ def fill_missing_value(self, input_data_features, mode="fit"): if mode == "fit": input_data_features, self.default_value = imputer_processor.fit(input_data_features, replace_method=self.missing_fill_method, - replace_value=self.default_value) + replace_value=self.default_value, + col_replace_method=self.col_missing_fill_method) if self.missing_impute is None: self.missing_impute = imputer_processor.get_missing_value_list() else: @@ -693,6 +696,7 @@ def __init__(self, data_transform_param): self.missing_impute = None self.anonymous_generator = None self.anonymous_header = None + self.col_missing_fill_method = data_transform_param.col_missing_fill_method def _update_param(self, schema): meta = schema["meta"] @@ -806,7 +810,8 @@ def fill_missing_value(self, input_data, tags_dict, schema, mode="fit"): if mode == "fit": data, self.default_value = imputer_processor.fit(input_data, replace_method=self.missing_fill_method, - replace_value=self.default_value) + replace_value=self.default_value, + col_replace_method=self.col_missing_fill_method) LOGGER.debug("self.default_value is {}".format(self.default_value)) else: data = imputer_processor.transform(input_data,