Skip to content

Commit

Permalink
Add type hints to hlink.linking.core.transforms.generate_transforms()
Browse files (browse the repository at this point in the history)
  • Loading branch information
riley-harper committed May 30, 2024
1 parent da9db20 commit 04ea049
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions hlink/linking/core/transforms.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,8 @@
# in this project's top-level directory, and also on-line at:
# https://github.com/ipums/hlink

+from typing import Any
+
from pyspark.sql.functions import (
array,
collect_list,
Expand All @@ -24,13 +26,18 @@
)
from pyspark.sql.types import ArrayType, LongType, StringType
from pyspark.ml import Pipeline
-from pyspark.sql import Window
+from pyspark.sql import DataFrame, SparkSession, Window
from pyspark.ml.feature import NGram, RegexTokenizer, CountVectorizer, MinHashLSH


def generate_transforms(
-spark, df_selected, feature_selections, link_task, is_a, id_col
-):
+spark: SparkSession,
+df_selected: DataFrame,
+feature_selections: list[dict[str, Any]],
+link_task,
+is_a: bool,
+id_col: str,
+) -> DataFrame:
not_skipped_feature_selections = [
c
for c in feature_selections
Expand All @@ -43,7 +50,9 @@ def generate_transforms(
if ("post_agg_feature" in c) and c["post_agg_feature"]
]

-def parse_feature_selections(df_selected, feature_selection, is_a):
+def parse_feature_selections(
+df_selected: DataFrame, feature_selection: dict[str, Any], is_a: bool
+) -> DataFrame:
transform = feature_selection["transform"]

if not feature_selection.get("output_column", False):
Expand Down Expand Up @@ -300,7 +309,7 @@ def union_list(list_a, list_b):
for feature_selection in not_skipped_feature_selections:
df_selected = parse_feature_selections(df_selected, feature_selection, is_a)

-def get_transforms(name, is_a):
+def get_transforms(name: str, is_a: bool) -> list[dict[str, Any]]:
to_process = []
for f in not_skipped_feature_selections:
if ("override_column_a" in f) and is_a:
Expand Down

0 comments on commit 04ea049

Please sign in to comment.