Skip to content

Commit 64dd204

Browse files
committed
adding types; move import
1 parent ab2d6c0 commit 64dd204

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

python/pyspark/sql/pandas/serializers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,8 +2174,9 @@ def load_stream(self, stream):
21742174
from pyspark.sql.streaming.stateful_processor_util import (
21752175
TransformWithStateInPandasFuncMode,
21762176
)
2177+
from typing import Iterator, Any, Optional, Tuple
21772178

2178-
def generate_data_batches(batches):
2179+
def generate_data_batches(batches) -> Iterator[Tuple[Any, Optional[Any], Optional[Any]]]:
21792180
"""
21802181
Deserialize ArrowRecordBatches and return a generator of Row.
21812182
The deserialization logic assumes that Arrow RecordBatches contain the data with the
@@ -2186,7 +2187,7 @@ def generate_data_batches(batches):
21862187
into the data generator.
21872188
"""
21882189

2189-
def extract_rows(cur_batch, col_name, key_offsets):
2190+
def extract_rows(cur_batch, col_name, key_offsets) -> Optional[Iterator[Tuple[Any, Any]]]:
21902191
data_column = cur_batch.column(cur_batch.schema.get_field_index(col_name))
21912192

21922193
# Check if the entire column is null

python/pyspark/worker.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,8 +3069,8 @@ def values_gen():
30693069
ser.init_key_offsets = parsed_offsets[1][0]
30703070
stateful_processor_api_client = StatefulProcessorApiClient(state_server_port, key_schema)
30713071

3072-
import pandas as pd
30733072
def mapper(a):
3073+
import pandas as pd
30743074
mode = a[0]
30753075

30763076
if mode == TransformWithStateInPandasFuncMode.PROCESS_DATA:
@@ -3233,7 +3233,6 @@ def mapper(a):
32333233

32343234
parsed_offsets = extract_key_value_indexes(arg_offsets)
32353235

3236-
import pandas as pd
32373236
def mapper(a):
32383237
df1_keys = [a[0][o] for o in parsed_offsets[0][0]]
32393238
df1_vals = [a[0][o] for o in parsed_offsets[0][1]]

0 commit comments

Comments
 (0)