@@ -124,49 +124,37 @@ def _explode(
124
124
if exploding_column .get ("expand_length" , False ):
125
125
expand_length = exploding_column ["expand_length" ]
126
126
derived_from_column = exploding_column ["derived_from" ]
127
- explode_selects = [
128
- (
129
- explode (self ._expand (derived_from_column , expand_length )).alias (
130
- exploding_column_name
131
- )
132
- if exploding_column_name == column
133
- else column
134
- )
135
- for column in all_column_names
136
- ]
127
+
128
+ explode_col_expr = explode (
129
+ self ._expand (derived_from_column , expand_length )
130
+ )
137
131
else :
138
- explode_selects = [
139
- (
140
- explode (col (exploding_column_name )).alias (exploding_column_name )
141
- if exploding_column_name == c
142
- else c
143
- )
144
- for c in all_column_names
145
- ]
132
+ explode_col_expr = explode (col (exploding_column_name ))
133
+
146
134
if "dataset" in exploding_column :
147
135
derived_from_column = exploding_column ["derived_from" ]
148
- explode_selects_with_derived_column = [
149
- (
150
- col (derived_from_column ).alias (exploding_column_name )
151
- if exploding_column_name == column
152
- else column
153
- )
154
- for column in all_column_names
155
- ]
136
+ no_explode_col_expr = col (derived_from_column )
137
+
156
138
if exploding_column ["dataset" ] == "a" :
157
- exploded_df = (
158
- exploded_df .select (explode_selects )
159
- if is_a
160
- else exploded_df .select (explode_selects_with_derived_column )
161
- )
139
+ expr = explode_col_expr if is_a else no_explode_col_expr
140
+ exploded_df = exploded_df .withColumn (exploding_column_name , expr )
162
141
elif exploding_column ["dataset" ] == "b" :
163
- exploded_df = (
164
- exploded_df .select (explode_selects )
165
- if not (is_a )
166
- else exploded_df .select (explode_selects_with_derived_column )
167
- )
142
+ expr = explode_col_expr if not is_a else no_explode_col_expr
143
+ exploded_df = exploded_df .withColumn (exploding_column_name , expr )
168
144
else :
169
- exploded_df = exploded_df .select (explode_selects )
145
+ exploded_df = exploded_df .withColumn (
146
+ exploding_column_name , explode_col_expr
147
+ )
148
+
149
+ # If there are exploding columns, select only "all_column_names" (sorted).
150
+ # Otherwise, just let all of the columns through without selecting
151
+ # specific ones. I believe this is an artifact of a previous
152
+ # implementation, but the tests currently enforce it. It may or may not
153
+ # be a breaking change to remove this. We'd have to look into the
154
+ # ramifications.
155
+ if len (all_exploding_columns ) > 0 :
156
+ exploded_df = exploded_df .select (sorted (all_column_names ))
157
+
170
158
return exploded_df
171
159
172
160
def _expand (self , column_name : str , expand_length : int ) -> Column :
0 commit comments