You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Some VINs might not include any information about the transmission.
Using the example VINs from VIDA, I couldn't find a VIN which did not specify at least the engine, so I'm still requiring the engine to be specified.
# For cases where the VIN represents multiple combinations of engines and transmissions we match the table with itself to create
115
-
# rows with every possible combination of engines and transmissions.
115
+
# rows with every possible combination of engines and transmissions. However, we also want to allow VINs that only represent a single engine.
116
116
combined=duckdb.sql("""
117
117
SELECT DISTINCT c1.fkVehicleModel, c1.fkModelYear, c1.fkPartnerGroup, c1.fkBodyStyle, c1.fkEngine, c2.fkTransmission FROM components AS c1, components AS c2
118
-
WHERE c1.fkEngine IS NOT NULL AND c2.fkTransmission IS NOT NULL
119
-
""")
120
-
121
-
# Filter all the engine/transmission combinations for actually valid ones from the VehicleProfile table
122
-
filtered=duckdb.sql("""
123
-
SELECT DISTINCT combined.* FROM combined
124
-
INNER JOIN vehicle_profile vp on vp.fkVehicleModel=combined.fkVehicleModel AND vp.fkModelYear=combined.fkModelYear
125
-
WHERE combined.fkEngine=vp.fkEngine AND combined.fkTransmission=vp.fkTransmission
118
+
WHERE (c1.fkEngine IS NOT NULL AND c2.fkTransmission IS NOT NULL) OR c1.fkEngine IS NOT NULL
126
119
""").df()
127
120
121
+
# Filter all the engine/transmission combinations for actually valid ones from the VehicleProfile table if more than one exists
122
+
if len(combined) > 1:
123
+
filtered=duckdb.sql("""
124
+
SELECT DISTINCT combined.* FROM combined
125
+
INNER JOIN vehicle_profile vp on vp.fkVehicleModel=combined.fkVehicleModel AND vp.fkModelYear=combined.fkModelYear
126
+
WHERE combined.fkEngine=vp.fkEngine AND combined.fkTransmission=vp.fkTransmission
127
+
""").df()
128
+
combined=filtered
129
+
128
130
# Replace possible NaN values with 'None' to avoid having float64 columns, and cast everything to int to not use numpy types
0 commit comments