@@ -44,6 +44,7 @@ import org.apache.spark.sql.execution.streaming.runtime.StreamingQueryCheckpoint
4444import org .apache .spark .sql .execution .streaming .state .{InMemoryStateSchemaProvider , KeyStateEncoderSpec , NoPrefixKeyStateEncoderSpec , PrefixKeyScanStateEncoderSpec , RocksDBStateStoreProvider , StateSchemaCompatibilityChecker , StateSchemaMetadata , StateSchemaProvider , StateStore , StateStoreColFamilySchema , StateStoreConf , StateStoreId , StateStoreProviderId }
4545import org .apache .spark .sql .execution .streaming .state .OfflineStateRepartitionErrors
4646import org .apache .spark .sql .execution .streaming .utils .StreamingUtils
47+ import org .apache .spark .sql .internal .SQLConf
4748import org .apache .spark .sql .sources .DataSourceRegister
4849import org .apache .spark .sql .streaming .TimeMode
4950import org .apache .spark .sql .types .StructType
@@ -75,9 +76,18 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
7576 sourceOptions.resolvedCpLocation,
7677 stateConf.providerClass)
7778 }
78- val stateStoreReaderInfo : StateStoreReaderInfo = getStoreMetadataAndRunChecks(
79+ val ( stateStoreReaderInfo, storeMetadata) = getStoreMetadataAndRunChecks(
7980 sourceOptions)
8081
// Resolve the state format version for the symmetric hash join operator. The value is
// read from the current session's SQLConf (not from StateStoreConf).
// NOTE(review): this is the session's setting, which may differ from the version the
// checkpoint was written with - confirm this is the intended source of truth.
//
// headOption guards the operator-name lookup: calling `.head` on an empty metadata array
// would throw NoSuchElementException before any emptiness check could take effect.
val isJoin = storeMetadata.headOption.exists(
  _.operatorName == StatefulOperatorsUtils.SYMMETRIC_HASH_JOIN_EXEC_OP_NAME)
// Operators other than the symmetric hash join (or missing metadata) fall back to 1.
val stateFormatVersion: Int = if (isJoin) {
  session.conf.get(SQLConf.STREAMING_JOIN_STATE_FORMAT_VERSION)
} else {
  1
}
90+
8191 // The key state encoder spec should be available for all operators except stream-stream joins
8292 val keyStateEncoderSpec = if (stateStoreReaderInfo.keyStateEncoderSpecOpt.isDefined) {
8393 stateStoreReaderInfo.keyStateEncoderSpecOpt.get
@@ -91,19 +101,28 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
91101 stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
92102 stateStoreReaderInfo.stateSchemaProviderOpt,
93103 stateStoreReaderInfo.joinColFamilyOpt,
94- Option (stateStoreReaderInfo.allColumnFamiliesReaderInfo))
104+ Option (stateStoreReaderInfo.allColumnFamiliesReaderInfo),
105+ Option (stateFormatVersion))
95106 }
96107
97108 override def inferSchema (options : CaseInsensitiveStringMap ): StructType = {
98109 val sourceOptions = StateSourceOptions .modifySourceOptions(hadoopConf,
99110 StateSourceOptions .apply(session, hadoopConf, options))
100111
101- val stateStoreReaderInfo : StateStoreReaderInfo = getStoreMetadataAndRunChecks(
102- sourceOptions)
112+ val (stateStoreReaderInfo, storeMetadata) = getStoreMetadataAndRunChecks(sourceOptions)
103113 val oldSchemaFilePaths = StateDataSource .getOldSchemaFilePaths(sourceOptions, hadoopConf)
104114
// The state format version is only resolved when the first metadata entry belongs to the
// symmetric hash join operator; it is read from the current session's SQLConf. Any other
// operator, or an empty metadata array, yields None.
val stateFormatVersion = storeMetadata.headOption
  .filter(_.operatorName == StatefulOperatorsUtils.SYMMETRIC_HASH_JOIN_EXEC_OP_NAME)
  .map(_ => session.conf.get(SQLConf.STREAMING_JOIN_STATE_FORMAT_VERSION))
123+
105124 val stateCheckpointLocation = sourceOptions.stateCheckpointLocation
106- try {
125+ // try {
107126 val (keySchema, valueSchema) = sourceOptions.joinSide match {
108127 case JoinSideValues .left =>
109128 StreamStreamJoinStateHelper .readKeyValueSchema(session, stateCheckpointLocation.toString,
@@ -120,14 +139,24 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
120139 (resultSchema.keySchema, resultSchema.valueSchema)
121140 }
122141
// Pick the state variable info handed to the schema builder:
//  - when reading all column families (internal-only option), use the first state
//    variable registered in the all-column-families reader info, if there is one;
//  - otherwise use the variable info resolved for the requested state variable.
// headOption yields None for an empty collection, where `.head` would throw
// NoSuchElementException.
val stateVarInfo: Option[TransformWithStateVariableInfo] =
  if (sourceOptions.internalOnlyReadAllColumnFamilies) {
    stateStoreReaderInfo.allColumnFamiliesReaderInfo.stateVariableInfos.headOption
  } else {
    stateStoreReaderInfo.transformWithStateVariableInfoOpt
  }
123150 SchemaUtil .getSourceSchema(sourceOptions, keySchema,
124151 valueSchema,
125- stateStoreReaderInfo.transformWithStateVariableInfoOpt,
126- stateStoreReaderInfo.stateStoreColFamilySchemaOpt)
127- } catch {
128- case NonFatal (e) =>
129- throw StateDataSourceErrors .failedToReadStateSchema(sourceOptions, e)
130- }
152+ stateVarInfo,
153+ stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
154+ storeMetadata,
155+ stateFormatVersion)
156+ // } catch {
157+ // case NonFatal(e) =>
158+ // throw StateDataSourceErrors.failedToReadStateSchema(sourceOptions, e)
159+ // }
131160 }
132161
// Always reports false. NOTE(review): per the TableProvider contract this presumably means
// the source does not accept an externally supplied schema/partitioning - confirm.
133162 override def supportsExternalMetadata (): Boolean = false
@@ -256,7 +285,7 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
256285 }
257286
258287 private def getStoreMetadataAndRunChecks (sourceOptions : StateSourceOptions ):
259- StateStoreReaderInfo = {
288+ ( StateStoreReaderInfo , Array [ StateMetadataTableEntry ]) = {
260289 val storeMetadata = StateDataSource .getStateStoreMetadata(sourceOptions, hadoopConf)
261290 if (! sourceOptions.internalOnlyReadAllColumnFamilies) {
262291 // skipping runStateVarChecks for StatePartitionAllColumnFamiliesReader because
@@ -354,14 +383,14 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
354383 }
355384 }
356385
357- StateStoreReaderInfo (
386+ ( StateStoreReaderInfo (
358387 keyStateEncoderSpecOpt,
359388 stateStoreColFamilySchemaOpt,
360389 transformWithStateVariableInfoOpt,
361390 stateSchemaProvider,
362391 joinColFamilyOpt,
363392 AllColumnFamiliesReaderInfo (stateStoreColFamilySchemas, stateVariableInfos)
364- )
393+ ), storeMetadata)
365394 }
366395
367396 private def getKeyStateEncoderSpec (
0 commit comments