core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
@@ -199,6 +199,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
     conf.get(PYTHON_DAEMON_KILL_WORKER_ON_FLUSH_FAILURE)
   protected val hideTraceback: Boolean = false
   protected val simplifiedTraceback: Boolean = false
+  protected val sessionLocalTimeZone = conf.getOption("spark.sql.session.timeZone")

   // All the Python functions should have the same exec, version and envvars.
   protected val envVars: java.util.Map[String, String] = funcs.head.funcs.head.envVars
@@ -282,6 +283,9 @@ private[spark] abstract class BasePythonRunner[IN, OUT](
     if (simplifiedTraceback) {
       envVars.put("SPARK_SIMPLIFIED_TRACEBACK", "1")
     }
+    if (sessionLocalTimeZone.isDefined) {
+      envVars.put("SPARK_SESSION_LOCAL_TIMEZONE", sessionLocalTimeZone.get)
+    }
     // SPARK-30299 this could be wrong with standalone mode when executor
     // cores might not be correct because it defaults to all cores on the box.
     val execCores = execCoresProp.map(_.toInt).getOrElse(conf.get(EXECUTOR_CORES))
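
Taken together with the Python-side changes below, this runner change means a UDF worker can pick up the session time zone instead of only the executor's system zone. A minimal end-to-end sketch of the intended effect (the time zone value and the `noon` function name are illustrative, not from this PR; the PySpark APIs used are standard):

import datetime

from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import TimestampType

spark = SparkSession.builder.getOrCreate()
# The runner forwards this setting (when present) to Python workers as
# the SPARK_SESSION_LOCAL_TIMEZONE environment variable.
spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

@udf(returnType=TimestampType())
def noon():
    # A naive datetime returned from a UDF; with this change the worker
    # should convert it using the session time zone rather than the
    # executor's system time zone.
    return datetime.datetime(2024, 1, 1, 12, 0, 0)

spark.range(1).select(noon().alias("ts")).show(truncate=False)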
python/pyspark/sql/types.py (2 additions, 3 deletions)
@@ -452,9 +452,8 @@ def needConversion(self) -> bool:

     def toInternal(self, dt: datetime.datetime) -> int:
         if dt is not None:
-            seconds = (
-                calendar.timegm(dt.utctimetuple()) if dt.tzinfo else time.mktime(dt.timetuple())
-            )
+            tzinfo = dt.tzinfo if dt.tzinfo else self.tz_info
+            seconds = calendar.timegm(dt.utctimetuple()) if tzinfo else time.mktime(dt.timetuple())
             return int(seconds) * 1000000 + dt.microsecond

     def fromInternal(self, ts: int) -> datetime.datetime:
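
To make the branch selection concrete, here is a standalone sketch of the logic above, with the class attribute replaced by a plain parameter (names and sample values are illustrative):

import calendar
import datetime
import time
import zoneinfo

def to_internal(dt, fallback_tz=None):
    # Mirrors the updated toInternal: an explicit tzinfo on dt wins,
    # otherwise the worker-provided fallback decides which branch runs.
    tzinfo = dt.tzinfo if dt.tzinfo else fallback_tz
    seconds = calendar.timegm(dt.utctimetuple()) if tzinfo else time.mktime(dt.timetuple())
    return int(seconds) * 1_000_000 + dt.microsecond

la = zoneinfo.ZoneInfo("America/Los_Angeles")
aware = datetime.datetime(2024, 1, 1, 12, 0, tzinfo=la)
naive = datetime.datetime(2024, 1, 1, 12, 0)

print(to_internal(aware))                  # aware: timegm over the UTC time tuple
print(to_internal(naive))                  # no tz anywhere: mktime, system-local
print(to_internal(naive, fallback_tz=la))  # fallback present: the timegm branch runs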
python/pyspark/worker.py (7 additions, 2 deletions)
@@ -27,6 +27,7 @@
 import inspect
 import itertools
 import json
+import zoneinfo
 from typing import Any, Callable, Iterable, Iterator, Optional, Tuple

 from pyspark.accumulators import (
@@ -3304,8 +3305,12 @@ def main(infile, outfile):
         sys.exit(-1)
     start_faulthandler_periodic_traceback()

-    # Use the local timezone to convert the timestamp
-    tz = datetime.datetime.now().astimezone().tzinfo
+    tzname = os.environ.get("SPARK_SESSION_LOCAL_TIMEZONE", None)

Contributor (inline review comment on the line above):
To confirm, we will hit this branch for every udf execution, not just once per python worker initialization, right?

+    if tzname:
+        tz = zoneinfo.ZoneInfo(tzname)
+    else:
+        # Use the local timezone to convert the timestamp
+        tz = datetime.datetime.now().astimezone().tzinfo
     TimestampType.tz_info = tz

     check_python_version(infile)
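
One data point relevant to the review question above (a standalone illustration, not from the PR): zoneinfo's primary constructor caches instances by key, so even if this path were hit repeatedly, repeated lookups of the same zone return the cached object rather than re-reading tzdata each time.

import zoneinfo

a = zoneinfo.ZoneInfo("Asia/Seoul")
b = zoneinfo.ZoneInfo("Asia/Seoul")
print(a is b)  # True: ZoneInfo(key) returns the cached instance

c = zoneinfo.ZoneInfo.no_cache("Asia/Seoul")
print(a is c)  # False: no_cache constructs a fresh instance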