add JobLogger to Spark #573

Closed
wants to merge 15 commits into from
228 changes: 228 additions & 0 deletions core/src/main/scala/spark/JobLogger.scala
@@ -0,0 +1,228 @@
package spark

import java.util.Date
import java.text.SimpleDateFormat
import java.io.PrintWriter
import java.io.File
import java.io.FileNotFoundException
import scala.collection.mutable.Map
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import spark.scheduler.Stage
import scala.io.Source
Member:
For imports, sort them in the following order:

  1. java packages
  2. scala packages
  3. everything else, in alphabetical order.

Add a blank line between groups from different domains.

Do this for the other files too. Spark code didn't strictly follow this before, but we are enforcing it now; applied to this file, the ordering would look roughly like the sketch below.
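A sketch of the convention applied to this file's imports (just the suggested grouping, not the committed diff):

// Java packages first, then Scala packages, then everything else, alphabetically within each group.
import java.io.File
import java.io.FileNotFoundException
import java.io.PrintWriter
import java.text.SimpleDateFormat
import java.util.Date

import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.Map
import scala.io.Source

import spark.executor.TaskMetrics
import spark.scheduler.Stage
import spark.scheduler.cluster.TaskInfo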

import spark.executor.TaskMetrics
import spark.scheduler.cluster.TaskInfo

// Records runtime information for each job, including the RDD graph, task start/stop and shuffle
// information, and query plan information if there is any.

sealed trait JobLogger extends Logging {
Member:
With my proposed change (extend SparkListener and make JobLogger a specific implementation of SparkListener), you only need a single JobLogger implementation (no need to have JobLoggerOff).
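A rough sketch of how that might look; SparkListener's real callbacks are not shown in this diff, so onJobStart and onJobEnd below are illustrative placeholders rather than the actual listener API:

// Sketch only: one concrete JobLogger registered as a listener when logging is wanted.
// The callback names onJobStart/onJobEnd are placeholders, not the real SparkListener methods.
class JobLogger(contextDirName: String) extends SparkListener with Logging {
  def onJobStart(jobID: Int, finalStage: Stage) {
    createLogWriter(jobID)
  }
  def onJobEnd(jobID: Int) {
    closeLogWriter(jobID)
  }
}

Turning logging off then just means not registering the listener, so JobLoggerOff disappears.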


  def createLogWriter(jobID: Int): Unit
Member:
For functions that don't return anything, don't declare the return type as Unit. E.g., just write

def createLogWriter(jobID: Int) {
...
}


  def addStageIDToJobID(stages: List[Stage], jobID: Int): Unit
Member:
Try to combine addStageIDToJobID with addJobIDToStageIDs. They convey the same information and having two APIs make it more error-prone.
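One way to fold them into a single call (a sketch; the method name is made up):

// Sketch: record both directions of the job <-> stage mapping in one place.
def addJobAndStages(jobID: Int, stages: List[Stage]) {
  jobIDToStageIDs.getOrElseUpdate(jobID, new ListBuffer[Int]) ++= stages.map(_.id)
  for (stage <- stages) {
    stageIDToJobID += (stage.id -> jobID)
  }
}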


  def writeJobLog(jobID: Int, info: String, withTime: Boolean): Unit

  def writeStageLog(stageID: Int, info: String, withTime: Boolean): Unit

  def closeLogWriter(jobID: Int): Unit

  def addJobIDToStageIDs(jobID: Int, stages: List[Stage]): Unit

  def recordRDDGraph(rdd: RDD[_], finalStage: Stage, shuffleToMapStage: Map[Int, Stage], jobID: Int): Unit

  def recordTaskMetrics(stageID: Int, status: String, taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit
}

object JobLogger {
  private val logSwitch = System.getProperty("spark.joblogger.switch", "true").toBoolean

  def init() = {
    if (logSwitch) {
      new JobLoggerOn
    } else {
      new JobLoggerOff
    }
  }
}

class JobLoggerOff extends JobLogger {

  def createLogWriter(jobID: Int): Unit = { }

  def addStageIDToJobID(stages: List[Stage], jobID: Int): Unit = { }

  def writeJobLog(jobID: Int, info: String, withTime: Boolean): Unit = { }

  def writeStageLog(stageID: Int, info: String, withTime: Boolean): Unit = { }

  def closeLogWriter(jobID: Int): Unit = { }

  def addJobIDToStageIDs(jobID: Int, stages: List[Stage]): Unit = { }

  def recordRDDGraph(rdd: RDD[_], finalStage: Stage, shuffleToMapStage: Map[Int, Stage], jobID: Int): Unit = { }

  def recordTaskMetrics(stageID: Int, status: String, taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit = { }
}

class JobLoggerOn(val contextDirName: String) extends JobLogger {
  // Get the log directory setting; the default is /tmp/spark.
  private val logDir =
    if (System.getenv("SPARK_LOG_DIR") != null) {
      System.getenv("SPARK_LOG_DIR")
    } else {
      "/tmp/spark"
    }

  private var jobIDToPrintWriter = new HashMap[Int, PrintWriter]
  private var stageIDToJobID = new HashMap[Int, Int]
Contributor:
This is broken. A stage can be associated with more than one active job. See discussion of #414

Contributor Author:
The Map here only records the jobID that really "creates" the stage; the addStageIDToJobID function is called when the Stage is created, which means the stage is executed in the active job that created it.
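If a stage did have to be associated with every active job that uses it, the mapping would need to hold a set of job IDs per stage, roughly like this (a sketch of the alternative the reviewer alludes to, not the code in this PR):

import scala.collection.mutable.HashSet

private val stageIDToJobIDs = new HashMap[Int, HashSet[Int]]

def addStageIDToJobID(stages: List[Stage], jobID: Int) {
  for (stage <- stages) {
    stageIDToJobIDs.getOrElseUpdate(stage.id, new HashSet[Int]) += jobID
  }
}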

  private var jobIDToStageIDs = new HashMap[Int, ListBuffer[Int]]

  val DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")

  createContextDir()

  def this() = this(String.valueOf(System.currentTimeMillis()))

  // Create a folder for each SparkContext; the folder's name is the creation time of the JobLogger.
  def createContextDir() {
    val dir = new File(logDir + "/" + contextDirName + "/")
    if (dir.exists()) {
      return
    }
    if (dir.mkdirs() == false) {
      logError("create context directory error: " + logDir + "/" + contextDirName + "/")
    }
  }

  // Create a log file for one job; the file name is the jobID (an Int starting from 0).
  def createLogWriter(jobID: Int): Unit = {
    try {
      val fileWriter = new PrintWriter(logDir + "/" + contextDirName + "/" + jobID)
      jobIDToPrintWriter += (jobID -> fileWriter)
      jobIDToStageIDs += (jobID -> new ListBuffer[Int])
    } catch {
      case e: FileNotFoundException => e.printStackTrace()
    }
  }
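With the defaults, that means the log for job 0 of a context created at time T ends up at /tmp/spark/T/0 (an illustrative path derived from the two methods above, not output from an actual run).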

  // Close the log file for one job, and clean up the job's stages in stageIDToJobID.
  def closeLogWriter(jobID: Int): Unit = {
    jobIDToPrintWriter.get(jobID) match {
      case Some(fileWriter) =>
        fileWriter.close()
        jobIDToPrintWriter -= jobID
        cleanStageIDToJobID(jobID)
        jobIDToStageIDs -= jobID
      case None =>
    }
  }

  // Write log information to the log file by jobID; the withTime parameter controls whether to
  // record a timestamp with the information.
  def writeJobLog(jobID: Int, info: String, withTime: Boolean): Unit = {
    var writeInfo = info
    if (withTime) {
      val date = new Date(System.currentTimeMillis())
      writeInfo = DATE_FORMAT.format(date) + ": " + info
    }
    jobIDToPrintWriter.get(jobID) match {
      case Some(fileWriter) => fileWriter.println(writeInfo)
      case None =>
    }
  }

  // Write log information to the log file by stageID; the withTime parameter controls whether to
  // record a timestamp with the information.
  def writeStageLog(stageID: Int, info: String, withTime: Boolean): Unit = {
    stageIDToJobID.get(stageID) match {
      case Some(jobID) => writeJobLog(jobID, info, withTime)
      case None =>
    }
  }

  def addJobIDToStageIDs(jobID: Int, stages: List[Stage]): Unit = {
    jobIDToStageIDs.get(jobID) match {
      case Some(listBuffer) => for (stage <- stages) listBuffer.append(stage.id)
      case None =>
    }
  }

  // Add a list of stages to stageIDToJobID.
  def addStageIDToJobID(stages: List[Stage], jobID: Int): Unit = {
    for (stage <- stages) {
      stageIDToJobID += (stage.id -> jobID)
    }
  }

  // Clean up the stages related to one job in stageIDToJobID.
  def cleanStageIDToJobID(jobID: Int): Unit = {
    jobIDToStageIDs.get(jobID) match {
      case Some(stageIDList) => for (stageid <- stageIDList) stageIDToJobID -= stageid
      case None =>
    }
  }

Contributor:
Avoid pattern matching on Option, especially when you are doing nothing with the None case:

jobIDToStageIDs.get(jobID).map(_.foreach(stageid => stageIDToJobID -= stageid))
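The same thing reads a bit more directly with foreach, since the result is discarded anyway (a sketch):

jobIDToStageIDs.get(jobID).foreach(_.foreach(stageid => stageIDToJobID -= stageid))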

  // Generate an indent of the given width and return it as a String.
  def indentString(indent: Int): String = {
    val sb = new StringBuilder()
    for (i <- 0 to indent) {
      sb.append(" ")
    }
    sb.toString()
  }

  // Record the RDD graph for a given RDD: print the RDDs recursively and represent the
  // parent-child relationship by indentation.
  def recordRDDGraph(rdd: RDD[_], finalStage: Stage, shuffleToMapStage: Map[Int, Stage], jobID: Int): Unit = {
    def recordRDDGraphInternal(rdd: RDD[_], indent: Int): Unit = {
      val space = indentString(indent)
      for (dep <- rdd.dependencies) {
        var rddDesc: String = ""
        dep match {
          case shufDep: ShuffleDependency[_,_] =>
            // If the dependency is a shuffle, the parent RDD is in a new stage.
            var rddName = shufDep.rdd.getClass.getName
            if (shufDep.rdd.name != null) {
              rddName = shufDep.rdd.name
            }
            shuffleToMapStage.get(shufDep.shuffleId) match {
              case Some(stage) =>
                rddDesc = space + "RDD_ID:" + shufDep.rdd.id + " (" + rddName + " " + rdd.generator + ")" +
                  " SHUFFLE_ID:" + shufDep.shuffleId + " STAGE_ID:" + stage.id
              case None =>
                rddDesc = space + "RDD_ID:" + shufDep.rdd.id + " (" + rddName + " " + rdd.generator + ")" +
                  " SHUFFLE_ID:" + shufDep.shuffleId + " STAGE_ID:"
            }
          case _ =>
            var rddName = dep.rdd.getClass.getName
            if (dep.rdd.name != null) {
              rddName = dep.rdd.name
            }
            rddDesc = space + "RDD_ID:" + dep.rdd.id + " (" + rddName + " " + rdd.generator + ")"
        }
        writeJobLog(jobID, rddDesc, false)
        recordRDDGraphInternal(dep.rdd, indent + 2)
      }
    }

    var rddName = rdd.getClass.getName
    if (rdd.name != null) {
      rddName = rdd.name
    }
    writeJobLog(jobID, "RDD_ID:" + rdd.id + " (" + rddName + " " + rdd.generator + ")" +
      " RESULT_STAGE STAGE_ID:" + finalStage.id, false)
    recordRDDGraphInternal(rdd, 1)
  }

  def recordTaskMetrics(stageID: Int, status: String, taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit = {
    val info = " TID=" + taskInfo.taskId + " STAGE_ID=" + stageID +
      " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime +
      " DURATION=" + taskInfo.duration + " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskInfo.host

    val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime

    val readMetrics = taskMetrics.shuffleReadMetrics match {
      case Some(metrics) =>
        " BLOCK_FETCHED_TOTAL=" + metrics.totalBlocksFetched +
          " BLOCK_FETCHED_LOCAL=" + metrics.localBlocksFetched +
          " BLOCK_FETCHED_REMOTE=" + metrics.remoteBlocksFetched +
          " REMOTE_FETCH_WAIT_TIME=" + metrics.fetchWaitTime +
          " REMOTE_FETCH_TIME=" + metrics.remoteFetchTime +
          " REMOTE_BYTES_READ=" + metrics.remoteBytesRead +
          " SHUFFLE_BYTES_READ_TIME=" + metrics.shuffleReadMillis
      case None => ""
    }
    val writeMetrics = taskMetrics.shuffleWriteMetrics match {
      case Some(metrics) => " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten
      case None => ""
    }

    writeStageLog(stageID, status + info + executorRunTime + readMetrics + writeMetrics, true)
  }
}
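The DAGScheduler changes that drive these hooks are not part of this excerpt; the intended wiring is roughly the following (a hypothetical sketch based only on the API above, with a made-up status string, not the scheduler code in this PR):

// Hypothetical call sites inside the scheduler, in the order a job would hit them.
jobLogger.createLogWriter(jobID)                                          // job submitted
jobLogger.addStageIDToJobID(stages, jobID)                                // stages created
jobLogger.addJobIDToStageIDs(jobID, stages)
jobLogger.recordRDDGraph(finalRDD, finalStage, shuffleToMapStage, jobID)
jobLogger.recordTaskMetrics(stageID, "TASK_FINISHED", taskInfo, taskMetrics)  // per finished task
jobLogger.closeLogWriter(jobID)                                           // job finished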
8 changes: 8 additions & 0 deletions core/src/main/scala/spark/RDD.scala
@@ -114,6 +114,14 @@ abstract class RDD[T: ClassManifest](
    name = _name
    this
  }

  /** generator of this RDD */
Contributor:
Change the comment to:

/** User-defined generator of this RDD. */

  var generator = Utils.getRddGenerator
Member:
Can we reuse "origin" for this? I noticed there are some minor differences between generator and origin, but it would be great if we could merge the two since they are so similar.


  /** reset generator */
Member:
I don't see this used anywhere else. What is the purpose of this?

Contributor Author:
Just like setName above, this is used to set the generator intentionally.

  def setGenerator(_generator: String) = {
    generator = _generator
  }
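For example, a higher layer such as Shark could label the RDDs it builds so the job log attributes them to the operator that produced them (a hypothetical usage sketch; the RDDs and the operator name are made up):

val joined = left.join(right)
joined.setName("join output")
joined.setGenerator("ShuffleJoinOperator")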

  /**
   * Set this RDD's storage level to persist its values across operations after the first time
8 changes: 6 additions & 2 deletions core/src/main/scala/spark/SparkContext.scala
@@ -42,7 +42,7 @@ import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend
import spark.storage.BlockManagerUI
import spark.util.{MetadataCleaner, TimeStampedHashMap}
import spark.storage.{StorageStatus, StorageUtils, RDDInfo}

import scala.util.DynamicVariable
/**
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
* cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
@@ -65,6 +65,10 @@ class SparkContext(
  // Ensure logging is initialized before we spawn any threads
  initLogging()

  val jobLogger = JobLogger.init

  val addInfo = new DynamicVariable[String]("")
Member:
Don't see this set anywhere. What does it do?

Contributor Author:
This is used to pass some additional information in from outside, for example the query plan from Shark.
It could be replaced by the localProperties added to SparkContext by the fair scheduler.

Contributor:
It would be good to have a docstring for this, and also to maybe give it a different name. How about this:

/* Allows higher layer frameworks to describe the context of a job. */
val annotation = new DynamicVariable[String]("")
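As a sketch of how a caller might use it (hypothetical code; it assumes the addInfo/annotation variable above and relies on scala.util.DynamicVariable's withValue):

// Scope an annotation around one query so every job launched inside the block carries it.
sc.addInfo.withValue("query plan: <operator tree here>") {
  resultRdd.count()
}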


  // Set Spark driver host and port system properties
  if (System.getProperty("spark.driver.host") == null) {
    System.setProperty("spark.driver.host", Utils.localIpAddress)
@@ -578,7 +582,7 @@ class SparkContext(
    val callSite = Utils.getSparkCallSite
    logInfo("Starting job: " + callSite)
    val start = System.nanoTime
    val result = dagScheduler.runJob(rdd, func, partitions, callSite, allowLocal, resultHandler)
    val result = dagScheduler.runJob(rdd, func, partitions, callSite + "|" + addInfo.value.toString, allowLocal, resultHandler)
Contributor:
This line is over 100 characters

logInfo("Job finished: " + callSite + ", took " + (System.nanoTime - start) / 1e9 + " s")
rdd.doCheckpoint()
result
17 changes: 17 additions & 0 deletions core/src/main/scala/spark/Utils.scala
@@ -475,4 +475,21 @@ private object Utils extends Logging {
    }
    return false
  }

  // Return the first class name on the call stack that comes from outside Spark.
  def getRddGenerator = {
Member:
As mentioned earlier, try to use getSparkCallsite for this... If you need to modify getSparkCallsite, that is fine.

    var generator: String = ""
    var finished: Boolean = false
    // Get all stack trace elements that do not come from getStackTrace itself.
    val trace = Thread.currentThread.getStackTrace().filter(el =>
      !el.getMethodName.contains("getStackTrace"))

    for (el <- trace) {
      if (!finished) {
        if (!el.getClassName.startsWith("spark.")) {
          generator = el.getClassName
          finished = true
        }
      }
    }
    generator
  }
}
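An equivalent but somewhat more idiomatic formulation (a sketch, not part of the patch) avoids the finished flag by using find:

// Sketch: same behavior as getRddGenerator above, expressed with find.
def getRddGenerator: String = {
  Thread.currentThread.getStackTrace()
    .find(el => !el.getMethodName.contains("getStackTrace") &&
                !el.getClassName.startsWith("spark."))
    .map(_.getClassName)
    .getOrElse("")
}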