
Commit 6668289

[SW-2146] Cleanup in tests -> move to right ai.h2o.sparkling packages (#2012)
1 parent 6aafd25 commit 6668289

269 files changed: +1214 additions, −1556 deletions


build.gradle

Lines changed: 5 additions & 1 deletion
@@ -47,7 +47,11 @@ ext {
         project(':sparkling-water-extensions')
     ]
     // Projects with integration tests
-    integTestProjects = [project(':sparkling-water-core'), project(':sparkling-water-examples')]
+    integTestProjects = [
+        project(':sparkling-water-core'),
+        project(':sparkling-water-examples'),
+        project(':sparkling-water-ml')
+    ]
 
     // Projects with benchmarks
     benchProjects = [project(':sparkling-water-core'), project(':sparkling-water-benchmarks')]

core/build.gradle

Lines changed: 0 additions & 4 deletions
@@ -97,10 +97,6 @@ dependencies {
 
     benchImplementation("org.scalatest:scalatest_${scalaBaseVersion}:${scalaTestVersion}")
     benchImplementation("junit:junit:4.11")
-
-    // Put Spark Assembly on runtime path
-    integTestRuntimeOnly(fileTree(dir: new File((String) sparkHome, "lib/"), include: '*.jar'))
-    benchRuntimeOnly(fileTree(dir: new File((String) sparkHome, "lib/"), include: '*.jar'))
 }
 
 task createSparkVersionFile {

core/src/test/scala/org/apache/spark/h2o/utils/BenchUtils.scala renamed to core/src/bench/scala/ai/h2o/sparkling/bench/BenchResult.scala

Lines changed: 3 additions & 42 deletions
@@ -15,52 +15,13 @@
  * limitations under the License.
  */
 
-package org.apache.spark.h2o.utils
-
-import java.util.concurrent.TimeUnit
+package ai.h2o.sparkling.bench
 
 import scala.concurrent.duration.TimeUnit
 
-object BenchUtils {
-
-  /**
-   * Measure execution time of given block in nanoseconds.
-   *
-   * @param block block to measure
-   * @return number of ns to execute given block
-   */
-  def timer(block: => Unit): Long = {
-    val now = System.nanoTime()
-    block
-    System.nanoTime() - now
-  }
-
-  /**
-   * Benchmark given block of code.
-   *
-   * @param iterations number of iterations to execute the block of code
-   * @param block block to execute as benchmark
-   * @return
-   */
-  def bench(iterations: Int, warmup: Int = 4, outputTimeUnit: TimeUnit = TimeUnit.MILLISECONDS)(
-      block: => Unit): BenchResult = {
-    val times = new Array[Long](iterations)
-    // Warmup
-    for (i <- 0 until warmup) {
-      timer(block)
-    }
-    // Measure
-    for (i <- 0 until iterations) {
-      times(i) = timer(block)
-    }
-
-    BenchResult(times, TimeUnit.NANOSECONDS, outputTimeUnit)
-  }
-}
-
 case class BenchResult(mean: Float, stdDev: Float, min: Float, max: Float, unit: TimeUnit) {
   def show(): String = {
-    f"${mean}%4f ± ${stdDev}%4f (${min}%4f, ${max}%4f)"
+    f"$mean%4f ± $stdDev%4f ($min%4f, $max%4f)"
   }
 }
 
@@ -69,7 +30,7 @@ object BenchResult {
     val convMeasurements = measurements.map(x => outputUnit.convert(x, inputUnit))
     val mean = convMeasurements.sum.toFloat / convMeasurements.length
     val stdev =
-      (Math.sqrt(convMeasurements.map(x => (x - mean) * (x - mean)).sum / (convMeasurements.length - 1))).toFloat
+      Math.sqrt(convMeasurements.map(x => (x - mean) * (x - mean)).sum / (convMeasurements.length - 1)).toFloat
     new BenchResult(mean, stdev, convMeasurements.min, convMeasurements.max, outputUnit)
   }
 }
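The BenchResult companion (second hunk above) converts raw measurements into the requested output unit and aggregates them into mean, standard deviation, min, and max. A minimal usage sketch, not part of the commit, assuming ai.h2o.sparkling.bench.BenchResult from this file is on the classpath; the apply signature is inferred from the call BenchResult(times, TimeUnit.NANOSECONDS, outputTimeUnit) in BenchSuite below:

import java.util.concurrent.TimeUnit

import ai.h2o.sparkling.bench.BenchResult

object BenchResultExample {
  def main(args: Array[String]): Unit = {
    // Hypothetical per-iteration wall-clock times in nanoseconds.
    val timesNs = Array(12500000L, 11800000L, 13100000L, 12200000L, 12900000L)
    // Convert to milliseconds and aggregate: mean ± stdDev (min, max).
    val result = BenchResult(timesNs, TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS)
    println(result.show()) // roughly "12.000000 ± 0.707107 (11.000000, 13.000000)"
  }
}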

core/src/bench/scala/ai/h2o/sparkling/bench/BenchSuite.scala

Lines changed: 38 additions & 4 deletions
@@ -19,23 +19,57 @@ package ai.h2o.sparkling.bench
 
 import java.util.concurrent.TimeUnit
 
-import org.apache.spark.h2o.utils.BenchUtils.bench
 import org.scalatest.FunSuite
 
+import scala.concurrent.duration.TimeUnit
+
 class BenchSuite extends FunSuite {
 
   protected def benchTest(
       testName: String,
       iterations: Int = 5,
       warmUp: Int = 1,
       outputTimeUnit: TimeUnit = TimeUnit.MILLISECONDS)(testFun: => Unit): Unit = {
-    def body: Unit = {
+    def body(): Unit = {
       val result = bench(iterations, warmUp, outputTimeUnit) {
-        val evaluated = testFun
+        testFun
       }
       println(s"$testName: ${result.show()}")
     }
+    registerTest(testName)(body())
+  }
+
+  /**
+   * Measure execution time of given block in nanoseconds.
+   *
+   * @param block block to measure
+   * @return number of ns to execute given block
+   */
+  private def timer(block: => Unit): Long = {
+    val now = System.nanoTime()
+    block
+    System.nanoTime() - now
+  }
+
+  /**
+   * Benchmark given block of code.
+   *
+   * @param iterations number of iterations to execute the block of code
+   * @param block block to execute as benchmark
+   * @return
+   */
+  private def bench(iterations: Int, warmup: Int = 4, outputTimeUnit: TimeUnit = TimeUnit.MILLISECONDS)(
+      block: => Unit): BenchResult = {
+    val times = new Array[Long](iterations)
+    // Warmup
+    for (_ <- 0 until warmup) {
+      timer(block)
+    }
+    // Measure
+    for (i <- 0 until iterations) {
+      times(i) = timer(block)
+    }
 
-    registerTest(testName)(body)
+    BenchResult(times, TimeUnit.NANOSECONDS, outputTimeUnit)
   }
 }
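With timer and bench now private members of BenchSuite, concrete benchmark suites only interact with benchTest, which registers a ScalaTest test that runs the block the given number of times after the warm-up rounds and prints "name: mean ± stdDev (min, max)" in the requested unit. A minimal sketch, not from this commit; the suite name and the benchmarked block are purely illustrative:

import java.util.concurrent.TimeUnit

import ai.h2o.sparkling.bench.BenchSuite

class SortingBenchSuite extends BenchSuite {
  // Registers a benchmark test: 10 measured iterations, default single warm-up round,
  // results reported in microseconds.
  benchTest("Measure performance of sorting a random array", iterations = 10, outputTimeUnit = TimeUnit.MICROSECONDS) {
    val data = Array.fill(100000)(scala.util.Random.nextInt())
    java.util.Arrays.sort(data)
  }
}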

core/src/bench/scala/ai/h2o/sparkling/bench/DataFrameConverterBenchSuite.scala

Lines changed: 15 additions & 18 deletions
@@ -17,11 +17,11 @@
 
 package ai.h2o.sparkling.bench
 
-import ai.h2o.sparkling.ml.utils.{FlatArraysOnlySchema, FlatSchema, SchemaUtils, StructsOnlySchema}
-import org.apache.spark.SparkContext
-import org.apache.spark.h2o.testdata.{DenseVectorHolder, SparseVectorHolder}
-import org.apache.spark.h2o.utils.{SharedH2OTestContext, TestFrameUtils}
+import ai.h2o.sparkling.TestUtils.{DenseVectorHolder, SparseVectorHolder}
+import ai.h2o.sparkling.ml.utils.SchemaUtils
+import ai.h2o.sparkling.{SharedH2OTestContext, TestUtils}
 import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors}
+import org.apache.spark.sql.SparkSession
 import org.junit.runner.RunWith
 import org.scalatest.junit.JUnitRunner
 
@@ -30,10 +30,10 @@ import scala.util.Random
 @RunWith(classOf[JUnitRunner])
 class DataFrameConverterBenchSuite extends BenchSuite with SharedH2OTestContext {
 
-  override def createSparkContext =
-    new SparkContext("local-cluster[2, 1, 2048]", getClass.getSimpleName, defaultSparkConf)
+  override def createSparkSession(): SparkSession = sparkSession("local-cluster[2, 1, 2048]")
+  import spark.implicits._
 
-  private val settings = TestFrameUtils.GenerateDataFrameSettings(
+  private val settings = TestUtils.GenerateDataFrameSettings(
     numberOfRows = 8000,
     rowsPerPartition = 500,
     maxCollectionSize = 100,
@@ -75,29 +75,28 @@ class DataFrameConverterBenchSuite extends BenchSuite with SharedH2OTestContext
     rowToSchema(FlatArraysOnlySchema)
   }
 
-  private def testPerSchema(schemaHolder: TestFrameUtils.SchemaHolder): Unit = {
-    val df = TestFrameUtils.generateDataFrame(spark, schemaHolder, settings)
+  private def testPerSchema(schemaHolder: TestUtils.SchemaHolder): Unit = {
+    val df = TestUtils.generateDataFrame(spark, schemaHolder, settings)
     val hf = hc.asH2OFrame(df)
     hf.remove()
   }
 
-  private def testflattenOnlyPerSchema(schemaHolder: TestFrameUtils.SchemaHolder): Unit = {
-    val df = TestFrameUtils.generateDataFrame(spark, schemaHolder, settings)
+  private def testflattenOnlyPerSchema(schemaHolder: TestUtils.SchemaHolder): Unit = {
+    val df = TestUtils.generateDataFrame(spark, schemaHolder, settings)
     SchemaUtils.flattenDataFrame(df).foreach(_ => {})
   }
 
-  private def testflattenSchema(schemaHolder: TestFrameUtils.SchemaHolder): Unit = {
-    val df = TestFrameUtils.generateDataFrame(spark, schemaHolder, settings)
+  private def testflattenSchema(schemaHolder: TestUtils.SchemaHolder): Unit = {
+    val df = TestUtils.generateDataFrame(spark, schemaHolder, settings)
     SchemaUtils.flattenSchema(df)
   }
 
-  private def rowToSchema(schemaHolder: TestFrameUtils.SchemaHolder): Unit = {
-    val df = TestFrameUtils.generateDataFrame(spark, schemaHolder, settings)
+  private def rowToSchema(schemaHolder: TestUtils.SchemaHolder): Unit = {
+    val df = TestUtils.generateDataFrame(spark, schemaHolder, settings)
     SchemaUtils.rowsToRowSchemas(df).foreach(_ => {})
   }
 
   benchTest("Measure performance of conversion to H2OFrame on a data frame with wide sparse vectors") {
-    import sqlContext.implicits._
     val numberOfCols = 50 * 1000
     val sparsity = 0.2
     val numberOfRows = 3 * 1000
@@ -112,7 +111,6 @@ class DataFrameConverterBenchSuite extends BenchSuite with SharedH2OTestContext
   }
 
   benchTest("Measure performance of conversion to H2OFrame on a data frame with wide dense vectors") {
-    import sqlContext.implicits._
     val numberOfCols = 10 * 1000
     val numberOfRows = 3 * 1000
     val partitions = 4
@@ -128,7 +126,6 @@ class DataFrameConverterBenchSuite extends BenchSuite with SharedH2OTestContext
   benchTest(
     "Measure performance of conversion to H2OFrame on a matrix 10x11 represented by sparse vectors",
     iterations = 10) {
-    import sqlContext.implicits._
 
     val numberOfRows = 10
     val numberOfCols = 11

core/src/test/scala/ai/h2o/sparkling/ml/utils/FlatArraysOnlySchema.scala renamed to core/src/bench/scala/ai/h2o/sparkling/bench/FlatArraysOnlySchema.scala

Lines changed: 3 additions & 3 deletions
@@ -15,12 +15,12 @@
  * limitations under the License.
  */
 
-package ai.h2o.sparkling.ml.utils
+package ai.h2o.sparkling.bench
 
-import org.apache.spark.h2o.utils.TestFrameUtils
+import ai.h2o.sparkling.TestUtils
 import org.apache.spark.sql.types._
 
-case object FlatArraysOnlySchema extends TestFrameUtils.SchemaHolder {
+case object FlatArraysOnlySchema extends TestUtils.SchemaHolder {
   @transient lazy val schema: StructType = StructType(
     Seq(
       StructField("field_GHEYZJXM36Y", ArrayType(LongType)),

core/src/test/scala/ai/h2o/sparkling/ml/utils/FlatSchema.scala renamed to core/src/bench/scala/ai/h2o/sparkling/bench/FlatSchema.scala

Lines changed: 3 additions & 3 deletions
@@ -15,12 +15,12 @@
  * limitations under the License.
  */
 
-package ai.h2o.sparkling.ml.utils
+package ai.h2o.sparkling.bench
 
-import org.apache.spark.h2o.utils.TestFrameUtils
+import ai.h2o.sparkling.TestUtils
 import org.apache.spark.sql.types._
 
-case object FlatSchema extends TestFrameUtils.SchemaHolder {
+case object FlatSchema extends TestUtils.SchemaHolder {
   @transient lazy val schema: StructType = StructType(
     Seq(
       StructField("field_9AHKOGTE4", LongType),

core/src/test/scala/ai/h2o/sparkling/ml/utils/StructsOnlySchema.scala renamed to core/src/bench/scala/ai/h2o/sparkling/bench/StructsOnlySchema.scala

Lines changed: 3 additions & 3 deletions
@@ -15,12 +15,12 @@
  * limitations under the License.
  */
 
-package ai.h2o.sparkling.ml.utils
+package ai.h2o.sparkling.bench
 
-import org.apache.spark.h2o.utils.TestFrameUtils
+import ai.h2o.sparkling.TestUtils
 import org.apache.spark.sql.types._
 
-case object StructsOnlySchema extends TestFrameUtils.SchemaHolder {
+case object StructsOnlySchema extends TestUtils.SchemaHolder {
   @transient lazy val schema: StructType = {
     StructType(
       Seq(
