@@ -60,20 +60,19 @@ class VCFFeatureSource(vcfSource: VCFSource, converter: VariantToFeatureConverte
60
60
vcfSource.genotypes().map(converterRef.convert)
61
61
}
62
62
63
- lazy val sampleNamesStructArr : Array [StructField ] =
64
- sampleNames.map(StructField (_, ByteType , true )).toArray
65
-
66
- lazy val featureDFSchema : StructType =
67
- StructType (Seq (StructField (" variant_id" , StringType , true )) ++ sampleNamesStructArr)
68
-
69
- def toDF (sqlContext : SQLContext ): DataFrame = {
63
/**
 * Builds a small preview DataFrame of this feature source.
 *
 * The result has a string `variant_id` column holding each feature's label,
 * followed by one byte column per retained sample name. At most `rowLim`
 * features and at most `colLim` sample columns are kept.
 *
 * NOTE(review): `features` is presumably an RDD, in which case `take(rowLim)`
 * brings the selected rows to the driver before they are re-parallelized;
 * keep the limits small. Confirm against the declaration of `features`.
 *
 * @param sqlContext context used to create the DataFrame (and to reach the SparkContext)
 * @param rowLim     maximum number of features (rows) to include; defaults to 10
 * @param colLim     maximum number of sample columns to include; defaults to 10
 * @return a DataFrame with `variant_id` plus up to `colLim` sample columns and up to `rowLim` rows
 */
def head(sqlContext: SQLContext, rowLim: Int = 10, colLim: Int = 10): DataFrame = {
  // Plain vals: each is consumed exactly once right below, so `lazy` bought nothing.
  val sampleFields =
    sampleNames.take(colLim).map(StructField(_, ByteType, nullable = true))
  val previewSchema =
    StructType(StructField("variant_id", StringType, nullable = true) +: sampleFields.toSeq)

  // Trim every row to the same colLim values that the schema describes.
  val previewRows: Array[Row] =
    features.take(rowLim).map { f =>
      Row.fromSeq(f.label +: f.valueAsByteArray.take(colLim).toSeq)
    }

  sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(previewRows), previewSchema)
}
78
77
79
78
}
0 commit comments