Skip to content

Commit 1ecc221

Browse files
committed
Fixed bugs
I've fixed the bugs detailed in the diff. One of them had already been fixed in my local copy of the file, but I had forgotten to commit that change.
1 parent 42f8847 commit 1ecc221

File tree

2 files changed

+16
-7500008
lines changed

2 files changed

+16
-7500008
lines changed

core/src/main/scala/spark/RDD.scala

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -96,32 +96,34 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
9696
var total = 0
9797
var multiplier = 3.0
9898
var initialCount = count()
99-
99+
var maxSelected = 0
100+
101+
if (initialCount > Integer.MAX_VALUE) {
102+
maxSelected = Integer.MAX_VALUE
103+
}
104+
else {
105+
maxSelected = initialCount.toInt
106+
}
107+
100108
if (num > initialCount) {
101-
total = Math.min(initialCount, Integer.MAX_VALUE)
102-
total = total.toInt
103-
fraction = 1.0
109+
total = maxSelected
110+
fraction = Math.min(multiplier*(maxSelected+1)/initialCount, 1.0)
104111
}
105112
else if (num < 0) {
106-
throw(new IllegalArgumentException())
113+
throw(new IllegalArgumentException("Negative number of elements requested"))
107114
}
108115
else {
109-
fraction = Math.min(multiplier*(num+1)/count(), 1.0)
116+
fraction = Math.min(multiplier*(num+1)/initialCount, 1.0)
110117
total = num.toInt
111118
}
112119

113-
var r = new SampledRDD(this, withReplacement, fraction, seed)
114-
var samples = r.collect()
120+
var samples = this.sample(withReplacement, fraction, seed).collect()
115121

116122
while (samples.length < total) {
117-
r = new SampledRDD(this, withReplacement, fraction, seed)
123+
samples = this.sample(withReplacement, fraction, seed).collect()
118124
}
119125

120-
var arr = new Array[T](total)
121-
122-
for (i <- 0 to total - 1) {
123-
arr(i) = samples(i)
124-
}
126+
val arr = samples.take(total)
125127

126128
return arr
127129
}

0 commit comments

Comments
 (0)