aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lu WANG <lu.wang@databricks.com> 2018-06-04 16:08:27 -0700
committer Xiangrui Meng <meng@databricks.com> 2018-06-04 16:08:27 -0700
commit ff0501b0c27dc8149bd5fb38a19d9b0056698766 (patch)
tree e21c936c5e8d60495d7b99c9f14acd8232335a7c
parent b24d3dba6571fd3c9e2649aceeaadc3f9c6cc90f (diff)
[SPARK-24300][ML] change the way to set seed in ml.cluster.LDASuite.generateLDAData
## What changes were proposed in this pull request?

Use a different RNG for each generated row, seeded with the row index, instead of a single RNG with seed 1 created outside the `map` closure.

## How was this patch tested?

Manually.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Lu WANG <lu.wang@databricks.com>

Closes #21492 from ludatabricks/SPARK-24300.
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala | 3
1 file changed, 1 insertion, 2 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
index 096b541689..db92132d18 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -34,9 +34,8 @@ object LDASuite {
vocabSize: Int): DataFrame = {
val avgWC = 1 // average instances of each word in a doc
val sc = spark.sparkContext
- val rng = new java.util.Random()
- rng.setSeed(1)
val rdd = sc.parallelize(1 to rows).map { i =>
+ val rng = new java.util.Random(i)
Vectors.dense(Array.fill(vocabSize)(rng.nextInt(2 * avgWC).toDouble))
}.map(v => new TestRow(v))
spark.createDataFrame(rdd)