aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Lu WANG <lu.wang@databricks.com> 2018-06-04 14:54:31 -0700
committer: Xiangrui Meng <meng@databricks.com> 2018-06-04 14:54:31 -0700
commit: b24d3dba6571fd3c9e2649aceeaadc3f9c6cc90f (patch)
tree: 0a42a0209e5421a4cc047f10a1840243192f5ca4
parent: 7297ae04d87b6e3d48b747a7c1d53687fcc3971c (diff)
[SPARK-24290][ML] add support for Array input for instrumentation.logNamedValue
## What changes were proposed in this pull request?
Extend instrumentation.logNamedValue to support Array input, and change the logging for "clusterSizes" to use the new method.
## How was this patch tested?
N/A
Please review http://spark.apache.org/contributing.html before opening a pull request.
Author: Lu WANG <lu.wang@databricks.com>
Closes #21347 from ludatabricks/SPARK-24290.
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala | 13
4 files changed, 16 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 1ad4e09724..9c9614509c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -276,8 +276,7 @@ class BisectingKMeans @Since("2.0.0") (
val summary = new BisectingKMeansSummary(
model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
model.setSummary(Some(summary))
- // TODO: need to extend logNamedValue to support Array
- instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
+ instr.logNamedValue("clusterSizes", summary.clusterSizes)
instr.logSuccess(model)
model
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 3091bb5a2e..64ecc1ebda 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -426,8 +426,7 @@ class GaussianMixture @Since("2.0.0") (
$(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood)
model.setSummary(Some(summary))
instr.logNamedValue("logLikelihood", logLikelihood)
- // TODO: need to extend logNamedValue to support Array
- instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
+ instr.logNamedValue("clusterSizes", summary.clusterSizes)
instr.logSuccess(model)
model
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index e72d7f9485..1704412741 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -359,8 +359,7 @@ class KMeans @Since("1.5.0") (
model.transform(dataset), $(predictionCol), $(featuresCol), $(k))
model.setSummary(Some(summary))
- // TODO: need to extend logNamedValue to support Array
- instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]"))
+ instr.logNamedValue("clusterSizes", summary.clusterSizes)
instr.logSuccess(model)
if (handlePersistence) {
instances.unpersist()
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala
index 467130b37c..3a1c166d46 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala
@@ -132,6 +132,19 @@ private[spark] class Instrumentation[E <: Estimator[_]] private (
log(compact(render(name -> value)))
}
+ /**
+ * Logs an array of strings under the given name.
+ *
+ * The array is converted with `toSeq`, passed through `render` and `compact`, and the
+ * result is then paired with `name`, rendered again and handed to `log`.
+ *
+ * NOTE(review): because the already-compacted result is what gets paired with `name`,
+ * the array appears to be emitted as a pre-rendered string rather than as a nested
+ * array; this looks intended to match the earlier `mkString("[", ",", "]")` output -
+ * confirm before changing.
+ *
+ * @param name key under which the value is logged
+ * @param value string elements to log
+ */
+ def logNamedValue(name: String, value: Array[String]): Unit = {
+ val renderedElements = compact(render(value.toSeq))
+ val entry = compact(render(name -> renderedElements))
+ log(entry)
+ }
+
+ /**
+ * Logs an array of longs under the given name.
+ *
+ * The array is converted with `toSeq`, passed through `render` and `compact`, and the
+ * result is then paired with `name`, rendered again and handed to `log`.
+ *
+ * NOTE(review): because the already-compacted result is what gets paired with `name`,
+ * the array appears to be emitted as a pre-rendered string rather than as a nested
+ * array; this looks intended to match the earlier `mkString("[", ",", "]")` output -
+ * confirm before changing.
+ *
+ * @param name key under which the value is logged
+ * @param value long elements to log
+ */
+ def logNamedValue(name: String, value: Array[Long]): Unit = {
+ val renderedElements = compact(render(value.toSeq))
+ val entry = compact(render(name -> renderedElements))
+ log(entry)
+ }
+
+ /**
+ * Logs an array of doubles under the given name.
+ *
+ * The array is converted with `toSeq`, passed through `render` and `compact`, and the
+ * result is then paired with `name`, rendered again and handed to `log`.
+ *
+ * NOTE(review): because the already-compacted result is what gets paired with `name`,
+ * the array appears to be emitted as a pre-rendered string rather than as a nested
+ * array - confirm this is the intended log format before changing.
+ *
+ * @param name key under which the value is logged
+ * @param value double elements to log
+ */
+ def logNamedValue(name: String, value: Array[Double]): Unit = {
+ val renderedElements = compact(render(value.toSeq))
+ val entry = compact(render(name -> renderedElements))
+ log(entry)
+ }
+
+
/**
* Logs the successful completion of the training session.
*/