aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHuaxin Gao <huaxing@us.ibm.com>2018-06-06 08:31:35 +0700
committerhyukjinkwon <gurwls223@apache.org>2018-06-06 08:31:35 +0700
commite9efb62e0795c8d5233b7e5bfc276d74953942b8 (patch)
tree955e77a5422e3c88ccaa99d72dd7f60c6727bce6
parent93df3cd03503fca7745141fbd2676b8bf70fe92f (diff)
[SPARK-24187][R][SQL] Add array_join function to SparkRHEADmaster
## What changes were proposed in this pull request? This PR adds array_join function to SparkR ## How was this patch tested? Add unit test in test_sparkSQL.R Author: Huaxin Gao <huaxing@us.ibm.com> Closes #21313 from huaxingao/spark-24187.
-rw-r--r--R/pkg/NAMESPACE1
-rw-r--r--R/pkg/R/functions.R29
-rw-r--r--R/pkg/R/generics.R4
-rw-r--r--R/pkg/tests/fulltests/test_sparkSQL.R15
4 files changed, 46 insertions, 3 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 73a33af4dd..9696f6987a 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -201,6 +201,7 @@ exportMethods("%<=>%",
"approxCountDistinct",
"approxQuantile",
"array_contains",
+ "array_join",
"array_max",
"array_min",
"array_position",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index abc91aeeb4..3bff633fbc 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -221,7 +221,9 @@ NULL
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
-#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
+#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
+#' tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
+#' head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))}
NULL
#' Window functions for Column operations
@@ -3007,6 +3009,27 @@ setMethod("array_contains",
})
#' @details
+#' \code{array_join}: Concatenates the elements of column using the delimiter.
+#' Null values are replaced with nullReplacement if set, otherwise they are ignored.
+#'
+#' @param delimiter a character string that is used to concatenate the elements of column.
+#' @param nullReplacement an optional character string that is used to replace the Null values.
+#' @rdname column_collection_functions
+#' @aliases array_join array_join,Column-method
+#' @note array_join since 2.4.0
+setMethod("array_join",
+ signature(x = "Column", delimiter = "character"),
+ function(x, delimiter, nullReplacement = NULL) {
+ jc <- if (is.null(nullReplacement)) {
+ callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter)
+ } else {
+ callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter,
+ as.character(nullReplacement))
+ }
+ column(jc)
+ })
+
+#' @details
#' \code{array_max}: Returns the maximum value of the array.
#'
#' @rdname column_collection_functions
@@ -3197,8 +3220,8 @@ setMethod("size",
#' (or starting from the end if start is negative) with the specified length.
#'
#' @rdname column_collection_functions
-#' @param start an index indicating the first element occuring in the result.
-#' @param length a number of consecutive elements choosen to the result.
+#' @param start an index indicating the first element occurring in the result.
+#' @param length a number of consecutive elements chosen to the result.
#' @aliases slice slice,Column-method
#' @note slice since 2.4.0
setMethod("slice",
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 8894cb1c5b..9321bbaf96 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -759,6 +759,10 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
#' @rdname column_collection_functions
#' @name NULL
+setGeneric("array_join", function(x, delimiter, ...) { standardGeneric("array_join") })
+
+#' @rdname column_collection_functions
+#' @name NULL
setGeneric("array_max", function(x) { standardGeneric("array_max") })
#' @rdname column_collection_functions
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 16c1fd5a06..36e0f78bb0 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1518,6 +1518,21 @@ test_that("column functions", {
result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]]
expect_equal(result, c(TRUE, FALSE, NA))
+ # Test array_join()
+ df <- createDataFrame(list(list(list("Hello", "World!"))))
+ result <- collect(select(df, array_join(df[[1]], "#")))[[1]]
+ expect_equal(result, "Hello#World!")
+ df2 <- createDataFrame(list(list(list("Hello", NA, "World!"))))
+ result <- collect(select(df2, array_join(df2[[1]], "#", "Beautiful")))[[1]]
+ expect_equal(result, "Hello#Beautiful#World!")
+ result <- collect(select(df2, array_join(df2[[1]], "#")))[[1]]
+ expect_equal(result, "Hello#World!")
+ df3 <- createDataFrame(list(list(list("Hello", NULL, "World!"))))
+ result <- collect(select(df3, array_join(df3[[1]], "#", "Beautiful")))[[1]]
+ expect_equal(result, "Hello#Beautiful#World!")
+ result <- collect(select(df3, array_join(df3[[1]], "#")))[[1]]
+ expect_equal(result, "Hello#World!")
+
# Test array_sort() and sort_array()
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))