aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuming Wang <yumwang@ebay.com>2018-06-04 10:16:13 -0700
committerTakuya UESHIN <ueshin@databricks.com>2018-06-04 10:16:13 -0700
commit0be5aa27460f87b5627f9de16ec25b09368d205a (patch)
tree581b8be0d7c2ff2939f77b2d3f39ddf7f8a4ca7b
parent1d9338bb10b953daddb23b8879ff99aa5c57dbea (diff)
[SPARK-23903][SQL] Add support for date extract
## What changes were proposed in this pull request? Add support for date `extract` function: ```sql spark-sql> SELECT EXTRACT(YEAR FROM TIMESTAMP '2000-12-16 12:21:13'); 2000 ``` Supported field same as [Hive](https://github.com/apache/hive/blob/rel/release-2.3.3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g#L308-L316): `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `DAYOFWEEK`, `HOUR`, `MINUTE`, `SECOND`. ## How was this patch tested? unit tests Author: Yuming Wang <yumwang@ebay.com> Closes #21479 from wangyum/SPARK-23903.
-rw-r--r--sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g43
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala28
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala2
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/extract.sql21
-rw-r--r--sql/core/src/test/resources/sql-tests/results/extract.sql.out96
5 files changed, 149 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 7c54851097..3fe00eefde 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -592,6 +592,7 @@ primaryExpression
| identifier #columnReference
| base=primaryExpression '.' fieldName=identifier #dereference
| '(' expression ')' #parenthesizedExpression
+ | EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
;
constant
@@ -739,6 +740,7 @@ nonReserved
| VIEW | REPLACE
| IF
| POSITION
+ | EXTRACT
| NO | DATA
| START | TRANSACTION | COMMIT | ROLLBACK | IGNORE
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
@@ -878,6 +880,7 @@ TRAILING: 'TRAILING';
IF: 'IF';
POSITION: 'POSITION';
+EXTRACT: 'EXTRACT';
EQ : '=' | '==';
NSEQ: '<=>';
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index b9ece295c2..383ebde322 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1207,6 +1207,34 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
}
/**
+ * Create a Extract expression.
+ */
+ override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) {
+ ctx.field.getText.toUpperCase(Locale.ROOT) match {
+ case "YEAR" =>
+ Year(expression(ctx.source))
+ case "QUARTER" =>
+ Quarter(expression(ctx.source))
+ case "MONTH" =>
+ Month(expression(ctx.source))
+ case "WEEK" =>
+ WeekOfYear(expression(ctx.source))
+ case "DAY" =>
+ DayOfMonth(expression(ctx.source))
+ case "DAYOFWEEK" =>
+ DayOfWeek(expression(ctx.source))
+ case "HOUR" =>
+ Hour(expression(ctx.source))
+ case "MINUTE" =>
+ Minute(expression(ctx.source))
+ case "SECOND" =>
+ Second(expression(ctx.source))
+ case other =>
+ throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
+ }
+ }
+
+ /**
* Create a (windowed) Function expression.
*/
override def visitFunctionCall(ctx: FunctionCallContext): Expression = withOrigin(ctx) {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 89903c2825..ff0de0fb7c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -51,7 +51,7 @@ class TableIdentifierParserSuite extends SparkFunSuite {
"rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
"true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
- "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing")
+ "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract")
val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
"natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
diff --git a/sql/core/src/test/resources/sql-tests/inputs/extract.sql b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
new file mode 100644
index 0000000000..9adf5d7005
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/extract.sql
@@ -0,0 +1,21 @@
+CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c;
+
+select extract(year from c) from t;
+
+select extract(quarter from c) from t;
+
+select extract(month from c) from t;
+
+select extract(week from c) from t;
+
+select extract(day from c) from t;
+
+select extract(dayofweek from c) from t;
+
+select extract(hour from c) from t;
+
+select extract(minute from c) from t;
+
+select extract(second from c) from t;
+
+select extract(not_supported from c) from t;
diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
new file mode 100644
index 0000000000..160e4c7d78
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out
@@ -0,0 +1,96 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 11
+
+
+-- !query 0
+CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+select extract(year from c) from t
+-- !query 1 schema
+struct<year(CAST(c AS DATE)):int>
+-- !query 1 output
+2011
+
+
+-- !query 2
+select extract(quarter from c) from t
+-- !query 2 schema
+struct<quarter(CAST(c AS DATE)):int>
+-- !query 2 output
+2
+
+
+-- !query 3
+select extract(month from c) from t
+-- !query 3 schema
+struct<month(CAST(c AS DATE)):int>
+-- !query 3 output
+5
+
+
+-- !query 4
+select extract(week from c) from t
+-- !query 4 schema
+struct<weekofyear(CAST(c AS DATE)):int>
+-- !query 4 output
+18
+
+
+-- !query 5
+select extract(day from c) from t
+-- !query 5 schema
+struct<dayofmonth(CAST(c AS DATE)):int>
+-- !query 5 output
+6
+
+
+-- !query 6
+select extract(dayofweek from c) from t
+-- !query 6 schema
+struct<dayofweek(CAST(c AS DATE)):int>
+-- !query 6 output
+6
+
+
+-- !query 7
+select extract(hour from c) from t
+-- !query 7 schema
+struct<hour(CAST(c AS TIMESTAMP)):int>
+-- !query 7 output
+7
+
+
+-- !query 8
+select extract(minute from c) from t
+-- !query 8 schema
+struct<minute(CAST(c AS TIMESTAMP)):int>
+-- !query 8 output
+8
+
+
+-- !query 9
+select extract(second from c) from t
+-- !query 9 schema
+struct<second(CAST(c AS TIMESTAMP)):int>
+-- !query 9 output
+9
+
+
+-- !query 10
+select extract(not_supported from c) from t
+-- !query 10 schema
+struct<>
+-- !query 10 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)
+
+== SQL ==
+select extract(not_supported from c) from t
+-------^^^