aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Cheng Lian <lian@databricks.com> 2014-11-18 17:41:54 -0800
committer: Michael Armbrust <michael@databricks.com> 2014-11-18 17:41:54 -0800
commit: 423baea953996a66dde671ff6db2fb1f32fbe8cb (patch)
tree: 8eb19bf5acfc99a771092ca09dd09cd0a20e9ca0
parent: 7f22fa81ebd5e501fcb0e1da5506d1d4fb9250cf (diff)
[SPARK-4468][SQL] Fixes Parquet filter creation for inequality predicates with literals on the left hand side
For expressions like `10 < someVar`, where the literal is on the left hand side, we should create an `Operators.Gt` filter (equivalent to `someVar > 10`), but right now an `Operators.Lt` is created. This issue affects all inequality predicates with literals on the left hand side.

(This bug existed before #3317 and affects branch-1.1. #3338 was opened to backport this to branch-1.1.)

Review on Reviewable: https://reviewable.io/reviews/apache/spark/3334

Author: Cheng Lian <lian@databricks.com>

Closes #3334 from liancheng/fix-parquet-comp-filter and squashes the following commits:

0130897 [Cheng Lian] Fixes Parquet comparison filter generation
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala | 8
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala | 12
2 files changed, 16 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
index 3a9e1499e2..6fb5f49b13 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
@@ -135,22 +135,22 @@ private[sql] object ParquetFilters {
case LessThan(NamedExpression(name, _), Literal(value, dataType)) =>
makeLt.lift(dataType).map(_(name, value))
case LessThan(Literal(value, dataType), NamedExpression(name, _)) =>
- makeLt.lift(dataType).map(_(name, value))
+ makeGt.lift(dataType).map(_(name, value))
case LessThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) =>
makeLtEq.lift(dataType).map(_(name, value))
case LessThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) =>
- makeLtEq.lift(dataType).map(_(name, value))
+ makeGtEq.lift(dataType).map(_(name, value))
case GreaterThan(NamedExpression(name, _), Literal(value, dataType)) =>
makeGt.lift(dataType).map(_(name, value))
case GreaterThan(Literal(value, dataType), NamedExpression(name, _)) =>
- makeGt.lift(dataType).map(_(name, value))
+ makeLt.lift(dataType).map(_(name, value))
case GreaterThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) =>
makeGtEq.lift(dataType).map(_(name, value))
case GreaterThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) =>
- makeGtEq.lift(dataType).map(_(name, value))
+ makeLtEq.lift(dataType).map(_(name, value))
case And(lhs, rhs) =>
(createFilter(lhs) ++ createFilter(rhs)).reduceOption(FilterApi.and)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index d31a9d8418..7ee4f3c1e9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -461,9 +461,21 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
}
checkFilter[Operators.Eq[Integer]]('a.int === 1)
+ checkFilter[Operators.Eq[Integer]](Literal(1) === 'a.int)
+
checkFilter[Operators.Lt[Integer]]('a.int < 4)
+ checkFilter[Operators.Lt[Integer]](Literal(4) > 'a.int)
+ checkFilter[Operators.LtEq[Integer]]('a.int <= 4)
+ checkFilter[Operators.LtEq[Integer]](Literal(4) >= 'a.int)
+
+ checkFilter[Operators.Gt[Integer]]('a.int > 4)
+ checkFilter[Operators.Gt[Integer]](Literal(4) < 'a.int)
+ checkFilter[Operators.GtEq[Integer]]('a.int >= 4)
+ checkFilter[Operators.GtEq[Integer]](Literal(4) <= 'a.int)
+
checkFilter[Operators.And]('a.int === 1 && 'a.int < 4)
checkFilter[Operators.Or]('a.int === 1 || 'a.int < 4)
+ checkFilter[Operators.Not](!('a.int === 1))
checkFilter('a.int > 'b.int, defined = false)
checkFilter(('a.int > 'b.int) && ('a.int > 'b.int), defined = false)