aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiles Yucht <miles@databricks.com>2018-05-26 20:42:23 -0700
committerXiao Li <gatorsmile@gmail.com>2018-05-26 20:42:23 -0700
commitd440699192f21b14dfb8ec0dc5673537e1003b55 (patch)
tree3825ef3db4d57e88ae5d1178ec8a79848d97fa98
parented1a65448f228776afe2e5c6b1ac4228d2ed2854 (diff)
[SPARK-24381][TESTING] Add unit tests for NOT IN subquery around null values
## What changes were proposed in this pull request? This PR adds several unit tests along the `cols NOT IN (subquery)` pathway. There are a scattering of tests here and there which cover this codepath, but there doesn't seem to be a unified unit test of the correctness of null-aware anti joins anywhere. I have also added a brief explanation of how this expression behaves in SubquerySuite. Lastly, I made some clarifying changes in the NOT IN pathway in RewritePredicateSubquery. ## How was this patch tested? Added unit tests! There should be no behavioral change in this PR. Author: Miles Yucht <miles@databricks.com> Closes #21425 from mgyucht/spark-24381.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala9
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql39
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql98
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql42
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql123
-rw-r--r--sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out54
-rw-r--r--sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out134
-rw-r--r--sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out69
-rw-r--r--sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out149
-rw-r--r--sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out2
10 files changed, 714 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index 709db6d8be..de89e17e51 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -116,15 +116,16 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper {
// (a1,a2,...) = (b1,b2,...)
// to
// (a1=b1 OR isnull(a1=b1)) AND (a2=b2 OR isnull(a2=b2)) AND ...
- val joinConds = splitConjunctivePredicates(joinCond.get)
+ val baseJoinConds = splitConjunctivePredicates(joinCond.get)
+ val nullAwareJoinConds = baseJoinConds.map(c => Or(c, IsNull(c)))
// After that, add back the correlated join predicate(s) in the subquery
// Example:
// SELECT ... FROM A WHERE A.A1 NOT IN (SELECT B.B1 FROM B WHERE B.B2 = A.A2 AND B.B3 > 1)
// will have the final conditions in the LEFT ANTI as
- // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2)
- val pairs = (joinConds.map(c => Or(c, IsNull(c))) ++ conditions).reduceLeft(And)
+ // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2) AND B.B3 > 1
+ val finalJoinCond = (nullAwareJoinConds ++ conditions).reduceLeft(And)
// Deduplicate conflicting attributes if any.
- dedupJoin(Join(outerPlan, sub, LeftAnti, Option(pairs)))
+ dedupJoin(Join(outerPlan, sub, LeftAnti, Option(finalJoinCond)))
case (p, predicate) =>
val (newCond, inputPlan) = rewriteExistentialExpr(Seq(predicate), p)
Project(p.output, Filter(newCond.get, inputPlan))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql
new file mode 100644
index 0000000000..8eea84f4f5
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql
@@ -0,0 +1,39 @@
+-- Unit tests for simple NOT IN predicate subquery across multiple columns.
+--
+-- See not-in-single-column-unit-tests.sql for an introduction.
+-- This file has the same test cases as not-in-unit-tests-multi-column.sql with literals instead of
+-- subqueries. Small changes have been made to the literals to make them typecheck.
+
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, null),
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b);
+
+-- Case 1 (not possible to write a literal with no rows, so we ignore it.)
+-- (subquery is empty -> row is returned)
+
+-- Cases 2, 3 and 4 are currently broken, so I have commented them out here.
+-- Filed https://issues.apache.org/jira/browse/SPARK-24395 to fix and restore these test cases.
+
+ -- Case 5
+ -- (one null column with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN ((2, 3.0));
+
+ -- Case 6
+ -- (no null columns with match -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Matches (2, 3.0)
+ AND (a, b) NOT IN ((2, 3.0));
+
+ -- Case 7
+ -- (no null columns with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 5.0 -- Matches (4, 5.0)
+ AND (a, b) NOT IN ((2, 3.0));
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql
new file mode 100644
index 0000000000..9f8dc7fca3
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-multi-column.sql
@@ -0,0 +1,98 @@
+-- Unit tests for simple NOT IN predicate subquery across multiple columns.
+--
+-- See not-in-single-column-unit-tests.sql for an introduction.
+--
+-- Test cases for multi-column ``WHERE a NOT IN (SELECT c FROM r ...)'':
+-- | # | does subquery include null? | do filter columns contain null? | a = c? | b = d? | row included in result? |
+-- | 1 | empty | * | * | * | yes |
+-- | 2 | 1+ row has null for all columns | * | * | * | no |
+-- | 3 | no row has null for all columns | (yes, yes) | * | * | no |
+-- | 4 | no row has null for all columns | (no, yes) | yes | * | no |
+-- | 5 | no row has null for all columns | (no, yes) | no | * | yes |
+-- | 6 | no | (no, no) | yes | yes | no |
+-- | 7 | no | (no, no) | _ | _ | yes |
+--
+-- This can be generalized to include more tests for more columns, but it covers the main cases
+-- when there is more than one column.
+
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, null),
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b);
+
+CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
+ (null, null),
+ (0, 1.0),
+ (2, 3.0),
+ (4, null)
+ AS s(c, d);
+
+ -- Case 1
+ -- (subquery is empty -> row is returned)
+SELECT *
+FROM m
+WHERE (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE d > 5.0) -- Matches no rows
+;
+
+ -- Case 2
+ -- (subquery contains a row with null in all columns -> row not returned)
+SELECT *
+FROM m
+WHERE (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NULL AND d IS NULL) -- Matches only (null, null)
+;
+
+ -- Case 3
+ -- (probe-side columns are all null -> row not returned)
+SELECT *
+FROM m
+WHERE a IS NULL AND b IS NULL -- Matches only (null, null)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
+;
+
+ -- Case 4
+ -- (one column null, other column matches a row in the subquery result -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
+;
+
+ -- Case 5
+ -- (one null column with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+;
+
+ -- Case 6
+ -- (no null columns with match -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Matches (2, 3.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+;
+
+ -- Case 7
+ -- (no null columns with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 5.0 -- Matches (4, 5.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+;
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql
new file mode 100644
index 0000000000..b261363d1d
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql
@@ -0,0 +1,42 @@
+-- Unit tests for simple NOT IN with a literal expression of a single column
+--
+-- More information can be found in not-in-unit-tests-single-column.sql.
+-- This file has the same test cases as not-in-unit-tests-single-column.sql with literals instead of
+-- subqueries.
+
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b);
+
+ -- Uncorrelated NOT IN Subquery test cases
+ -- Case 1 (not possible to write a literal with no rows, so we ignore it.)
+ -- (empty subquery -> all rows returned)
+
+ -- Case 2
+ -- (subquery includes null -> no rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (null);
+
+ -- Case 3
+ -- (probe column is null -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (2);
+
+ -- Case 4
+ -- (probe column matches subquery row -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (2);
+
+ -- Case 5
+ -- (probe column does not match subquery row -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (6);
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql
new file mode 100644
index 0000000000..2cc08e10ac
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-unit-tests-single-column.sql
@@ -0,0 +1,123 @@
+-- Unit tests for simple NOT IN predicate subquery across a single column.
+--
+-- ``col NOT IN expr'' is quite difficult to reason about. There are many edge cases, some of the
+-- rules are confusing to the uninitiated, and precedence and treatment of null values is plain
+-- unintuitive. To make this simpler to understand, I've come up with a plain English way of
+-- describing the expected behavior of this query.
+--
+-- - If the subquery is empty (i.e. returns no rows), the row should be returned, regardless of
+-- whether the filtered columns include nulls.
+-- - If the subquery contains a result with all columns null, then the row should not be returned.
+-- - If for all non-null filter columns there exists a row in the subquery in which each column
+-- either
+-- 1. is equal to the corresponding filter column or
+-- 2. is null
+-- then the row should not be returned. (This includes the case where all filter columns are
+-- null.)
+-- - Otherwise, the row should be returned.
+--
+-- Using these rules, we can come up with a set of test cases for single-column and multi-column
+-- NOT IN test cases.
+--
+-- Test cases for single-column ``WHERE a NOT IN (SELECT c FROM r ...)'':
+-- | # | does subquery include null? | is a null? | a = c? | row with a included in result? |
+-- | 1 | empty | | | yes |
+-- | 2 | yes | | | no |
+-- | 3 | no | yes | | no |
+-- | 4 | no | no | yes | no |
+-- | 5 | no | no | no | yes |
+--
+-- There are also some considerations around correlated subqueries. Correlated subqueries can
+-- cause cases 2, 3, or 4 to be reduced to case 1 by limiting the number of rows returned by the
+-- subquery, so the row from the parent table should always be included in the output.
+
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b);
+
+CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (6, 7.0)
+ AS s(c, d);
+
+ -- Uncorrelated NOT IN Subquery test cases
+ -- Case 1
+ -- (empty subquery -> all rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d > 10.0) -- (empty subquery)
+;
+
+ -- Case 2
+ -- (subquery includes null -> no rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d = 1.0) -- Only matches (null, 1.0)
+;
+
+ -- Case 3
+ -- (probe column is null -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 3.0) -- Matches (2, 3.0)
+;
+
+ -- Case 4
+ -- (probe column matches subquery row -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 3.0) -- Matches (2, 3.0)
+;
+
+ -- Case 5
+ -- (probe column does not match subquery row -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 7.0) -- Matches (6, 7.0)
+;
+
+ -- Correlated NOT IN subquery test cases
+ -- Case 2->1
+ -- (subquery had nulls but they are removed by correlated subquery -> all rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+;
+
+ -- Case 3->1
+ -- (probe column is null but subquery returns no rows -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+;
+
+ -- Case 4->1
+ -- (probe column matches row which is filtered out by correlated subquery -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+;
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out
new file mode 100644
index 0000000000..a16e98af9a
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out
@@ -0,0 +1,54 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 4
+
+
+-- !query 0
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, null),
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+-- Case 5
+ -- (one null column with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN ((2, 3.0))
+-- !query 1 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 1 output
+NULL 1
+
+
+-- !query 2
+-- Case 6
+ -- (no null columns with match -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Matches (2, 3.0)
+ AND (a, b) NOT IN ((2, 3.0))
+-- !query 2 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 2 output
+
+
+
+-- !query 3
+-- Case 7
+ -- (no null columns with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 5.0 -- Matches (4, 5.0)
+ AND (a, b) NOT IN ((2, 3.0))
+-- !query 3 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 3 output
+4 5
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out
new file mode 100644
index 0000000000..aa5f64b8eb
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 9
+
+
+-- !query 0
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, null),
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
+ (null, null),
+ (0, 1.0),
+ (2, 3.0),
+ (4, null)
+ AS s(c, d)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+-- Case 1
+ -- (subquery is empty -> row is returned)
+SELECT *
+FROM m
+WHERE (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE d > 5.0) -- Matches no rows
+-- !query 2 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 2 output
+2 3
+4 5
+NULL 1
+NULL NULL
+
+
+-- !query 3
+-- Case 2
+ -- (subquery contains a row with null in all columns -> row not returned)
+SELECT *
+FROM m
+WHERE (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NULL AND d IS NULL) -- Matches only (null, null)
+-- !query 3 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 3 output
+
+
+
+-- !query 4
+-- Case 3
+ -- (probe-side columns are all null -> row not returned)
+SELECT *
+FROM m
+WHERE a IS NULL AND b IS NULL -- Matches only (null, null)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
+-- !query 4 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 4 output
+
+
+
+-- !query 5
+-- Case 4
+ -- (one column null, other column matches a row in the subquery result -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null)
+-- !query 5 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 5 output
+
+
+
+-- !query 6
+-- Case 5
+ -- (one null column with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Matches (null, 1.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+-- !query 6 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 6 output
+NULL 1
+
+
+-- !query 7
+-- Case 6
+ -- (no null columns with match -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Matches (2, 3.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+-- !query 7 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 7 output
+
+
+
+-- !query 8
+-- Case 7
+ -- (no null columns with no match -> row is returned)
+SELECT *
+FROM m
+WHERE b = 5.0 -- Matches (4, 5.0)
+ AND (a, b) NOT IN (SELECT *
+ FROM s
+ WHERE c = 2) -- Matches (2, 3.0)
+-- !query 8 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 8 output
+4 5
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out
new file mode 100644
index 0000000000..446447e890
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out
@@ -0,0 +1,69 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 5
+
+
+-- !query 0
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+-- Uncorrelated NOT IN Subquery test cases
+ -- Case 1 (not possible to write a literal with no rows, so we ignore it.)
+ -- (empty subquery -> all rows returned)
+
+ -- Case 2
+ -- (subquery includes null -> no rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (null)
+-- !query 1 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 1 output
+
+
+
+-- !query 2
+-- Case 3
+ -- (probe column is null -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (2)
+-- !query 2 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 2 output
+
+
+
+-- !query 3
+-- Case 4
+ -- (probe column matches subquery row -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (2)
+-- !query 3 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 3 output
+
+
+
+-- !query 4
+-- Case 5
+ -- (probe column does not match subquery row -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (6)
+-- !query 4 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 4 output
+2 3
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out
new file mode 100644
index 0000000000..f58ebeacc2
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out
@@ -0,0 +1,149 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 10
+
+
+-- !query 0
+CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (4, 5.0)
+ AS m(a, b)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES
+ (null, 1.0),
+ (2, 3.0),
+ (6, 7.0)
+ AS s(c, d)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+-- Uncorrelated NOT IN Subquery test cases
+ -- Case 1
+ -- (empty subquery -> all rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d > 10.0) -- (empty subquery)
+-- !query 2 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 2 output
+2 3
+4 5
+NULL 1
+
+
+-- !query 3
+-- Case 2
+ -- (subquery includes null -> no rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d = 1.0) -- Only matches (null, 1.0)
+-- !query 3 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 3 output
+
+
+
+-- !query 4
+-- Case 3
+ -- (probe column is null -> row not returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 3.0) -- Matches (2, 3.0)
+-- !query 4 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 4 output
+
+
+
+-- !query 5
+-- Case 4
+ -- (probe column matches subquery row -> row not returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 3.0) -- Matches (2, 3.0)
+-- !query 5 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 5 output
+
+
+
+-- !query 6
+-- Case 5
+ -- (probe column does not match subquery row -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = 7.0) -- Matches (6, 7.0)
+-- !query 6 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 6 output
+2 3
+
+
+-- !query 7
+-- Correlated NOT IN subquery test cases
+ -- Case 2->1
+ -- (subquery had nulls but they are removed by correlated subquery -> all rows returned)
+SELECT *
+FROM m
+WHERE a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+-- !query 7 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 7 output
+2 3
+4 5
+NULL 1
+
+
+-- !query 8
+-- Case 3->1
+ -- (probe column is null but subquery returns no rows -> row is returned)
+SELECT *
+FROM m
+WHERE b = 1.0 -- Only matches (null, 1.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+-- !query 8 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 8 output
+NULL 1
+
+
+-- !query 9
+-- Case 4->1
+ -- (probe column matches row which is filtered out by correlated subquery -> row is returned)
+SELECT *
+FROM m
+WHERE b = 3.0 -- Only matches (2, 3.0)
+ AND a NOT IN (SELECT c
+ FROM s
+ WHERE d = b + 10) -- Matches no row
+-- !query 9 schema
+struct<a:int,b:decimal(2,1)>
+-- !query 9 output
+2 3
diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out
index 62befc5ca0..be637b66ab 100644
--- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 11
+-- Number of queries: 14
-- !query 0