aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet
diff options
context:
space:
mode:
authorHanifi Gunes <hanifigunes@gmail.com>2015-11-16 18:33:13 -0800
committerHanifi Gunes <hanifigunes@gmail.com>2015-11-24 16:50:34 -0800
commit367d74a65ce2871a1452361cbd13bbd5f4a6cc95 (patch)
tree0a9ee837d559649f0ebefce841313dc01d3bc80c /exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet
parentbd39d30024431cf7eb2939f5b336b82c3b1dbe93 (diff)
DRILL-2618: handle queries over empty folders consistently so that they report table not found rather than failing.
Refactor FileSelection to eliminate redundancy and make it more manageable. Fix WorkspaceSchemaFactory to handle empty folders. Introduce ParquetFileSelection, a subclass of FileSelection that carries along the metadata cache. Fix MagicStringMatcher so that it operates on files only. Unit test file selection.
Diffstat (limited to 'exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java62
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java6
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java10
3 files changed, 71 insertions, 7 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
new file mode 100644
index 000000000..26ebfc5df
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import com.google.common.base.Preconditions;
+import org.apache.drill.exec.store.dfs.FileSelection;
+import org.apache.drill.exec.store.parquet.Metadata.ParquetTableMetadata_v1;
+
+/**
+ * Parquet-specific {@link FileSelection selection} that carries {@link ParquetTableMetadata_v1 metadata} along with it.
+ */
+public class ParquetFileSelection extends FileSelection {
+// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetFileSelection.class);
+
+ private final ParquetTableMetadata_v1 metadata;
+
+ protected ParquetFileSelection(final FileSelection delegate, final ParquetTableMetadata_v1 metadata) {
+ super(delegate);
+ this.metadata = Preconditions.checkNotNull(metadata, "Parquet metadata cannot be null");
+ }
+
+ /**
+ * Returns the parquet table metadata that this selection was constructed with.
+ * The constructor rejects null metadata via Preconditions.checkNotNull, so the
+ * value returned here is never null.
+ * NOTE(review): presumably read from a metadata cache file by the caller -- confirm.
+ */
+ public ParquetTableMetadata_v1 getParquetMetadata() {
+ return metadata;
+ }
+
+ /**
+ * Creates a new Parquet-specific selection wrapping the given {@link FileSelection selection}.
+ *
+ * @param selection inner file selection; may be null
+ * @param metadata parquet metadata; must not be null when selection is non-null
+ * @return null if selection is null, otherwise a new selection
+ * @throws NullPointerException if selection is non-null and metadata is null
+ */
+ public static ParquetFileSelection create(final FileSelection selection, final ParquetTableMetadata_v1 metadata) {
+ if (selection == null) {
+ return null;
+ }
+ return new ParquetFileSelection(selection, metadata);
+ }
+
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index 91cd11255..4932aafb4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -234,9 +234,9 @@ public class ParquetFormatPlugin implements FormatPlugin{
// file:/a/b. The reason is that the file names above have been created in the form
// /a/b/c.parquet and the format of the selection root must match that of the file names
// otherwise downstream operations such as partition pruning can break.
- Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
- return new FileSelection(fileNames, metaRootPath.toString(), metadata, /* save metadata for future use */
- selection.getFileStatusList(fs));
+ final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
+ final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+ return ParquetFileSelection.create(newSelection, metadata);
} else {
// don't expand yet; ParquetGroupScan's metadata gathering operation
// does that.
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index ce2f845ee..3a9fc0de3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -19,7 +19,6 @@ package org.apache.drill.exec.store.parquet;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -178,13 +177,16 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
this.fs = ImpersonationUtil.createFileSystem(userName, formatPlugin.getFsConf());
this.entries = Lists.newArrayList();
- List<FileStatus> files = selection.getFileStatusList(fs);
+ final List<FileStatus> files = selection.getStatuses(fs);
for (FileStatus file : files) {
entries.add(new ReadEntryWithPath(file.getPath().toString()));
}
this.selectionRoot = selectionRoot;
- this.parquetTableMetadata = selection.getParquetMetadata();
+ if (selection instanceof ParquetFileSelection) {
+ final ParquetFileSelection pfs = ParquetFileSelection.class.cast(selection);
+ this.parquetTableMetadata = pfs.getParquetMetadata();
+ }
init();
}
@@ -341,7 +343,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
public void modifyFileSelection(FileSelection selection) {
entries.clear();
fileSet = Sets.newHashSet();
- for (String fileName : selection.getAsFiles()) {
+ for (String fileName : selection.getFiles()) {
entries.add(new ReadEntryWithPath(fileName));
fileSet.add(fileName);
}