aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src
diff options
context:
space:
mode:
authorHanifi Gunes <hanifigunes@gmail.com>2015-11-16 18:33:13 -0800
committerHanifi Gunes <hanifigunes@gmail.com>2015-11-24 16:50:34 -0800
commit367d74a65ce2871a1452361cbd13bbd5f4a6cc95 (patch)
tree0a9ee837d559649f0ebefce841313dc01d3bc80c /exec/java-exec/src
parentbd39d30024431cf7eb2939f5b336b82c3b1dbe93 (diff)
DRILL-2618: handle queries over empty folders consistently so that they report table not found rather than failing.
Refactor FileSelection to eliminate redundancy, make it more managable Fix WorkspaceSchemaFactory to handle empty folders. Introduce ParquetFileSelection, a sub-class of FileSelection that carries along metadata cache Fix MagicStringMatcher so that it operate on files only. Unit test file selection
Diffstat (limited to 'exec/java-exec/src')
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java4
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java5
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java12
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java279
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java4
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java24
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java6
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java1
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java62
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java6
-rw-r--r--exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java10
-rw-r--r--exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java66
13 files changed, 319 insertions, 161 deletions
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
index fd2f850e8..7297c2706 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
@@ -84,8 +84,8 @@ public class FileSystemPartitionDescriptor extends AbstractPartitionDescriptor {
@Override
public GroupScan createNewGroupScan(List<String> newFiles) throws IOException {
- final FileSelection newFileSelection = new FileSelection(newFiles, getBaseTableLocation(), true);
- final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newFileSelection);
+ final FileSelection newSelection = FileSelection.create(null, newFiles, getBaseTableLocation());
+ final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newSelection);
return newScan;
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
index 5294b3081..cda5a5eed 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
@@ -46,7 +46,6 @@ public class ParquetPartitionDescriptor extends AbstractPartitionDescriptor {
private final List<SchemaPath> partitionColumns;
private final DrillScanRel scanRel;
- static final int MAX_NESTED_SUBDIRS = 10;
public ParquetPartitionDescriptor(PlannerSettings settings, DrillScanRel scanRel) {
ParquetGroupScan scan = (ParquetGroupScan) scanRel.getGroupScan();
@@ -81,8 +80,8 @@ public class ParquetPartitionDescriptor extends AbstractPartitionDescriptor {
@Override
public GroupScan createNewGroupScan(List<String> newFiles) throws IOException {
- final FileSelection newFileSelection = new FileSelection(newFiles, getBaseTableLocation(), true);
- final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newFileSelection);
+ final FileSelection newSelection = FileSelection.create(null, newFiles, getBaseTableLocation());
+ final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newSelection);
return newScan;
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
index 37011d9cd..fb51bfcc5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
@@ -138,9 +138,19 @@ public class BasicFormatMatcher extends FormatMatcher{
}
public boolean matches(DrillFileSystem fs, FileStatus status) throws IOException{
- if (ranges.isEmpty()) {
+ if (ranges.isEmpty() || status.isDirectory()) {
return false;
}
+ // walk all the way down in the symlinks until a hard entry is reached
+ FileStatus current = status;
+ while (current.isSymlink()) {
+ current = fs.getFileStatus(status.getSymlink());
+ }
+ // if hard entry is not a file nor can it be a symlink then it is not readable simply deny matching.
+ if (!current.isFile()) {
+ return false;
+ }
+
final Range<Long> fileRange = Range.closedOpen( 0L, status.getLen());
try (FSDataInputStream is = fs.open(status.getPath())) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index d17cfed41..e6cb6535f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -19,82 +19,82 @@ package org.apache.drill.exec.store.dfs;
import java.io.IOException;
import java.net.URI;
-import java.util.Collections;
import java.util.List;
-import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
-import com.google.common.base.Stopwatch;
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.drill.exec.store.parquet.Metadata.ParquetTableMetadata_v1;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Strings;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.google.common.collect.Lists;
-
/**
- * Jackson serializable description of a file selection. Maintains an internal set of file statuses. However, also
- * serializes out as a list of Strings. All accessing methods first regenerate the FileStatus objects if they are not
- * available. This allows internal movement of FileStatus and the ability to serialize if need be.
+ * Jackson serializable description of a file selection.
*/
public class FileSelection {
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class);
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class);
+ private static final String PATH_SEPARATOR = System.getProperty("file.separator");
- @JsonIgnore
private List<FileStatus> statuses;
public List<String> files;
- public String selectionRoot;
-
- // this is a temporary location for the reference to Parquet metadata
- // TODO: ideally this should be in a Parquet specific derived class.
- private ParquetTableMetadata_v1 parquetMeta = null;
+ public final String selectionRoot;
- public FileSelection() {
- }
-
- public FileSelection(List<String> files, String selectionRoot, boolean dummy) {
- this.files = files;
- this.selectionRoot = selectionRoot;
- }
-
- public FileSelection(List<String> files, boolean dummy) {
+ /**
+ * Creates a {@link FileSelection selection} out of given file statuses/files and selection root.
+ *
+ * @param statuses list of file statuses
+ * @param files list of files
+ * @param selectionRoot root path for selections
+ */
+ protected FileSelection(final List<FileStatus> statuses, final List<String> files, final String selectionRoot) {
+ this.statuses = statuses;
this.files = files;
+ this.selectionRoot = Preconditions.checkNotNull(selectionRoot);
}
- public FileSelection(List<FileStatus> statuses) {
- this(statuses, null);
+ /**
+ * Copy constructor for convenience.
+ */
+ protected FileSelection(final FileSelection selection) {
+ Preconditions.checkNotNull(selection, "selection cannot be null");
+ this.statuses = selection.statuses;
+ this.files = selection.files;
+ this.selectionRoot = selection.selectionRoot;
}
- public FileSelection(List<String> files, String selectionRoot,
- ParquetTableMetadata_v1 meta) {
- this.files = files;
- this.selectionRoot = selectionRoot;
- this.parquetMeta = meta;
+ public String getSelectionRoot() {
+ return selectionRoot;
}
- public FileSelection(List<FileStatus> statuses, String selectionRoot) {
- this.statuses = statuses;
- this.files = Lists.newArrayList();
- for (FileStatus f : statuses) {
- files.add(f.getPath().toString());
+ public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException {
+ if (statuses == null) {
+ final List<FileStatus> newStatuses = Lists.newArrayList();
+ for (final String pathStr:files) {
+ newStatuses.add(fs.getFileStatus(new Path(pathStr)));
+ }
+ statuses = newStatuses;
}
- this.selectionRoot = selectionRoot;
+ return statuses;
}
- public FileSelection(List<String> files, String selectionRoot,
- ParquetTableMetadata_v1 meta, List<FileStatus> statuses) {
- this.files = files;
- this.selectionRoot = selectionRoot;
- this.parquetMeta = meta;
- this.statuses = statuses;
+ public List<String> getFiles() {
+ if (files == null) {
+ final List<String> newFiles = Lists.newArrayList();
+ for (final FileStatus status:statuses) {
+ newFiles.add(status.getPath().toString());
+ }
+ files = newFiles;
+ }
+ return files;
}
public boolean containsDirectories(DrillFileSystem fs) throws IOException {
- init(fs);
- for (FileStatus p : statuses) {
- if (p.isDirectory()) {
+ for (final FileStatus status : getStatuses(fs)) {
+ if (status.isDirectory()) {
return true;
}
}
@@ -102,110 +102,127 @@ public class FileSelection {
}
public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
- Stopwatch timer = new Stopwatch();
- timer.start();
- init(fs);
- List<FileStatus> newList = Lists.newArrayList();
- for (FileStatus p : statuses) {
- if (p.isDirectory()) {
- List<FileStatus> statuses = fs.list(true, p.getPath());
- for (FileStatus s : statuses) {
- newList.add(s);
- }
- } else {
- newList.add(p);
- }
+ final List<FileStatus> statuses = getStatuses(fs);
+ final int total = statuses.size();
+ final Path[] paths = new Path[total];
+ for (int i=0; i<total; i++) {
+ paths[i] = statuses.get(i).getPath();
}
- logger.info("FileSelection.minusDirectories() took {} ms, numFiles: {}",
- timer.elapsed(TimeUnit.MILLISECONDS), newList.size());
- return new FileSelection(newList, selectionRoot);
+ final List<FileStatus> allStats = fs.list(true, paths);
+ final List<FileStatus> nonDirectories = Lists.newArrayList(Iterables.filter(allStats, new Predicate<FileStatus>() {
+ @Override
+ public boolean apply(@Nullable FileStatus status) {
+ return !status.isDirectory();
+ }
+ }));
+
+ return create(nonDirectories, null, selectionRoot);
}
public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
- init(fs);
- return statuses.get(0);
+ return getStatuses(fs).get(0);
}
- public List<String> getAsFiles() {
- if (!files.isEmpty()) {
- return files;
+ private static String commonPath(final List<FileStatus> statuses) {
+ if (statuses == null || statuses.isEmpty()) {
+ return "";
}
- if (statuses == null) {
- return Collections.emptyList();
- }
- List<String> files = Lists.newArrayList();
- for (FileStatus s : statuses) {
- files.add(s.getPath().toString());
- }
- return files;
- }
- private void init(DrillFileSystem fs) throws IOException {
- Stopwatch timer = new Stopwatch();
- timer.start();
- if (files != null && statuses == null) {
- statuses = Lists.newArrayList();
- for (String p : files) {
- statuses.add(fs.getFileStatus(new Path(p)));
- }
+ final List<String> files = Lists.newArrayList();
+ for (final FileStatus status : statuses) {
+ files.add(status.getPath().toString());
}
- logger.info("FileSelection.init() took {} ms, numFiles: {}",
- timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
- }
-
- public List<FileStatus> getFileStatusList(DrillFileSystem fs) throws IOException {
- init(fs);
- return statuses;
+ return commonPathForFiles(files);
}
/**
- * Return the parquet table metadata that may have been read
- * from a metadata cache file during creation of this file selection.
- * It will always be null for non-parquet files and null for cases
- * where no metadata cache was created.
+ * Returns longest common path for the given list of files.
+ *
+ * @param files list of files.
+ * @return longest common path
*/
- public ParquetTableMetadata_v1 getParquetMetadata() {
- return parquetMeta;
- }
+ private static String commonPathForFiles(final List<String> files) {
+ if (files == null || files.isEmpty()) {
+ return "";
+ }
- private static String commonPath(FileStatus... paths) {
- String commonPath = "";
- String[][] folders = new String[paths.length][];
- for (int i = 0; i < paths.length; i++) {
- folders[i] = Path.getPathWithoutSchemeAndAuthority(paths[i].getPath()).toString().split("/");
+ final int total = files.size();
+ final String[][] folders = new String[total][];
+ int shortest = Integer.MAX_VALUE;
+ for (int i = 0; i < total; i++) {
+ final Path path = new Path(files.get(i));
+ folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
+ shortest = Math.min(shortest, folders[i].length);
}
- for (int j = 0; j < folders[0].length; j++) {
- String thisFolder = folders[0][j];
- boolean allMatched = true;
- for (int i = 1; i < folders.length && allMatched; i++) {
- if (folders[i].length < j) {
- allMatched = false;
- break;
+
+ int latest;
+ out:
+ for (latest = 0; latest < shortest; latest++) {
+ final String current = folders[0][latest];
+ for (int i = 1; i < folders.length; i++) {
+ if (!current.equals(folders[i][latest])) {
+ break out;
}
- allMatched &= folders[i][j].equals(thisFolder);
- }
- if (allMatched) {
- commonPath += thisFolder + "/";
- } else {
- break;
}
}
- URI oneURI = paths[0].getPath().toUri();
- return new Path(oneURI.getScheme(), oneURI.getAuthority(), commonPath).toString();
+ final Path path = new Path(files.get(0));
+ final URI uri = path.toUri();
+ final String pathString = buildPath(folders[0], latest);
+ return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
}
- public static FileSelection create(DrillFileSystem fs, String parent, String path) throws IOException {
- Path p = new Path(parent,removeLeadingSlash(path));
- FileStatus[] status = fs.globStatus(p);
- if (status == null || status.length == 0) {
+ private static String buildPath(final String[] path, final int folderIndex) {
+ final StringBuilder builder = new StringBuilder();
+ for (int i=0; i<folderIndex; i++) {
+ builder.append(path[i]).append(PATH_SEPARATOR);
+ }
+ builder.deleteCharAt(builder.length()-1);
+ return builder.toString();
+ }
+
+ public static FileSelection create(final DrillFileSystem fs, final String parent, final String path) throws IOException {
+ final Path combined = new Path(parent, removeLeadingSlash(path));
+ final FileStatus[] statuses = fs.globStatus(combined);
+ if (statuses == null) {
+ return null;
+ }
+ return create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+ }
+
+ /**
+ * Creates a {@link FileSelection selection} with the given file statuses/files and selection root.
+ *
+ * @param statuses list of file statuses
+ * @param files list of files
+ * @param root root path for selections
+ *
+ * @return null if creation of {@link FileSelection} fails with an {@link IllegalArgumentException}
+ * otherwise a new selection.
+ *
+ * @see FileSelection#FileSelection(List, List, String)
+ */
+ public static FileSelection create(final List<FileStatus> statuses, final List<String> files, final String root) {
+ final boolean bothNonEmptySelection = (statuses != null && statuses.size() > 0) && (files != null && files.size() == 0);
+ final boolean bothEmptySelection = (statuses == null || statuses.size() == 0) && (files == null || files.size() == 0);
+
+ if (bothNonEmptySelection || bothEmptySelection) {
return null;
}
- if (status.length == 1) {
- URI oneURI = status[0].getPath().toUri();
- String selectionRoot = new Path(oneURI.getScheme(), oneURI.getAuthority(), p.toUri().getPath()).toString();
- return new FileSelection(Collections.singletonList(status[0]), selectionRoot);
+
+ final String selectionRoot;
+ if (statuses == null || statuses.isEmpty()) {
+ selectionRoot = commonPathForFiles(files);
+ } else {
+ if (statuses.size() == 1 && !Strings.isNullOrEmpty(root)) {
+ final Path rootPath = new Path(root);
+ final URI uri = statuses.get(0).getPath().toUri();
+ final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath());
+ selectionRoot = path.toString();
+ } else {
+ selectionRoot = commonPath(statuses);
+ }
}
- return new FileSelection(Lists.newArrayList(status), commonPath(status));
+ return new FileSelection(statuses, files, selectionRoot);
}
private static String removeLeadingSlash(String path) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
index eda9d4d5b..14f1441e6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
@@ -27,7 +27,6 @@ import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
import org.apache.drill.exec.physical.base.AbstractWriter;
import org.apache.drill.exec.physical.base.PhysicalOperator;
-import org.apache.drill.exec.planner.logical.DrillTable;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.StoragePluginOptimizerRule;
import org.apache.hadoop.conf.Configuration;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
index 1ea7da7da..4473c5c46 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
@@ -37,7 +37,7 @@ public class FormatSelection {
@JsonCreator
public FormatSelection(@JsonProperty("format") FormatPluginConfig format, @JsonProperty("files") List<String> files){
this.format = format;
- this.selection = new FileSelection(files, true);
+ this.selection = FileSelection.create(null, files, null);
}
public FormatSelection(FormatPluginConfig format, FileSelection selection) {
@@ -53,7 +53,7 @@ public class FormatSelection {
@JsonProperty("files")
public List<String> getAsFiles(){
- return selection.getAsFiles();
+ return selection.getFiles();
}
@JsonIgnore
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
index ac832dade..37da60600 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
@@ -316,16 +316,16 @@ public class WorkspaceSchemaFactory {
@Override
public DrillTable create(String key) {
try {
-
- FileSelection fileSelection = FileSelection.create(fs, config.getLocation(), key);
- if (fileSelection == null) {
+ final FileSelection selection = FileSelection.create(fs, config.getLocation(), key);
+ if (selection == null) {
return null;
}
- if (fileSelection.containsDirectories(fs)) {
- for (FormatMatcher m : dirMatchers) {
+ final boolean hasDirectories = selection.containsDirectories(fs);
+ if (hasDirectories) {
+ for (final FormatMatcher matcher : dirMatchers) {
try {
- DrillTable table = m.isReadable(fs, fileSelection, plugin, storageEngineName, schemaConfig.getUserName());
+ DrillTable table = matcher.isReadable(fs, selection, plugin, storageEngineName, schemaConfig.getUserName());
if (table != null) {
return table;
}
@@ -333,11 +333,15 @@ public class WorkspaceSchemaFactory {
logger.debug("File read failed.", e);
}
}
- fileSelection = fileSelection.minusDirectories(fs);
}
- for (FormatMatcher m : fileMatchers) {
- DrillTable table = m.isReadable(fs, fileSelection, plugin, storageEngineName, schemaConfig.getUserName());
+ final FileSelection newSelection = hasDirectories ? selection.minusDirectories(fs) : selection;
+ if (newSelection == null) {
+ return null;
+ }
+
+ for (final FormatMatcher matcher : fileMatchers) {
+ DrillTable table = matcher.isReadable(fs, newSelection, plugin, storageEngineName, schemaConfig.getUserName());
if (table != null) {
return table;
}
@@ -396,7 +400,7 @@ public class WorkspaceSchemaFactory {
FormatMatcher matcher = null;
Queue<FileStatus> listOfFiles = new LinkedList<>();
- listOfFiles.addAll(fileSelection.getFileStatusList(fs));
+ listOfFiles.addAll(fileSelection.getStatuses(fs));
while (!listOfFiles.isEmpty()) {
FileStatus currentFile = listOfFiles.poll();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
index a559bebdc..d75b6f6b7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
@@ -78,7 +78,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
@JsonProperty("selectionRoot") String selectionRoot
) throws IOException, ExecutionSetupException {
this(ImpersonationUtil.resolveUserName(userName),
- new FileSelection(files, true),
+ FileSelection.create(null, files, selectionRoot),
(EasyFormatPlugin<?>)engineRegistry.getFormatPlugin(storageConfig, formatConfig),
columns,
selectionRoot);
@@ -130,7 +130,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
final DrillFileSystem dfs = ImpersonationUtil.createFileSystem(getUserName(), formatPlugin.getFsConf());
this.selection = selection;
BlockMapBuilder b = new BlockMapBuilder(dfs, formatPlugin.getContext().getBits());
- this.chunks = b.generateFileWork(selection.getFileStatusList(dfs), formatPlugin.isBlockSplittable());
+ this.chunks = b.generateFileWork(selection.getStatuses(dfs), formatPlugin.isBlockSplittable());
this.maxWidth = chunks.size();
this.endpointAffinities = AffinityCreator.getAffinityMap(chunks);
}
@@ -152,7 +152,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
@JsonProperty("files")
public List<String> getFiles() {
- return selection.getAsFiles();
+ return selection.getFiles();
}
@JsonProperty("columns")
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
index f5dcb7d7e..9393f4a53 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
@@ -22,7 +22,6 @@ import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.logical.StoragePluginConfig;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.base.AbstractGroupScan;
-import org.apache.drill.exec.proto.UserBitShared;
import org.apache.drill.exec.server.DrillbitContext;
import org.apache.drill.exec.store.RecordReader;
import org.apache.drill.exec.store.RecordWriter;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
new file mode 100644
index 000000000..26ebfc5df
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import com.google.common.base.Preconditions;
+import org.apache.drill.exec.store.dfs.FileSelection;
+import org.apache.drill.exec.store.parquet.Metadata.ParquetTableMetadata_v1;
+
+/**
+ * Parquet specific {@link FileSelection selection} that carries out {@link ParquetTableMetadata_v1 metadata} along.
+ */
+public class ParquetFileSelection extends FileSelection {
+// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetFileSelection.class);
+
+ private final ParquetTableMetadata_v1 metadata;
+
+ protected ParquetFileSelection(final FileSelection delegate, final ParquetTableMetadata_v1 metadata) {
+ super(delegate);
+ this.metadata = Preconditions.checkNotNull(metadata, "Parquet metadata cannot be null");
+ }
+
+ /**
+ * Return the parquet table metadata that may have been read
+ * from a metadata cache file during creation of this file selection.
+ * It will always be null for non-parquet files and null for cases
+ * where no metadata cache was created.
+ */
+ public ParquetTableMetadata_v1 getParquetMetadata() {
+ return metadata;
+ }
+
+ /**
+ * Creates a new Parquet specific selection wrapping the given {@link FileSelection selection}.
+ *
+ * @param selection inner file selection
+ * @param metadata parquet metadata
+ * @return null if selection is null
+ * otherwise a new selection
+ */
+ public static ParquetFileSelection create(final FileSelection selection, final ParquetTableMetadata_v1 metadata) {
+ if (selection == null) {
+ return null;
+ }
+ return new ParquetFileSelection(selection, metadata);
+ }
+
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index 91cd11255..4932aafb4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -234,9 +234,9 @@ public class ParquetFormatPlugin implements FormatPlugin{
// file:/a/b. The reason is that the file names above have been created in the form
// /a/b/c.parquet and the format of the selection root must match that of the file names
// otherwise downstream operations such as partition pruning can break.
- Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
- return new FileSelection(fileNames, metaRootPath.toString(), metadata, /* save metadata for future use */
- selection.getFileStatusList(fs));
+ final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
+ final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+ return ParquetFileSelection.create(newSelection, metadata);
} else {
// don't expand yet; ParquetGroupScan's metadata gathering operation
// does that.
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index ce2f845ee..3a9fc0de3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -19,7 +19,6 @@ package org.apache.drill.exec.store.parquet;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -178,13 +177,16 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
this.fs = ImpersonationUtil.createFileSystem(userName, formatPlugin.getFsConf());
this.entries = Lists.newArrayList();
- List<FileStatus> files = selection.getFileStatusList(fs);
+ final List<FileStatus> files = selection.getStatuses(fs);
for (FileStatus file : files) {
entries.add(new ReadEntryWithPath(file.getPath().toString()));
}
this.selectionRoot = selectionRoot;
- this.parquetTableMetadata = selection.getParquetMetadata();
+ if (selection instanceof ParquetFileSelection) {
+ final ParquetFileSelection pfs = ParquetFileSelection.class.cast(selection);
+ this.parquetTableMetadata = pfs.getParquetMetadata();
+ }
init();
}
@@ -341,7 +343,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
public void modifyFileSelection(FileSelection selection) {
entries.clear();
fileSet = Sets.newHashSet();
- for (String fileName : selection.getAsFiles()) {
+ for (String fileName : selection.getFiles()) {
entries.add(new ReadEntryWithPath(fileName));
fileSet.add(fileName);
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
new file mode 100644
index 000000000..82f45ae62
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.dfs;
+
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.common.util.TestTools;
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class TestFileSelection extends BaseTestQuery {
+ private static final List<FileStatus> EMPTY_STATUSES = ImmutableList.of();
+ private static final List<String> EMPTY_FILES = ImmutableList.of();
+ private static final String EMPTY_ROOT = "";
+
+ @Test
+ public void testCreateReturnsNullWhenArgumentsAreIllegal() {
+ for (final Object statuses : new Object[] { null, EMPTY_STATUSES}) {
+ for (final Object files : new Object[]{null, EMPTY_FILES}) {
+ for (final Object root : new Object[]{null, EMPTY_ROOT}) {
+ final FileSelection selection = FileSelection.create((List<FileStatus>) statuses, (List<String>) files,
+ (String)root);
+ assertNull(selection);
+ }
+ }
+ }
+ }
+
+
+ @Test(expected = Exception.class)
+ public void testEmptyFolderThrowsTableNotFound() throws Exception {
+ final String table = String.format("%s/empty", TestTools.getTestResourcesPath());
+ final String query = String.format("select * from dfs.`%s`", table);
+ try {
+ testNoResult(query);
+ } catch (Exception ex) {
+ final String pattern = String.format("%s' not found", table).toLowerCase();
+ final boolean isTableNotFound = ex.getMessage().toLowerCase().contains(pattern);
+ assertTrue(isTableNotFound);
+ throw ex;
+ }
+ }
+
+}