aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorshimamoto <shimamoto@apache.org>2019-03-08 11:04:16 +0900
committerSorabh Hamirwasia <sorabh@apache.org>2019-03-15 11:35:47 -0700
commit2364b02175bec69cee2f9ceb4e52e1333da39f70 (patch)
treeeb8d2926f954cc2a84d3fc825c64899118662f9f
parenta72f1700a8d260a2f92ba8955c77df6428feb919 (diff)
downloaddrill-2364b02175bec69cee2f9ceb4e52e1333da39f70.tar.gz
DRILL-7014: Format plugin for LTSV files
closes #1627
-rw-r--r--contrib/format-ltsv/README.md38
-rw-r--r--contrib/format-ltsv/pom.xml56
-rw-r--r--contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java97
-rw-r--r--contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java60
-rw-r--r--contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java165
-rw-r--r--contrib/format-ltsv/src/main/resources/drill-module.conf20
-rw-r--r--contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java100
-rw-r--r--contrib/format-ltsv/src/test/resources/emptylines.ltsv4
-rw-r--r--contrib/format-ltsv/src/test/resources/invalid.ltsv1
-rw-r--r--contrib/format-ltsv/src/test/resources/simple.ltsv2
-rw-r--r--contrib/native/client/src/protobuf/UserBitShared.pb.cc15
-rw-r--r--contrib/native/client/src/protobuf/UserBitShared.pb.h5
-rw-r--r--contrib/pom.xml1
-rw-r--r--pom.xml2
-rw-r--r--protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java21
-rw-r--r--protocol/src/main/java/org/apache/drill/exec/proto/beans/CoreOperatorType.java4
-rw-r--r--protocol/src/main/protobuf/UserBitShared.proto1
17 files changed, 576 insertions, 16 deletions
diff --git a/contrib/format-ltsv/README.md b/contrib/format-ltsv/README.md
new file mode 100644
index 000000000..05916b08c
--- /dev/null
+++ b/contrib/format-ltsv/README.md
@@ -0,0 +1,38 @@
+# Drill LTSV files Plugin
+
+Drill LTSV storage plugin allows you to perform interactive analysis using SQL against LTSV files.
+
+For more information about LTSV, please see [LTSV (Labeled Tab-separated Values)](http://ltsv.org/).
+
+## Example of Querying an LTSV File
+
+### About the Data
+
+Each line in the LTSV file has the following structure:
+
+```
+time:<value> TAB host:<value> TAB forwardedfor:<value> TAB req:<value> TAB status:<value> TAB size:<value> TAB referer:<value> TAB ua:<value> TAB reqtime:<value> TAB apptime:<value> TAB vhost:<value> NEWLINE
+```
+
+For example,
+
+```
+time:30/Nov/2016:00:55:08 +0900<TAB>host:xxx.xxx.xxx.xxx<TAB>forwardedfor:-<TAB>req:GET /v1/xxx HTTP/1.1<TAB>status:200<TAB>size:4968<TAB>referer:-<TAB>ua:Java/1.8.0_131<TAB>reqtime:2.532<TAB>apptime:2.532<TAB>vhost:api.example.com
+time:30/Nov/2016:00:56:37 +0900<TAB>host:xxx.xxx.xxx.xxx<TAB>forwardedfor:-<TAB>req:GET /v1/yyy HTTP/1.1<TAB>status:200<TAB>size:412<TAB>referer:-<TAB>ua:Java/1.8.0_201<TAB>reqtime:3.580<TAB>apptime:3.580<TAB>vhost:api.example.com
+```
+
+The Drill dfs storage plugin definition includes an LTSV format that requires a file to have an `.ltsv` extension.
+
+### Query the Data
+
+Issue a SELECT statement to get the second row in the file.
+
+```
+0: jdbc:drill:zk=local> SELECT * FROM dfs.`/tmp/sample.ltsv` WHERE reqtime > 3.0;
++-----------------------------+------------------+---------------+-----------------------+---------+-------+----------+-----------------+----------+----------+------------------+
+| time | host | forwardedfor | req | status | size | referer | ua | reqtime | apptime | vhost |
++-----------------------------+------------------+---------------+-----------------------+---------+-------+----------+-----------------+----------+----------+------------------+
+| 30/Nov/2016:00:56:37 +0900 | xxx.xxx.xxx.xxx | - | GET /v1/yyy HTTP/1.1 | 200 | 412 | - | Java/1.8.0_201 | 3.580 | 3.580 | api.example.com |
++-----------------------------+------------------+---------------+-----------------------+---------+-------+----------+-----------------+----------+----------+------------------+
+1 row selected (6.074 seconds)
+```
diff --git a/contrib/format-ltsv/pom.xml b/contrib/format-ltsv/pom.xml
new file mode 100644
index 000000000..9031d7104
--- /dev/null
+++ b/contrib/format-ltsv/pom.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>drill-contrib-parent</artifactId>
+ <groupId>org.apache.drill.contrib</groupId>
+ <version>1.16.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>drill-format-ltsv</artifactId>
+ <name>contrib/ltsv-format-plugin</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <!-- Test dependencies -->
+ <dependency>
+ <groupId>org.apache.drill.exec</groupId>
+ <artifactId>drill-java-exec</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-common</artifactId>
+ <classifier>tests</classifier>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java
new file mode 100644
index 000000000..8ff62ed56
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.logical.StoragePluginConfig;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.planner.common.DrillStatsTable.TableStatistics;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.RecordReader;
+import org.apache.drill.exec.store.RecordWriter;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
+import org.apache.drill.exec.store.dfs.easy.EasyWriter;
+import org.apache.drill.exec.store.dfs.easy.FileWork;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.IOException;
+import java.util.List;
+
+public class LTSVFormatPlugin extends EasyFormatPlugin<LTSVFormatPluginConfig> {
+
+ private static final boolean IS_COMPRESSIBLE = false;
+
+ private static final String DEFAULT_NAME = "ltsv";
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LTSVFormatPlugin.class);
+
+ public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig storageConfig) {
+ this(name, context, fsConf, storageConfig, new LTSVFormatPluginConfig());
+ }
+
+ public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig config, LTSVFormatPluginConfig formatPluginConfig) {
+ super(name, context, fsConf, config, formatPluginConfig, true, false, false, IS_COMPRESSIBLE, formatPluginConfig.getExtensions(), DEFAULT_NAME);
+ }
+
+ @Override
+ public RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs, FileWork fileWork, List<SchemaPath> columns, String userName) throws ExecutionSetupException {
+ return new LTSVRecordReader(context, fileWork.getPath(), dfs, columns);
+ }
+
+
+ @Override
+ public int getReaderOperatorType() {
+ return UserBitShared.CoreOperatorType.LTSV_SUB_SCAN_VALUE;
+ }
+
+ @Override
+ public int getWriterOperatorType() {
+ throw new UnsupportedOperationException("Drill doesn't currently support writing to LTSV files.");
+ }
+
+ @Override
+ public boolean supportsPushDown() {
+ return true;
+ }
+
+ @Override
+ public RecordWriter getRecordWriter(FragmentContext context, EasyWriter writer) throws IOException {
+ throw new UnsupportedOperationException("Drill doesn't currently support writing to LTSV files.");
+ }
+
+ @Override
+ public boolean supportsStatistics() {
+ return false;
+ }
+
+ @Override
+ public TableStatistics readStatistics(FileSystem fs, Path statsTablePath) throws IOException {
+ throw new UnsupportedOperationException("unimplemented");
+ }
+
+ @Override
+ public void writeStatistics(TableStatistics statistics, FileSystem fs, Path statsTablePath) throws IOException {
+ throw new UnsupportedOperationException("unimplemented");
+ }
+
+}
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java
new file mode 100644
index 000000000..1e96b74e9
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+@JsonTypeName("ltsv")
+public class LTSVFormatPluginConfig implements FormatPluginConfig {
+ private static final List<String> DEFAULT_EXTS = ImmutableList.of("ltsv");
+
+ public List<String> extensions;
+
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ public List<String> getExtensions() {
+ if (extensions == null) {
+      // when loading an old LTSVFormatPluginConfig that doesn't contain an "extensions" attribute
+ return DEFAULT_EXTS;
+ }
+ return extensions;
+ }
+
+ @Override
+ public int hashCode() {
+ List<String> array = extensions != null ? extensions : DEFAULT_EXTS;
+ return Arrays.hashCode(array.toArray(new String[array.size()]));
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ } else if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ LTSVFormatPluginConfig that = (LTSVFormatPluginConfig) obj;
+ return Objects.equals(extensions, that.extensions);
+ }
+}
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
new file mode 100644
index 000000000..cb2385013
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+public class LTSVRecordReader extends AbstractRecordReader {
+
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LTSVRecordReader.class);
+
+ private static final int MAX_RECORDS_PER_BATCH = 8096;
+
+ private final String inputPath;
+
+ private final FSDataInputStream fsStream;
+
+ private final BufferedReader reader;
+
+ private DrillBuf buffer;
+
+ private VectorContainerWriter writer;
+
+ public LTSVRecordReader(FragmentContext fragmentContext, Path path, DrillFileSystem fileSystem,
+ List<SchemaPath> columns) throws OutOfMemoryException {
+ this.inputPath = path.toUri().getPath();
+ try {
+ this.fsStream = fileSystem.open(path);
+ this.reader = new BufferedReader(new InputStreamReader(fsStream.getWrappedStream(), StandardCharsets.UTF_8));
+ this.buffer = fragmentContext.getManagedBuffer();
+ setColumns(columns);
+
+ } catch (IOException e) {
+ String msg = String.format("Failed to open input file: %s", inputPath);
+ throw UserException.dataReadError(e).message(msg).build(logger);
+ }
+ }
+
+ @Override
+ protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> projected) {
+ Set<SchemaPath> transformed = new LinkedHashSet<>();
+ if (!isStarQuery()) {
+ for (SchemaPath column : projected) {
+ transformed.add(column);
+ }
+ } else {
+ transformed.add(SchemaPath.STAR_COLUMN);
+ }
+ return transformed;
+ }
+
+ public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
+ this.writer = new VectorContainerWriter(output);
+ }
+
+ public int next() {
+ this.writer.allocate();
+ this.writer.reset();
+
+ int recordCount = 0;
+
+ try {
+ BaseWriter.MapWriter map = this.writer.rootAsMap();
+ String line = null;
+
+ while (recordCount < MAX_RECORDS_PER_BATCH && (line = this.reader.readLine()) != null) {
+ // Skip empty lines
+ if (line.trim().length() == 0) {
+ continue;
+ }
+
+ List<String[]> fields = new ArrayList<>();
+ for (String field : line.split("\t")) {
+ int index = field.indexOf(":");
+ if (index <= 0) {
+ throw new ParseException(String.format("Invalid LTSV format: %s\n%d:%s", inputPath, recordCount + 1, line), 0);
+ }
+
+ String fieldName = field.substring(0, index);
+ String fieldValue = field.substring(index + 1);
+ if (selectedColumn(fieldName)) {
+ fields.add(new String[]{fieldName, fieldValue});
+ }
+ }
+
+ if (fields.size() == 0) {
+ continue;
+ }
+
+ this.writer.setPosition(recordCount);
+ map.start();
+
+ for (String[] field : fields) {
+ byte[] bytes = field[1].getBytes(StandardCharsets.UTF_8);
+ this.buffer = this.buffer.reallocIfNeeded(bytes.length);
+ this.buffer.setBytes(0, bytes, 0, bytes.length);
+ map.varChar(field[0]).writeVarChar(0, bytes.length, buffer);
+ }
+
+ map.end();
+ recordCount++;
+ }
+
+ this.writer.setValueCount(recordCount);
+ return recordCount;
+
+ } catch (final Exception e) {
+ String msg = String.format("Failure while reading messages from %s. Record reader was at record: %d", inputPath, recordCount + 1);
+ throw UserException.dataReadError(e).message(msg).build(logger);
+ }
+ }
+
+ private boolean selectedColumn(String fieldName) {
+ for (SchemaPath col : getColumns()) {
+ if (col.equals(SchemaPath.STAR_COLUMN) || col.getRootSegment().getPath().equals(fieldName)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public void close() throws Exception {
+ AutoCloseables.close(reader, fsStream);
+ }
+
+}
diff --git a/contrib/format-ltsv/src/main/resources/drill-module.conf b/contrib/format-ltsv/src/main/resources/drill-module.conf
new file mode 100644
index 000000000..faf163261
--- /dev/null
+++ b/contrib/format-ltsv/src/main/resources/drill-module.conf
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file tells Drill to consider this module when class path scanning.
+// This file can also include any supplementary configuration information.
+// This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning.packages += "org.apache.drill.exec.store.ltsv"
diff --git a/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java b/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java
new file mode 100644
index 000000000..61f65f473
--- /dev/null
+++ b/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.dfs.FileSystemConfig;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class TestLTSVRecordReader extends ClusterTest {
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ startCluster(ClusterFixture.builder(dirTestWatcher));
+
+ DrillbitContext context = cluster.drillbit().getContext();
+ FileSystemConfig original = (FileSystemConfig) context.getStorage().getPlugin("cp").getConfig();
+ Map<String, FormatPluginConfig> newFormats = new HashMap<>(original.getFormats());
+ newFormats.put("ltsv", new LTSVFormatPluginConfig());
+ FileSystemConfig pluginConfig = new FileSystemConfig(original.getConnection(), original.getConfig(), original.getWorkspaces(), newFormats);
+ pluginConfig.setEnabled(true);
+ context.getStorage().createOrUpdate("cp", pluginConfig, true);
+ }
+
+ @Test
+ public void testWildcard() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT * FROM cp.`simple.ltsv`")
+ .unOrdered()
+ .baselineColumns("host", "forwardedfor", "req", "status", "size", "referer", "ua", "reqtime", "apptime", "vhost")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/xxx HTTP/1.1", "200", "4968", "-", "Java/1.8.0_131", "2.532", "2.532", "api.example.com")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/yyy HTTP/1.1", "200", "412", "-", "Java/1.8.0_201", "3.580", "3.580", "api.example.com")
+ .go();
+ }
+
+ @Test
+ public void testSelectColumns() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT ua, reqtime FROM cp.`simple.ltsv`")
+ .unOrdered()
+ .baselineColumns("ua", "reqtime")
+ .baselineValues("Java/1.8.0_131", "2.532")
+ .baselineValues("Java/1.8.0_201", "3.580")
+ .go();
+ }
+
+ @Test
+ public void testQueryWithConditions() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT * FROM cp.`simple.ltsv` WHERE reqtime > 3.0")
+ .unOrdered()
+ .baselineColumns("host", "forwardedfor", "req", "status", "size", "referer", "ua", "reqtime", "apptime", "vhost")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/yyy HTTP/1.1", "200", "412", "-", "Java/1.8.0_201", "3.580", "3.580", "api.example.com")
+ .go();
+ }
+
+ @Test
+ public void testSkipEmptyLines() throws Exception {
+ assertEquals(2, queryBuilder().sql("SELECT * FROM cp.`emptylines.ltsv`").run().recordCount());
+ }
+
+ @Test
+ public void testReadException() throws Exception {
+ try {
+ run("SELECT * FROM cp.`invalid.ltsv`");
+ fail();
+ } catch (UserException e) {
+ assertEquals(UserBitShared.DrillPBError.ErrorType.DATA_READ, e.getErrorType());
+ assertTrue(e.getMessage().contains("Failure while reading messages from /invalid.ltsv. Record reader was at record: 1"));
+ }
+ }
+
+}
diff --git a/contrib/format-ltsv/src/test/resources/emptylines.ltsv b/contrib/format-ltsv/src/test/resources/emptylines.ltsv
new file mode 100644
index 000000000..2cdfe4efd
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/emptylines.ltsv
@@ -0,0 +1,4 @@
+time:30/Nov/2016:00:55:08 +0900 host:xxx.xxx.xxx.xxx
+
+
+time:30/Nov/2016:00:56:37 +0900 host:xxx.xxx.xxx.xxx
diff --git a/contrib/format-ltsv/src/test/resources/invalid.ltsv b/contrib/format-ltsv/src/test/resources/invalid.ltsv
new file mode 100644
index 000000000..2395e0af5
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/invalid.ltsv
@@ -0,0 +1 @@
+time:30/Nov/2016:00:55:08 +0900 :xxx.xxx.xxx.xxx
diff --git a/contrib/format-ltsv/src/test/resources/simple.ltsv b/contrib/format-ltsv/src/test/resources/simple.ltsv
new file mode 100644
index 000000000..d09ce0c97
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/simple.ltsv
@@ -0,0 +1,2 @@
+host:xxx.xxx.xxx.xxx forwardedfor:- req:GET /v1/xxx HTTP/1.1 status:200 size:4968 referer:- ua:Java/1.8.0_131 reqtime:2.532 apptime:2.532 vhost:api.example.com
+host:xxx.xxx.xxx.xxx forwardedfor:- req:GET /v1/yyy HTTP/1.1 status:200 size:412 referer:- ua:Java/1.8.0_201 reqtime:3.580 apptime:3.580 vhost:api.example.com
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.cc b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
index 2e3f1e5af..6703b5b64 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.cc
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.cc
@@ -1032,7 +1032,7 @@ void AddDescriptorsImpl() {
"entState\022\013\n\007SENDING\020\000\022\027\n\023AWAITING_ALLOCA"
"TION\020\001\022\013\n\007RUNNING\020\002\022\014\n\010FINISHED\020\003\022\r\n\tCAN"
"CELLED\020\004\022\n\n\006FAILED\020\005\022\032\n\026CANCELLATION_REQ"
- "UESTED\020\006*\351\t\n\020CoreOperatorType\022\021\n\rSINGLE_"
+ "UESTED\020\006*\374\t\n\020CoreOperatorType\022\021\n\rSINGLE_"
"SENDER\020\000\022\024\n\020BROADCAST_SENDER\020\001\022\n\n\006FILTER"
"\020\002\022\022\n\016HASH_AGGREGATE\020\003\022\r\n\tHASH_JOIN\020\004\022\016\n"
"\nMERGE_JOIN\020\005\022\031\n\025HASH_PARTITION_SENDER\020\006"
@@ -1063,14 +1063,14 @@ void AddDescriptorsImpl() {
"T\0206\022\023\n\017PCAPNG_SUB_SCAN\0207\022\022\n\016RUNTIME_FILT"
"ER\0208\022\017\n\013ROWKEY_JOIN\0209\022\023\n\017SYSLOG_SUB_SCAN"
"\020:\022\030\n\024STATISTICS_AGGREGATE\020;\022\020\n\014UNPIVOT_"
- "MAPS\020<\022\024\n\020STATISTICS_MERGE\020=*g\n\nSaslStat"
- "us\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSASL_START\020\001\022\024\n\020"
- "SASL_IN_PROGRESS\020\002\022\020\n\014SASL_SUCCESS\020\003\022\017\n\013"
- "SASL_FAILED\020\004B.\n\033org.apache.drill.exec.p"
- "rotoB\rUserBitSharedH\001"
+ "MAPS\020<\022\024\n\020STATISTICS_MERGE\020=\022\021\n\rLTSV_SUB"
+ "_SCAN\020>*g\n\nSaslStatus\022\020\n\014SASL_UNKNOWN\020\000\022"
+ "\016\n\nSASL_START\020\001\022\024\n\020SASL_IN_PROGRESS\020\002\022\020\n"
+ "\014SASL_SUCCESS\020\003\022\017\n\013SASL_FAILED\020\004B.\n\033org."
+ "apache.drill.exec.protoB\rUserBitSharedH\001"
};
::google::protobuf::DescriptorPool::InternalAddGeneratedFile(
- descriptor, 5621);
+ descriptor, 5640);
::google::protobuf::MessageFactory::InternalRegisterGeneratedFile(
"UserBitShared.proto", &protobuf_RegisterTypes);
::protobuf_Types_2eproto::AddDescriptors();
@@ -1311,6 +1311,7 @@ bool CoreOperatorType_IsValid(int value) {
case 59:
case 60:
case 61:
+ case 62:
return true;
default:
return false;
diff --git a/contrib/native/client/src/protobuf/UserBitShared.pb.h b/contrib/native/client/src/protobuf/UserBitShared.pb.h
index 15926cd05..7dd916d10 100644
--- a/contrib/native/client/src/protobuf/UserBitShared.pb.h
+++ b/contrib/native/client/src/protobuf/UserBitShared.pb.h
@@ -352,11 +352,12 @@ enum CoreOperatorType {
SYSLOG_SUB_SCAN = 58,
STATISTICS_AGGREGATE = 59,
UNPIVOT_MAPS = 60,
- STATISTICS_MERGE = 61
+ STATISTICS_MERGE = 61,
+ LTSV_SUB_SCAN = 62
};
bool CoreOperatorType_IsValid(int value);
const CoreOperatorType CoreOperatorType_MIN = SINGLE_SENDER;
-const CoreOperatorType CoreOperatorType_MAX = STATISTICS_MERGE;
+const CoreOperatorType CoreOperatorType_MAX = LTSV_SUB_SCAN;
const int CoreOperatorType_ARRAYSIZE = CoreOperatorType_MAX + 1;
const ::google::protobuf::EnumDescriptor* CoreOperatorType_descriptor();
diff --git a/contrib/pom.xml b/contrib/pom.xml
index 0341edb6b..17ac734b0 100644
--- a/contrib/pom.xml
+++ b/contrib/pom.xml
@@ -42,6 +42,7 @@
<module>storage-hbase</module>
<module>format-maprdb</module>
<module>format-syslog</module>
+ <module>format-ltsv</module>
<module>storage-hive</module>
<module>storage-mongo</module>
<module>storage-jdbc</module>
diff --git a/pom.xml b/pom.xml
index c7b9272e7..f9796906b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -343,6 +343,7 @@
<exclude>**/*.sqllog2</exclude>
<exclude>**/*.syslog</exclude>
<exclude>**/*.syslog1</exclude>
+ <exclude>**/*.ltsv</exclude>
<exclude>**/*.log</exclude>
<exclude>**/*.css</exclude>
<exclude>**/*.js</exclude>
@@ -671,6 +672,7 @@
<exclude>**/*.log2</exclude>
<exclude>**/*.sqllog</exclude>
<exclude>**/*.sqllog2</exclude>
+ <exclude>**/*.ltsv</exclude>
<exclude>**/*.wav</exclude>
<exclude>**/control</exclude>
<!-- TODO DRILL-4336: try to avoid the need to add this -->
diff --git a/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java b/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
index b1ba64373..1e63aca6e 100644
--- a/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
+++ b/protocol/src/main/java/org/apache/drill/exec/proto/UserBitShared.java
@@ -643,6 +643,10 @@ public final class UserBitShared {
* <code>STATISTICS_MERGE = 61;</code>
*/
STATISTICS_MERGE(61),
+ /**
+ * <code>LTSV_SUB_SCAN = 62;</code>
+ */
+ LTSV_SUB_SCAN(62),
;
/**
@@ -893,6 +897,10 @@ public final class UserBitShared {
* <code>STATISTICS_MERGE = 61;</code>
*/
public static final int STATISTICS_MERGE_VALUE = 61;
+ /**
+ * <code>LTSV_SUB_SCAN = 62;</code>
+ */
+ public static final int LTSV_SUB_SCAN_VALUE = 62;
public final int getNumber() {
@@ -971,6 +979,7 @@ public final class UserBitShared {
case 59: return STATISTICS_AGGREGATE;
case 60: return UNPIVOT_MAPS;
case 61: return STATISTICS_MERGE;
+ case 62: return LTSV_SUB_SCAN;
default: return null;
}
}
@@ -27783,7 +27792,7 @@ public final class UserBitShared {
"entState\022\013\n\007SENDING\020\000\022\027\n\023AWAITING_ALLOCA" +
"TION\020\001\022\013\n\007RUNNING\020\002\022\014\n\010FINISHED\020\003\022\r\n\tCAN" +
"CELLED\020\004\022\n\n\006FAILED\020\005\022\032\n\026CANCELLATION_REQ" +
- "UESTED\020\006*\351\t\n\020CoreOperatorType\022\021\n\rSINGLE_" +
+ "UESTED\020\006*\374\t\n\020CoreOperatorType\022\021\n\rSINGLE_" +
"SENDER\020\000\022\024\n\020BROADCAST_SENDER\020\001\022\n\n\006FILTER" +
"\020\002\022\022\n\016HASH_AGGREGATE\020\003\022\r\n\tHASH_JOIN\020\004\022\016\n" +
"\nMERGE_JOIN\020\005\022\031\n\025HASH_PARTITION_SENDER\020\006" +
@@ -27814,11 +27823,11 @@ public final class UserBitShared {
"T\0206\022\023\n\017PCAPNG_SUB_SCAN\0207\022\022\n\016RUNTIME_FILT" +
"ER\0208\022\017\n\013ROWKEY_JOIN\0209\022\023\n\017SYSLOG_SUB_SCAN" +
"\020:\022\030\n\024STATISTICS_AGGREGATE\020;\022\020\n\014UNPIVOT_" +
- "MAPS\020<\022\024\n\020STATISTICS_MERGE\020=*g\n\nSaslStat" +
- "us\022\020\n\014SASL_UNKNOWN\020\000\022\016\n\nSASL_START\020\001\022\024\n\020" +
- "SASL_IN_PROGRESS\020\002\022\020\n\014SASL_SUCCESS\020\003\022\017\n\013" +
- "SASL_FAILED\020\004B.\n\033org.apache.drill.exec.p" +
- "rotoB\rUserBitSharedH\001"
+ "MAPS\020<\022\024\n\020STATISTICS_MERGE\020=\022\021\n\rLTSV_SUB" +
+ "_SCAN\020>*g\n\nSaslStatus\022\020\n\014SASL_UNKNOWN\020\000\022" +
+ "\016\n\nSASL_START\020\001\022\024\n\020SASL_IN_PROGRESS\020\002\022\020\n" +
+ "\014SASL_SUCCESS\020\003\022\017\n\013SASL_FAILED\020\004B.\n\033org." +
+ "apache.drill.exec.protoB\rUserBitSharedH\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor. InternalDescriptorAssigner() {
diff --git a/protocol/src/main/java/org/apache/drill/exec/proto/beans/CoreOperatorType.java b/protocol/src/main/java/org/apache/drill/exec/proto/beans/CoreOperatorType.java
index 6b277b4ac..eea96ca39 100644
--- a/protocol/src/main/java/org/apache/drill/exec/proto/beans/CoreOperatorType.java
+++ b/protocol/src/main/java/org/apache/drill/exec/proto/beans/CoreOperatorType.java
@@ -83,7 +83,8 @@ public enum CoreOperatorType implements com.dyuproject.protostuff.EnumLite<CoreO
SYSLOG_SUB_SCAN(58),
STATISTICS_AGGREGATE(59),
UNPIVOT_MAPS(60),
- STATISTICS_MERGE(61);
+ STATISTICS_MERGE(61),
+ LTSV_SUB_SCAN(62);
public final int number;
@@ -163,6 +164,7 @@ public enum CoreOperatorType implements com.dyuproject.protostuff.EnumLite<CoreO
case 59: return STATISTICS_AGGREGATE;
case 60: return UNPIVOT_MAPS;
case 61: return STATISTICS_MERGE;
+ case 62: return LTSV_SUB_SCAN;
default: return null;
}
}
diff --git a/protocol/src/main/protobuf/UserBitShared.proto b/protocol/src/main/protobuf/UserBitShared.proto
index 30c612ca3..37453feb8 100644
--- a/protocol/src/main/protobuf/UserBitShared.proto
+++ b/protocol/src/main/protobuf/UserBitShared.proto
@@ -354,6 +354,7 @@ enum CoreOperatorType {
STATISTICS_AGGREGATE = 59;
UNPIVOT_MAPS = 60;
STATISTICS_MERGE = 61;
+ LTSV_SUB_SCAN = 62;
}
/* Registry that contains list of jars, each jar contains its name and list of function signatures.