aboutsummaryrefslogtreecommitdiff
path: root/contrib/format-ltsv/src
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/format-ltsv/src')
-rw-r--r-- contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java | 97
-rw-r--r-- contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java | 60
-rw-r--r-- contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java | 165
-rw-r--r-- contrib/format-ltsv/src/main/resources/drill-module.conf | 20
-rw-r--r-- contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java | 100
-rw-r--r-- contrib/format-ltsv/src/test/resources/emptylines.ltsv | 4
-rw-r--r-- contrib/format-ltsv/src/test/resources/invalid.ltsv | 1
-rw-r--r-- contrib/format-ltsv/src/test/resources/simple.ltsv | 2
8 files changed, 449 insertions, 0 deletions
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java
new file mode 100644
index 000000000..8ff62ed56
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPlugin.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.logical.StoragePluginConfig;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.planner.common.DrillStatsTable.TableStatistics;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.RecordReader;
+import org.apache.drill.exec.store.RecordWriter;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
+import org.apache.drill.exec.store.dfs.easy.EasyWriter;
+import org.apache.drill.exec.store.dfs.easy.FileWork;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Format plugin that plugs LTSV files into Drill's easy-format framework.
+ *
+ * <p>Only the read path is implemented. The writer hooks and the statistics
+ * read/write hooks throw {@link UnsupportedOperationException}, and
+ * {@link #supportsStatistics()} reports {@code false}.
+ */
+public class LTSVFormatPlugin extends EasyFormatPlugin<LTSVFormatPluginConfig> {
+
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LTSVFormatPlugin.class);
+
+  private static final String DEFAULT_NAME = "ltsv";
+
+  private static final boolean IS_COMPRESSIBLE = false;
+
+  /** Message used by every writer-related hook. */
+  private static final String WRITE_UNSUPPORTED = "Drill doesn't currently support writing to LTSV files.";
+
+  /** Message used by the statistics read/write hooks. */
+  private static final String STATISTICS_UNSUPPORTED = "unimplemented";
+
+  /**
+   * Creates the plugin with a default {@link LTSVFormatPluginConfig}.
+   */
+  public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig storageConfig) {
+    this(name, context, fsConf, storageConfig, new LTSVFormatPluginConfig());
+  }
+
+  /**
+   * Creates the plugin with an explicit format configuration; the file
+   * extensions are taken from {@code formatPluginConfig}.
+   */
+  public LTSVFormatPlugin(String name, DrillbitContext context, Configuration fsConf, StoragePluginConfig config, LTSVFormatPluginConfig formatPluginConfig) {
+    // NOTE(review): the three boolean literals are presumably readable / writable / block-splittable
+    // -- confirm against the EasyFormatPlugin constructor.
+    super(name, context, fsConf, config, formatPluginConfig, true, false, false, IS_COMPRESSIBLE, formatPluginConfig.getExtensions(), DEFAULT_NAME);
+  }
+
+  // ---------------------------------------------------------------- reading
+
+  /**
+   * Builds an {@link LTSVRecordReader} for the file described by {@code fileWork}.
+   * The {@code userName} argument is not used.
+   */
+  @Override
+  public RecordReader getRecordReader(FragmentContext context, DrillFileSystem dfs, FileWork fileWork, List<SchemaPath> columns, String userName) throws ExecutionSetupException {
+    final Path file = fileWork.getPath();
+    return new LTSVRecordReader(context, file, dfs, columns);
+  }
+
+  @Override
+  public int getReaderOperatorType() {
+    return UserBitShared.CoreOperatorType.LTSV_SUB_SCAN_VALUE;
+  }
+
+  /** Reports push-down as supported. */
+  @Override
+  public boolean supportsPushDown() {
+    return true;
+  }
+
+  // ---------------------------------------------------------------- writing (unsupported)
+
+  /**
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public RecordWriter getRecordWriter(FragmentContext context, EasyWriter writer) throws IOException {
+    throw new UnsupportedOperationException(WRITE_UNSUPPORTED);
+  }
+
+  /**
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public int getWriterOperatorType() {
+    throw new UnsupportedOperationException(WRITE_UNSUPPORTED);
+  }
+
+  // ---------------------------------------------------------------- statistics (unsupported)
+
+  @Override
+  public boolean supportsStatistics() {
+    return false;
+  }
+
+  /**
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public TableStatistics readStatistics(FileSystem fs, Path statsTablePath) throws IOException {
+    throw new UnsupportedOperationException(STATISTICS_UNSUPPORTED);
+  }
+
+  /**
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void writeStatistics(TableStatistics statistics, FileSystem fs, Path statsTablePath) throws IOException {
+    throw new UnsupportedOperationException(STATISTICS_UNSUPPORTED);
+  }
+
+}
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java
new file mode 100644
index 000000000..1e96b74e9
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVFormatPluginConfig.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Format configuration for LTSV files, registered under the JSON type name {@code "ltsv"}.
+ *
+ * <p>The only setting is the list of file extensions. When it is not set, the
+ * single default extension {@code "ltsv"} is used. Equality and hashing are both
+ * based on the <em>effective</em> extensions, so an unset list and an explicit
+ * {@code ["ltsv"]} are interchangeable.
+ */
+@JsonTypeName("ltsv")
+public class LTSVFormatPluginConfig implements FormatPluginConfig {
+  private static final List<String> DEFAULT_EXTS = ImmutableList.of("ltsv");
+
+  /** Configured file extensions; {@code null} means "use the default". */
+  public List<String> extensions;
+
+  /**
+   * Returns the effective file extensions.
+   *
+   * @return the configured extensions, or the default list when none were configured
+   */
+  @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+  public List<String> getExtensions() {
+    // A config that carries no "extensions" attribute falls back to the default.
+    return extensions != null ? extensions : DEFAULT_EXTS;
+  }
+
+  /**
+   * Hashes the effective extensions (the default list stands in for {@code null}).
+   */
+  @Override
+  public int hashCode() {
+    List<String> effective = getExtensions();
+    return Arrays.hashCode(effective.toArray(new String[0]));
+  }
+
+  /**
+   * Two configs are equal when they are of the same class and have the same
+   * effective extensions.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    LTSVFormatPluginConfig that = (LTSVFormatPluginConfig) obj;
+    // Compare what getExtensions() reports rather than the raw field: an unset list and an
+    // explicit default list behave and hash identically, so they must also compare equal.
+    return Objects.equals(getExtensions(), that.getExtensions());
+  }
+}
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
new file mode 100644
index 000000000..cb2385013
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Record reader for LTSV (Labeled Tab-Separated Values) files.
+ *
+ * <p>Each non-blank line is split on tab characters into {@code label:value} fields.
+ * Every field whose label is projected by the query is written to the output as a
+ * VARCHAR column named after the label. A field without a label (no {@code ':'}, or
+ * {@code ':'} as its first character) aborts the read with a data-read error.
+ */
+public class LTSVRecordReader extends AbstractRecordReader {
+
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LTSVRecordReader.class);
+
+  // NOTE(review): 8096 is an unusual batch size; possibly 8192 or 4096 was intended -- confirm.
+  private static final int MAX_RECORDS_PER_BATCH = 8096;
+
+  /** Path of the input file, used only in error messages. */
+  private final String inputPath;
+
+  private final FSDataInputStream fsStream;
+
+  private final BufferedReader reader;
+
+  /** Scratch buffer that carries each value's UTF-8 bytes to the vector writer. */
+  private DrillBuf buffer;
+
+  private VectorContainerWriter writer;
+
+  /**
+   * Opens {@code path} for reading as UTF-8 text.
+   *
+   * @param fragmentContext supplies the managed scratch buffer
+   * @param path            file to read
+   * @param fileSystem      file system used to open {@code path}
+   * @param columns         projected columns
+   * @throws UserException (data read error) if the file cannot be opened
+   */
+  public LTSVRecordReader(FragmentContext fragmentContext, Path path, DrillFileSystem fileSystem,
+                          List<SchemaPath> columns) throws OutOfMemoryException {
+    this.inputPath = path.toUri().getPath();
+
+    // Set up the projection and the scratch buffer before opening the file, so that a
+    // failure in either step cannot leave an opened stream behind.
+    setColumns(columns);
+    this.buffer = fragmentContext.getManagedBuffer();
+
+    try {
+      this.fsStream = fileSystem.open(path);
+      this.reader = new BufferedReader(new InputStreamReader(fsStream.getWrappedStream(), StandardCharsets.UTF_8));
+    } catch (IOException e) {
+      String msg = String.format("Failed to open input file: %s", inputPath);
+      throw UserException.dataReadError(e).message(msg).build(logger);
+    }
+  }
+
+  /**
+   * Keeps the projected columns as they are (in encounter order, without duplicates),
+   * or collapses them to the single star column for a star query.
+   */
+  @Override
+  protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> projected) {
+    Set<SchemaPath> transformed = new LinkedHashSet<>();
+    if (isStarQuery()) {
+      transformed.add(SchemaPath.STAR_COLUMN);
+    } else {
+      transformed.addAll(projected);
+    }
+    return transformed;
+  }
+
+  /**
+   * Creates the vector writer on top of {@code output}; {@code context} is not used.
+   */
+  @Override
+  public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
+    this.writer = new VectorContainerWriter(output);
+  }
+
+  /**
+   * Reads up to {@code MAX_RECORDS_PER_BATCH} records into the output vectors.
+   *
+   * <p>Blank lines are skipped, as are lines that contain none of the projected columns.
+   *
+   * @return the number of records written in this batch; 0 once the input is exhausted
+   * @throws UserException (data read error) on I/O failure or on a malformed line; the
+   *         reported position is a record number (blank and skipped lines are not counted)
+   */
+  @Override
+  public int next() {
+    this.writer.allocate();
+    this.writer.reset();
+
+    int recordCount = 0;
+
+    try {
+      BaseWriter.MapWriter map = this.writer.rootAsMap();
+      String line;
+
+      while (recordCount < MAX_RECORDS_PER_BATCH && (line = this.reader.readLine()) != null) {
+        // Skip empty lines
+        if (line.trim().isEmpty()) {
+          continue;
+        }
+
+        List<String[]> fields = parseLine(line, recordCount + 1);
+
+        // NOTE(review): a line that has none of the projected columns produces no row at all,
+        // so the row count of a projection can be lower than that of SELECT * -- confirm intended.
+        if (fields.isEmpty()) {
+          continue;
+        }
+
+        this.writer.setPosition(recordCount);
+        map.start();
+
+        for (String[] field : fields) {
+          byte[] bytes = field[1].getBytes(StandardCharsets.UTF_8);
+          // reallocIfNeeded may hand back a different buffer, so keep the returned reference.
+          this.buffer = this.buffer.reallocIfNeeded(bytes.length);
+          this.buffer.setBytes(0, bytes, 0, bytes.length);
+          map.varChar(field[0]).writeVarChar(0, bytes.length, buffer);
+        }
+
+        map.end();
+        recordCount++;
+      }
+
+      this.writer.setValueCount(recordCount);
+      return recordCount;
+
+    } catch (final Exception e) {
+      String msg = String.format("Failure while reading messages from %s. Record reader was at record: %d", inputPath, recordCount + 1);
+      throw UserException.dataReadError(e).message(msg).build(logger);
+    }
+  }
+
+  /**
+   * Splits one LTSV line into its projected {@code {label, value}} pairs, in file order.
+   *
+   * @param line         non-blank input line
+   * @param recordNumber 1-based record number, used only in the error message
+   * @return the projected fields; empty when the line has none of the projected columns
+   * @throws ParseException if any field lacks a label
+   */
+  private List<String[]> parseLine(String line, int recordNumber) throws ParseException {
+    List<String[]> fields = new ArrayList<>();
+    for (String field : line.split("\t")) {
+      // The label ends at the first ':'; everything after it (including further ':') is the value.
+      int index = field.indexOf(":");
+      if (index <= 0) {
+        throw new ParseException(String.format("Invalid LTSV format: %s\n%d:%s", inputPath, recordNumber, line), 0);
+      }
+
+      String fieldName = field.substring(0, index);
+      String fieldValue = field.substring(index + 1);
+      if (selectedColumn(fieldName)) {
+        fields.add(new String[]{fieldName, fieldValue});
+      }
+    }
+    return fields;
+  }
+
+  /**
+   * Tells whether {@code fieldName} is projected: true for a star projection or when a
+   * projected column's root segment equals the name.
+   */
+  private boolean selectedColumn(String fieldName) {
+    for (SchemaPath col : getColumns()) {
+      if (col.equals(SchemaPath.STAR_COLUMN) || col.getRootSegment().getPath().equals(fieldName)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Closes the text reader and the underlying file stream.
+   */
+  @Override
+  public void close() throws Exception {
+    AutoCloseables.close(reader, fsStream);
+  }
+
+}
diff --git a/contrib/format-ltsv/src/main/resources/drill-module.conf b/contrib/format-ltsv/src/main/resources/drill-module.conf
new file mode 100644
index 000000000..faf163261
--- /dev/null
+++ b/contrib/format-ltsv/src/main/resources/drill-module.conf
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file tells Drill to consider this module when class path scanning.
+// This file can also include any supplementary configuration information.
+// This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
+
+drill.classpath.scanning.packages += "org.apache.drill.exec.store.ltsv"
diff --git a/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java b/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java
new file mode 100644
index 000000000..61f65f473
--- /dev/null
+++ b/contrib/format-ltsv/src/test/java/org/apache/drill/exec/store/ltsv/TestLTSVRecordReader.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.logical.FormatPluginConfig;
+import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.store.dfs.FileSystemConfig;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class TestLTSVRecordReader extends ClusterTest {
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ startCluster(ClusterFixture.builder(dirTestWatcher));
+
+ DrillbitContext context = cluster.drillbit().getContext();
+ FileSystemConfig original = (FileSystemConfig) context.getStorage().getPlugin("cp").getConfig();
+ Map<String, FormatPluginConfig> newFormats = new HashMap<>(original.getFormats());
+ newFormats.put("ltsv", new LTSVFormatPluginConfig());
+ FileSystemConfig pluginConfig = new FileSystemConfig(original.getConnection(), original.getConfig(), original.getWorkspaces(), newFormats);
+ pluginConfig.setEnabled(true);
+ context.getStorage().createOrUpdate("cp", pluginConfig, true);
+ }
+
+ @Test
+ public void testWildcard() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT * FROM cp.`simple.ltsv`")
+ .unOrdered()
+ .baselineColumns("host", "forwardedfor", "req", "status", "size", "referer", "ua", "reqtime", "apptime", "vhost")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/xxx HTTP/1.1", "200", "4968", "-", "Java/1.8.0_131", "2.532", "2.532", "api.example.com")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/yyy HTTP/1.1", "200", "412", "-", "Java/1.8.0_201", "3.580", "3.580", "api.example.com")
+ .go();
+ }
+
+ @Test
+ public void testSelectColumns() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT ua, reqtime FROM cp.`simple.ltsv`")
+ .unOrdered()
+ .baselineColumns("ua", "reqtime")
+ .baselineValues("Java/1.8.0_131", "2.532")
+ .baselineValues("Java/1.8.0_201", "3.580")
+ .go();
+ }
+
+ @Test
+ public void testQueryWithConditions() throws Exception {
+ testBuilder()
+ .sqlQuery("SELECT * FROM cp.`simple.ltsv` WHERE reqtime > 3.0")
+ .unOrdered()
+ .baselineColumns("host", "forwardedfor", "req", "status", "size", "referer", "ua", "reqtime", "apptime", "vhost")
+ .baselineValues("xxx.xxx.xxx.xxx", "-", "GET /v1/yyy HTTP/1.1", "200", "412", "-", "Java/1.8.0_201", "3.580", "3.580", "api.example.com")
+ .go();
+ }
+
+ @Test
+ public void testSkipEmptyLines() throws Exception {
+ assertEquals(2, queryBuilder().sql("SELECT * FROM cp.`emptylines.ltsv`").run().recordCount());
+ }
+
+ @Test
+ public void testReadException() throws Exception {
+ try {
+ run("SELECT * FROM cp.`invalid.ltsv`");
+ fail();
+ } catch (UserException e) {
+ assertEquals(UserBitShared.DrillPBError.ErrorType.DATA_READ, e.getErrorType());
+ assertTrue(e.getMessage().contains("Failure while reading messages from /invalid.ltsv. Record reader was at record: 1"));
+ }
+ }
+
+}
diff --git a/contrib/format-ltsv/src/test/resources/emptylines.ltsv b/contrib/format-ltsv/src/test/resources/emptylines.ltsv
new file mode 100644
index 000000000..2cdfe4efd
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/emptylines.ltsv
@@ -0,0 +1,4 @@
+time:30/Nov/2016:00:55:08 +0900 host:xxx.xxx.xxx.xxx
+
+
+time:30/Nov/2016:00:56:37 +0900 host:xxx.xxx.xxx.xxx
diff --git a/contrib/format-ltsv/src/test/resources/invalid.ltsv b/contrib/format-ltsv/src/test/resources/invalid.ltsv
new file mode 100644
index 000000000..2395e0af5
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/invalid.ltsv
@@ -0,0 +1 @@
+time:30/Nov/2016:00:55:08 +0900 :xxx.xxx.xxx.xxx
diff --git a/contrib/format-ltsv/src/test/resources/simple.ltsv b/contrib/format-ltsv/src/test/resources/simple.ltsv
new file mode 100644
index 000000000..d09ce0c97
--- /dev/null
+++ b/contrib/format-ltsv/src/test/resources/simple.ltsv
@@ -0,0 +1,2 @@
+host:xxx.xxx.xxx.xxx forwardedfor:- req:GET /v1/xxx HTTP/1.1 status:200 size:4968 referer:- ua:Java/1.8.0_131 reqtime:2.532 apptime:2.532 vhost:api.example.com
+host:xxx.xxx.xxx.xxx forwardedfor:- req:GET /v1/yyy HTTP/1.1 status:200 size:412 referer:- ua:Java/1.8.0_201 reqtime:3.580 apptime:3.580 vhost:api.example.com