1 files changed, 165 insertions, 0 deletions
diff --git a/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
new file mode 100644
index 000000000..cb2385013
--- /dev/null
+++ b/contrib/format-ltsv/src/main/java/org/apache/drill/exec/store/ltsv/LTSVRecordReader.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.ltsv;
+
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.common.AutoCloseables;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.exception.OutOfMemoryException;
+import org.apache.drill.exec.ops.FragmentContext;
+import org.apache.drill.exec.ops.OperatorContext;
+import org.apache.drill.exec.physical.impl.OutputMutator;
+import org.apache.drill.exec.store.AbstractRecordReader;
+import org.apache.drill.exec.store.dfs.DrillFileSystem;
+import org.apache.drill.exec.vector.complex.impl.VectorContainerWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+/** Reads LTSV (Labeled Tab-Separated Values) files, writing each selected "label:value" field as a VARCHAR. */
+public class LTSVRecordReader extends AbstractRecordReader {
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(LTSVRecordReader.class);
+
+  /** Upper bound on the number of records written by one {@link #next()} call. */
+  private static final int MAX_RECORDS_PER_BATCH = 8096;
+
+  private final String inputPath;
+  private final FSDataInputStream fsStream;
+  private final BufferedReader reader;
+  /** Scratch buffer staging each value's UTF-8 bytes before they are handed to the vector writer. */
+  private DrillBuf buffer;
+  private VectorContainerWriter writer;
+  /** Lines read from the file so far, across all batches and including skipped lines; used in parse errors. */
+  private long lineCount;
+
+  /**
+   * Registers the projected {@code columns}, takes a scratch buffer from {@code fragmentContext} and opens
+   * {@code path} through {@code fileSystem}. An open failure surfaces as a {@link UserException} data read error.
+   */
+  public LTSVRecordReader(FragmentContext fragmentContext, Path path, DrillFileSystem fileSystem,
+                          List<SchemaPath> columns) throws OutOfMemoryException {
+    this.inputPath = path.toUri().getPath();
+    // Run the steps that do not need the file first, so a failure in them cannot leak an opened stream.
+    setColumns(columns);
+    this.buffer = fragmentContext.getManagedBuffer();
+    try {
+      this.fsStream = fileSystem.open(path);
+      this.reader = new BufferedReader(new InputStreamReader(fsStream.getWrappedStream(), StandardCharsets.UTF_8));
+    } catch (IOException e) {
+      String msg = String.format("Failed to open input file: %s", inputPath);
+      throw UserException.dataReadError(e).message(msg).build(logger);
+    }
+  }
+
+  /** Returns only the star column for a star query, otherwise the projected columns de-duplicated in order. */
+  @Override
+  protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> projected) {
+    Set<SchemaPath> transformed = new LinkedHashSet<>();
+    if (isStarQuery()) {
+      transformed.add(SchemaPath.STAR_COLUMN);
+    } else {
+      transformed.addAll(projected);
+    }
+    return transformed;
+  }
+
+  /** Creates the vector writer that {@link #next()} fills from {@code output}; {@code context} is not used. */
+  @Override
+  public void setup(final OperatorContext context, final OutputMutator output) throws ExecutionSetupException {
+    this.writer = new VectorContainerWriter(output);
+  }
+
+  /**
+   * Reads up to {@link #MAX_RECORDS_PER_BATCH} records; blank lines and lines with no selected field yield none.
+   * Returns the number of records written; any failure is rethrown as a {@link UserException} data read error.
+   */
+  @Override
+  public int next() {
+    this.writer.allocate();
+    this.writer.reset();
+    int recordCount = 0;
+    try {
+      BaseWriter.MapWriter map = this.writer.rootAsMap();
+      String line;
+      while (recordCount < MAX_RECORDS_PER_BATCH && (line = this.reader.readLine()) != null) {
+        lineCount++;
+        if (line.trim().isEmpty()) {
+          continue;
+        }
+        List<String[]> fields = new ArrayList<>();
+        for (String field : line.split("\t")) {
+          int index = field.indexOf(':');
+          if (index <= 0) {
+            throw new ParseException(String.format("Invalid LTSV format: %s\n%d:%s", inputPath, lineCount, line), 0);
+          }
+          String fieldName = field.substring(0, index);
+          if (selectedColumn(fieldName)) {
+            fields.add(new String[]{fieldName, field.substring(index + 1)});
+          }
+        }
+        if (fields.isEmpty()) {
+          continue;
+        }
+        this.writer.setPosition(recordCount);
+        map.start();
+        for (String[] field : fields) {
+          byte[] bytes = field[1].getBytes(StandardCharsets.UTF_8);
+          this.buffer = this.buffer.reallocIfNeeded(bytes.length);
+          this.buffer.setBytes(0, bytes, 0, bytes.length);
+          map.varChar(field[0]).writeVarChar(0, bytes.length, buffer);
+        }
+        map.end();
+        recordCount++;
+      }
+      this.writer.setValueCount(recordCount);
+      return recordCount;
+    } catch (final Exception e) {
+      String msg = String.format("Failure while reading messages from %s. Record reader was at record: %d", inputPath, recordCount + 1);
+      throw UserException.dataReadError(e).message(msg).build(logger);
+    }
+  }
+
+  /** Tells whether {@code fieldName} is projected, either by a star column or by a matching root path segment. */
+  private boolean selectedColumn(String fieldName) {
+    for (SchemaPath col : getColumns()) {
+      if (col.equals(SchemaPath.STAR_COLUMN) || col.getRootSegment().getPath().equals(fieldName)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Closes the line reader and the file stream opened by the constructor. */
+  @Override
+  public void close() throws Exception {
+    AutoCloseables.close(reader, fsStream);
+  }
+}