aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java')
-rw-r--r-- exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java 237
1 files changed, 237 insertions, 0 deletions
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java
new file mode 100644
index 000000000..ac85a920d
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/httpd/TestHTTPDLogReader.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.httpd;
+
+import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.exec.record.BatchSchema;
+import org.apache.drill.exec.record.metadata.SchemaBuilder;
+import org.apache.drill.exec.rpc.RpcException;
+import org.apache.drill.exec.server.Drillbit;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.dfs.FileSystemConfig;
+import org.apache.drill.exec.store.dfs.FileSystemPlugin;
+import org.apache.drill.test.BaseDirTestWatcher;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.apache.drill.test.rowSet.RowSet;
+import org.apache.drill.test.rowSet.RowSetUtilities;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+
+import java.time.LocalDateTime;
+import java.util.HashMap;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestHTTPDLogReader extends ClusterTest {
+
+ /**
+  * JUnit class rule that is handed to the cluster fixture builder in {@code setup()}.
+  * NOTE(review): presumably provides the working directories for the test cluster;
+  * confirm against BaseDirTestWatcher.
+  */
+ @ClassRule
+ public static final BaseDirTestWatcher dirTestWatcher = new BaseDirTestWatcher();
+
+  /**
+   * Runs once before the tests in this class: starts the test cluster with a
+   * fixture built from {@code dirTestWatcher}, then registers the HTTPD log
+   * format through {@code defineHTTPDPlugin()}.
+   *
+   * @throws Exception propagated from cluster start-up or format registration
+   */
+  @BeforeClass
+  public static void setup() throws Exception {
+    // startCluster is a static member of the ClusterTest superclass, so it can
+    // be called without the class qualifier.
+    startCluster(ClusterFixture.builder(dirTestWatcher));
+    defineHTTPDPlugin();
+  }
+
+  /**
+   * Adds an HTTPD log format, under the name "sample", to the configuration of
+   * the "cp" storage plugin and hands the changed configuration back to the
+   * plugin registry of the running Drillbit.
+   *
+   * @throws ExecutionSetupException declared for the plugin registry calls
+   */
+  private static void defineHTTPDPlugin() throws ExecutionSetupException {
+    // Create the HTTPD log format config and give it the format string that
+    // describes the layout of each access-log line.
+    // Note: we can't use the ".log" extension; the Drill .gitignore
+    // file ignores such files, so they'll never get committed. Instead,
+    // a made-up suffix is used.
+    final HttpdLogFormatConfig httpdFormat = new HttpdLogFormatConfig();
+    httpdFormat.setLogFormat("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"");
+
+    // Look up the configuration of the "cp" storage plugin.
+    final StoragePluginRegistry registry = cluster.drillbit().getContext().getStorage();
+    final FileSystemPlugin cpPlugin = (FileSystemPlugin) registry.getPlugin("cp");
+    final FileSystemConfig cpConfig = (FileSystemConfig) cpPlugin.getConfig();
+
+    // Define the temporary format on that configuration and store it again.
+    cpConfig.getFormats().put("sample", httpdFormat);
+    registry.createOrUpdate("cp", cpConfig, false);
+  }
+
+  /**
+   * Queries {@code request_receive_time} for five rows and verifies that the
+   * result is a single nullable TIMESTAMP column holding the expected values.
+   *
+   * @throws RpcException propagated from running the query
+   */
+  @Test
+  public void testDateField() throws RpcException {
+    final String query = "SELECT `request_receive_time` FROM cp.`httpd/hackers-access-small.httpd` LIMIT 5";
+    final RowSet actual = client.queryBuilder().sql(query).rowSet();
+
+    final BatchSchema schema = new SchemaBuilder()
+        .addNullable("request_receive_time", MinorType.TIMESTAMP)
+        .build();
+
+    // Expected values, one row per log line, in file order.
+    final RowSet expected = client.rowSetBuilder(schema)
+        .addRow(1445742685000L)
+        .addRow(1445742686000L)
+        .addRow(1445742687000L)
+        .addRow(1445743471000L)
+        .addRow(1445743472000L)
+        .build();
+
+    RowSetUtilities.verify(expected, actual);
+  }
+
+  /**
+   * Selects 114 columns by name from a single row ({@code LIMIT 1}) of the
+   * sample access log and compares them with the expected baseline values.
+   *
+   * @throws Exception propagated from building or running the query
+   */
+  @Test
+  public void testSelectColumns() throws Exception {
+    // One list drives both the SELECT clause and the baseline column names,
+    // so the two cannot drift apart.
+    final String[] columns = {
+        "request_referer_ref",
+        "request_receive_time_last_time",
+        "request_firstline_uri_protocol",
+        "request_receive_time_microsecond",
+        "request_receive_time_last_microsecond__utc",
+        "request_firstline_original_protocol",
+        "request_firstline_original_uri_host",
+        "request_referer_host",
+        "request_receive_time_month__utc",
+        "request_receive_time_last_minute",
+        "request_firstline_protocol_version",
+        "request_receive_time_time__utc",
+        "request_referer_last_ref",
+        "request_receive_time_last_timezone",
+        "request_receive_time_last_weekofweekyear",
+        "request_referer_last",
+        "request_receive_time_minute",
+        "connection_client_host_last",
+        "request_receive_time_last_millisecond__utc",
+        "request_firstline_original_uri",
+        "request_firstline",
+        "request_receive_time_nanosecond",
+        "request_receive_time_last_millisecond",
+        "request_receive_time_day",
+        "request_referer_port",
+        "request_firstline_original_uri_port",
+        "request_receive_time_year",
+        "request_receive_time_last_date",
+        "request_receive_time_last_time__utc",
+        "request_receive_time_last_hour__utc",
+        "request_firstline_original_protocol_version",
+        "request_firstline_original_method",
+        "request_receive_time_last_year__utc",
+        "request_firstline_uri",
+        "request_referer_last_host",
+        "request_receive_time_last_minute__utc",
+        "request_receive_time_weekofweekyear",
+        "request_firstline_uri_userinfo",
+        "request_receive_time_epoch",
+        "connection_client_logname",
+        "response_body_bytes",
+        "request_receive_time_nanosecond__utc",
+        "request_firstline_protocol",
+        "request_receive_time_microsecond__utc",
+        "request_receive_time_hour",
+        "request_firstline_uri_host",
+        "request_referer_last_port",
+        "request_receive_time_last_epoch",
+        "request_receive_time_last_weekyear__utc",
+        "request_useragent",
+        "request_receive_time_weekyear",
+        "request_receive_time_timezone",
+        "response_body_bytesclf",
+        "request_receive_time_last_date__utc",
+        "request_receive_time_millisecond__utc",
+        "request_referer_last_protocol",
+        "request_status_last",
+        "request_firstline_uri_query",
+        "request_receive_time_minute__utc",
+        "request_firstline_original_uri_protocol",
+        "request_referer_query",
+        "request_receive_time_date",
+        "request_firstline_uri_port",
+        "request_receive_time_last_second__utc",
+        "request_referer_last_userinfo",
+        "request_receive_time_last_second",
+        "request_receive_time_last_monthname__utc",
+        "request_firstline_method",
+        "request_receive_time_last_month__utc",
+        "request_receive_time_millisecond",
+        "request_receive_time_day__utc",
+        "request_receive_time_year__utc",
+        "request_receive_time_weekofweekyear__utc",
+        "request_receive_time_second",
+        "request_firstline_original_uri_ref",
+        "connection_client_logname_last",
+        "request_receive_time_last_year",
+        "request_firstline_original_uri_path",
+        "connection_client_host",
+        "request_firstline_original_uri_query",
+        "request_referer_userinfo",
+        "request_receive_time_last_monthname",
+        "request_referer_path",
+        "request_receive_time_monthname",
+        "request_receive_time_last_month",
+        "request_referer_last_query",
+        "request_firstline_uri_ref",
+        "request_receive_time_last_day",
+        "request_receive_time_time",
+        "request_receive_time_last_weekofweekyear__utc",
+        "request_useragent_last",
+        "request_receive_time_last_weekyear",
+        "request_receive_time_last_microsecond",
+        "request_firstline_original",
+        "request_referer_last_path",
+        "request_receive_time_month",
+        "request_receive_time_last_day__utc",
+        "request_referer",
+        "request_referer_protocol",
+        "request_receive_time_monthname__utc",
+        "response_body_bytes_last",
+        "request_receive_time",
+        "request_receive_time_last_nanosecond",
+        "request_firstline_uri_path",
+        "request_firstline_original_uri_userinfo",
+        "request_receive_time_date__utc",
+        "request_receive_time_last",
+        "request_receive_time_last_nanosecond__utc",
+        "request_receive_time_last_hour",
+        "request_receive_time_hour__utc",
+        "request_receive_time_second__utc",
+        "connection_client_user_last",
+        "request_receive_time_weekyear__utc",
+        "connection_client_user"
+    };
+
+    // Column names are separated by ",\n"; FROM and LIMIT follow on their own lines.
+    final String sql = "SELECT " + String.join(",\n", columns) + "\n" +
+        "FROM cp.`httpd/hackers-access-small.httpd`\n" +
+        "LIMIT 1";
+
+    // Baseline values are listed in the same order as the columns, six per line.
+    testBuilder()
+        .sqlQuery(sql)
+        .unOrdered()
+        .baselineColumns(columns)
+        .baselineValues(
+            null, "04:11:25", null, 0L, 0L, "HTTP",
+            null, "howto.basjes.nl", 10L, 11L, "1.1", "03:11:25",
+            null, null, 43L, "http://howto.basjes.nl/", 11L, "195.154.46.135",
+            0L, "/linux/doing-pxe-without-dhcp-control", "GET /linux/doing-pxe-without-dhcp-control HTTP/1.1", 0L, 0L, 25L,
+            null, null, 2015L, "2015-10-25", "03:11:25", 3L,
+            "1.1", "GET", 2015L, "/linux/doing-pxe-without-dhcp-control", "howto.basjes.nl", 11L,
+            43L, null, 1445742685000L, null, 24323L, 0L,
+            "HTTP", 0L, 4L, null, null, 1445742685000L,
+            2015L, "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0", 2015L, null, 24323L, "2015-10-25",
+            0L, "http", "200", "", 11L, null,
+            "", "2015-10-25", null, 25L, null, 25L,
+            "October", "GET", 10L, 0L, 25L, 2015L,
+            43L, 25L, null, null, 2015L, "/linux/doing-pxe-without-dhcp-control",
+            "195.154.46.135", "", null, "October", "/", "October",
+            10L, "", null, 25L, "04:11:25", 43L,
+            "Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0", 2015L, 0L, "GET /linux/doing-pxe-without-dhcp-control HTTP/1.1", "/", 10L,
+            25L, "http://howto.basjes.nl/", "http", "October", 24323L, LocalDateTime.parse("2015-10-25T03:11:25"),
+            0L, "/linux/doing-pxe-without-dhcp-control", null, "2015-10-25", LocalDateTime.parse("2015-10-25T03:11:25"), 0L,
+            4L, 3L, 25L, null, 2015L, null)
+        .go();
+  }
+
+
+  /**
+   * Runs {@code COUNT(*)} over the sample access log and expects ten rows.
+   *
+   * @throws Exception propagated from running the query
+   */
+  @Test
+  public void testCount() throws Exception {
+    final long rowCount = client.queryBuilder()
+        .sql("SELECT COUNT(*) FROM cp.`httpd/hackers-access-small.httpd`")
+        .singletonLong();
+    assertEquals(10L, rowCount);
+  }
+
+ @Test
+ public void testStar() throws Exception {
+ String sql = "SELECT * FROM cp.`httpd/hackers-access-small.httpd` LIMIT 1";
+
+ testBuilder()
+ .sqlQuery(sql)
+ .unOrdered()
+ .baselineColumns("request_referer_ref","request_receive_time_last_time","request_firstline_uri_protocol","request_receive_time_microsecond","request_receive_time_last_microsecond__utc","request_firstline_original_uri_query_$","request_firstline_original_protocol","request_firstline_original_uri_host","request_referer_host","request_receive_time_month__utc","request_receive_time_last_minute","request_firstline_protocol_version","request_receive_time_time__utc","request_referer_last_ref","request_receive_time_last_timezone","request_receive_time_last_weekofweekyear","request_referer_last","request_receive_time_minute","connection_client_host_last","request_receive_time_last_millisecond__utc","request_firstline_original_uri","request_firstline","request_receive_time_nanosecond","request_receive_time_last_millisecond","request_receive_time_day","request_referer_port","request_firstline_original_uri_port","request_receive_time_year","request_receive_time_last_date","request_referer_query_$","request_receive_time_last_time__utc","request_receive_time_last_hour__utc","request_firstline_original_protocol_version","request_firstline_original_method","request_receive_time_last_year__utc","request_firstline_uri","request_referer_last_host","request_receive_time_last_minute__utc","request_receive_time_weekofweekyear","request_firstline_uri_userinfo","request_receive_time_epoch","connection_client_logname","response_body_bytes","request_receive_time_nanosecond__utc","request_firstline_protocol","request_receive_time_microsecond__utc","request_receive_time_hour","request_firstline_uri_host","request_referer_last_port","request_receive_time_last_epoch","request_receive_time_last_weekyear__utc","request_receive_time_weekyear","request_receive_time_timezone","response_body_bytesclf","request_receive_time_last_date__utc","request_useragent_last","request_useragent","request_receive_time_millisecond__utc","request_referer_last_protocol","request_status_last","request_firstline_uri_q
uery","request_receive_time_minute__utc","request_firstline_original_uri_protocol","request_referer_query","request_receive_time_date","request_firstline_uri_port","request_receive_time_last_second__utc","request_referer_last_userinfo","request_receive_time_last_second","request_receive_time_last_monthname__utc","request_firstline_method","request_receive_time_last_month__utc","request_receive_time_millisecond","request_receive_time_day__utc","request_receive_time_year__utc","request_receive_time_weekofweekyear__utc","request_receive_time_second","request_firstline_original_uri_ref","connection_client_logname_last","request_receive_time_last_year","request_firstline_original_uri_path","connection_client_host","request_referer_last_query_$","request_firstline_original_uri_query","request_referer_userinfo","request_receive_time_last_monthname","request_referer_path","request_receive_time_monthname","request_receive_time_last_month","request_referer_last_query","request_firstline_uri_ref","request_receive_time_last_day","request_receive_time_time","request_receive_time_last_weekofweekyear__utc","request_receive_time_last_weekyear","request_receive_time_last_microsecond","request_firstline_original","request_firstline_uri_query_$","request_referer_last_path","request_receive_time_month","request_receive_time_last_day__utc","request_referer","request_referer_protocol","request_receive_time_monthname__utc","response_body_bytes_last","request_receive_time","request_receive_time_last_nanosecond","request_firstline_uri_path","request_firstline_original_uri_userinfo","request_receive_time_date__utc","request_receive_time_last","request_receive_time_last_nanosecond__utc","request_receive_time_last_hour","request_receive_time_hour__utc","request_receive_time_second__utc","connection_client_user_last","request_receive_time_weekyear__utc","connection_client_user")
+ .baselineValues(null,"04:11:25",null,0L,0L,new HashMap<>(),"HTTP",null,"howto.basjes.nl",10L,11L,"1.1","03:11:25",null,null,43L,"http://howto.basjes.nl/",11L,"195.154.46.135",0L,"/linux/doing-pxe-without-dhcp-control","GET /linux/doing-pxe-without-dhcp-control HTTP/1.1",0L,0L,25L,null,null,2015L,"2015-10-25",new HashMap<>(),"03:11:25",3L,"1.1","GET",2015L,"/linux/doing-pxe-without-dhcp-control","howto.basjes.nl",11L,43L,null,1445742685000L,null,24323L,0L,"HTTP",0L,4L,null,null,1445742685000L,2015L,2015L,null,24323L,"2015-10-25","Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0","Mozilla/5.0 (Windows NT 5.1; rv:35.0) Gecko/20100101 Firefox/35.0",0L,"http","200","",11L,null,"","2015-10-25",null,25L,null,25L,"October","GET",10L,0L,25L,2015L,43L,25L,null,null,2015L,"/linux/doing-pxe-without-dhcp-control","195.154.46.135",new HashMap<>(),"",null,"October","/","October",10L,"",null,25L,"04:11:25",43L,2015L,0L,"GET /linux/doing-pxe-without-dhcp-control HTTP/1.1",new HashMap<>(),"/",10L,25L,"http://howto.basjes.nl/","http","October",24323L,LocalDateTime.parse("2015-10-25T03:11:25"),0L,"/linux/doing-pxe-without-dhcp-control",null,"2015-10-25",LocalDateTime.parse("2015-10-25T03:11:25"),0L,4L,3L,25L,null,2015L,null)
+ .go();
+ }
+}