Java tutorial
/** * Copyright 2013 Cloudera Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.cdk.tools; import com.cloudera.data.Dataset; import com.cloudera.data.DatasetReader; import com.cloudera.data.DatasetRepository; import com.cloudera.data.filesystem.FileSystemDatasetRepository; import com.google.common.io.Resources; import java.io.File; import junit.framework.Assert; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.junit.Before; import org.junit.Test; public class TestCombinedLogFormatConverter { private static final File TEST_DIR = new File("/tmp", TestCombinedLogFormatConverter.class.getName()); @Before public void setUp() { FileUtil.fullyDelete(TEST_DIR); } @Test public void test() throws Exception { CombinedLogFormatConverter tool = new CombinedLogFormatConverter(); String input = Resources.getResource("access_log.txt").toExternalForm(); String datasetRoot = TEST_DIR.toURI().toURL().toExternalForm(); String datasetName = "logs"; int exitCode = tool.run(input, datasetRoot, datasetName); Assert.assertEquals(0, exitCode); Path root = new Path(datasetRoot); FileSystem fs = root.getFileSystem(new Configuration()); DatasetRepository repo = new FileSystemDatasetRepository(fs, root); Dataset dataset = repo.get(datasetName); DatasetReader<GenericRecord> reader = dataset.getReader(); try { reader.open(); Assert.assertTrue(reader.hasNext()); GenericRecord first = reader.read(); Assert.assertEquals("ip1", first.get("host")); Assert.assertNull(first.get("rfc931_identity")); Assert.assertNull(first.get("username")); Assert.assertEquals("24/Apr/2011:04:06:01 -0400", first.get("datetime")); Assert.assertEquals("GET /~strabal/grease/photo9/927-3.jpg HTTP/1.1", first.get("request")); Assert.assertEquals(200, first.get("http_status_code")); Assert.assertEquals(40028, first.get("response_size")); Assert.assertNull(first.get("referrer")); Assert.assertEquals("Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex" + ".com/bots)", first.get("user_agent")); } finally { reader.close(); } } }