Java tutorial: parsing Hadoop SequenceFiles with CDK Morphlines (ReadSequenceFileTest.java)
/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.morphline.hadoop.sequencefile;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.cdk.morphline.api.AbstractMorphlineTest;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.google.common.io.Closeables;

public class ReadSequenceFileTest extends AbstractMorphlineTest {

  private static final Logger LOGGER = LoggerFactory.getLogger(ReadSequenceFileTest.class);

  /**
   * Test that Solr queries on a parsed SequenceFile document
   * return the expected content and fields. Don't pass
   * in our own parser via the context.
   */
  @Test
  public void testSequenceFileContentSimple() throws Exception {
    morphline = createMorphline("test-morphlines/sequenceFileMorphlineSimple");
    String path = RESOURCES_DIR;
    File sequenceFile = new File(path, "testSequenceFileContentSimple.seq");
    int numRecords = 5;
    HashMap<String, Record> expected = createTextSequenceFile(sequenceFile, numRecords);
    InputStream in = new FileInputStream(sequenceFile.getAbsolutePath());
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(record));
    assertTrue(areFieldsEqual(expected, collector.getRecords()));
  }

  /**
   * Return a mapping of expected keys -> records.
   */
  private HashMap<String, Record> createTextSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(new Configuration(), out, Text.class, Text.class,
          SequenceFile.CompressionType.NONE, null, metadata);
      for (int i = 0; i < numRecords; ++i) {
        Text key = new Text("key" + i);
        Text value = new Text("value" + i);
        writer.append(key, value);
        Record record = new Record();
        record.put("key", key);
        record.put("value", value);
        map.put(key.toString(), record);
      }
    } finally {
      Closeables.closeQuietly(writer);
    }
    return map;
  }
  /**
   * Test that Solr queries on a parsed SequenceFile document
   * return the expected content and fields.
   */
  @Test
  public void testSequenceFileContentCustomParsers() throws Exception {
    morphline = createMorphline("test-morphlines/sequenceFileMorphlineSimple");
    String path = RESOURCES_DIR;
    File sequenceFile = new File(path, "testSequenceFileContentCustomParsers.seq");
    int numRecords = 10;
    // Write custom MyWritable values (not Text) so the custom-parser path is
    // actually exercised; the plain Text variant is already covered by
    // testSequenceFileContentSimple above.
    HashMap<String, Record> expected = createMyWritableSequenceFile(sequenceFile, numRecords);
    InputStream in = new FileInputStream(sequenceFile.getAbsolutePath());
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(record));
    assertTrue(areFieldsEqual(expected, collector.getRecords()));
  }

  /**
   * Return a mapping of expected keys -> records.
   */
  private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
    HashMap<String, Record> map = new HashMap<String, Record>();
    SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
    FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
    SequenceFile.Writer writer = null;
    try {
      writer = SequenceFile.createWriter(new Configuration(), out, Text.class,
          ParseTextMyWritableBuilder.MyWritable.class,
          SequenceFile.CompressionType.NONE, null, metadata);
      for (int i = 0; i < numRecords; ++i) {
        Text key = new Text("key" + i);
        ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
        writer.append(key, value);
        Record record = new Record();
        record.put("key", key);
        record.put("value", value);
        map.put(key.toString(), record);
      }
    } finally {
      Closeables.closeQuietly(writer);
    }
    return map;
  }

  private TreeMap<Text, Text> getMetadataForSequenceFile() {
    TreeMap<Text, Text> metadata = new TreeMap<Text, Text>();
    metadata.put(new Text("license"), new Text("Apache"));
    metadata.put(new Text("year"), new Text("2013"));
    return metadata;
  }

  private boolean areRecordFieldsEqual(Record record1, Record record2, List<String> fieldsToCheck) {
    for (String field : fieldsToCheck) {
      if (!record1.get(field).equals(record2.get(field))) {
        return false;
      }
    }
    return true;
  }

  private boolean areFieldsEqual(HashMap<String, Record> expected, List<Record> actual) {
    if (expected.size() != actual.size()) {
      return false;
    }
    for (Record current : actual) {
      String key = current.getFirstValue("key").toString();
      Record currentExpected = expected.get(key);
      if (!areRecordFieldsEqual(current, currentExpected, Arrays.asList("key", "value"))) {
        return false;
      }
    }
    return true;
  }
}
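The tests above only inspect the records the morphline emits; they never look at the SequenceFile itself. If you want to confirm what the writer helpers produced (the key/value pairs and the file-level metadata attached via SequenceFile.Metadata), here is a minimal standalone sketch that reads the file back with SequenceFile.Reader. The class name ReadbackExample and the hard-coded path are illustrative only; it assumes a local Text/Text file such as the one written by createTextSequenceFile above.

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ReadbackExample {

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Read from the local file system; the path is a placeholder.
    FileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path("testSequenceFileContentSimple.seq");

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
      // The file-level metadata written through SequenceFile.Metadata
      // (license=Apache, year=2013 in the test above) travels with the file.
      for (Map.Entry<Text, Text> entry : reader.getMetadata().getMetadata().entrySet()) {
        System.out.println("metadata: " + entry.getKey() + "=" + entry.getValue());
      }
      // Iterate over the key/value pairs appended by the writer, reusing
      // the same Writable instances on each call.
      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + " -> " + value);
      }
    } finally {
      reader.close();
    }
  }
}

This mirrors what the readSequenceFile morphline command does internally: it iterates the file's Writables and turns each key/value pair into a record, which is why the tests can compare the collector's output against the map returned by the writer helpers.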