Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.sqoop.connector.hdfs; import static org.apache.sqoop.connector.hdfs.configuration.ToFormat.SEQUENCE_FILE; import static org.apache.sqoop.connector.hdfs.configuration.ToFormat.TEXT_FILE; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.sqoop.common.PrefixContext; import org.apache.sqoop.connector.hdfs.configuration.LinkConfiguration; import org.apache.sqoop.connector.hdfs.configuration.ToCompression; import org.apache.sqoop.connector.hdfs.configuration.ToFormat; import org.apache.sqoop.connector.hdfs.configuration.ToJobConfiguration; import org.apache.sqoop.etl.io.DataReader; import org.apache.sqoop.job.etl.Loader; import org.apache.sqoop.job.etl.LoaderContext; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @RunWith(Parameterized.class) public class TestLoader extends TestHdfsBase { private static final String INPUT_ROOT = System.getProperty("maven.build.directory", "/tmp") + "/sqoop/warehouse/"; private static final int NUMBER_OF_ROWS_PER_FILE = 1000; private ToFormat outputFormat; private ToCompression compression; private final String outputDirectory; private Loader loader; public TestLoader(ToFormat outputFormat, ToCompression compression) throws Exception { this.outputDirectory = INPUT_ROOT + getClass().getSimpleName(); this.outputFormat = outputFormat; this.compression = compression; this.loader = new HdfsLoader(); } @Parameterized.Parameters public static Collection<Object[]> data() { List<Object[]> parameters = new ArrayList<Object[]>(); for (ToCompression compression : new ToCompression[] { ToCompression.DEFAULT, ToCompression.BZIP2, ToCompression.NONE }) { for (Object outputFileType : new Object[] { TEXT_FILE, SEQUENCE_FILE }) { parameters.add(new Object[] { outputFileType, compression }); } } return parameters; } @Before public void setUp() throws Exception { } @After public void tearDown() throws IOException { FileUtils.delete(outputDirectory); } @Test public void testLoader() throws Exception { FileSystem fs = FileSystem.get(new Configuration()); Configuration conf = new Configuration(); PrefixContext prefixContext = new PrefixContext(conf, "org.apache.sqoop.job.connector.from.context."); LoaderContext context = new LoaderContext(prefixContext, new DataReader() { private long index = 0L; @Override public Object[] readArrayRecord() { return null; } @Override public String readTextRecord() { if (index++ < NUMBER_OF_ROWS_PER_FILE) { return index + "," + (double) index + ",'" + index + "'"; } else { return null; } } @Override public Object readContent() { return null; } }, null); LinkConfiguration linkConf = new LinkConfiguration(); ToJobConfiguration jobConf = new ToJobConfiguration(); jobConf.toJobConfig.outputDirectory = outputDirectory; jobConf.toJobConfig.compression = compression; jobConf.toJobConfig.outputFormat = outputFormat; Path outputPath = new Path(outputDirectory); loader.load(context, linkConf, jobConf); Assert.assertEquals(1, fs.listStatus(outputPath).length); for (FileStatus status : fs.listStatus(outputPath)) { verifyOutput(fs, status.getPath()); } loader.load(context, linkConf, jobConf); Assert.assertEquals(2, fs.listStatus(outputPath).length); loader.load(context, linkConf, jobConf); loader.load(context, linkConf, jobConf); loader.load(context, linkConf, jobConf); Assert.assertEquals(5, fs.listStatus(outputPath).length); } private void verifyOutput(FileSystem fs, Path file) throws IOException { Configuration conf = new Configuration(); FSDataInputStream fsin = fs.open(file); CompressionCodec codec; switch (outputFormat) { case TEXT_FILE: codec = (new CompressionCodecFactory(conf)).getCodec(file); // Verify compression switch (compression) { case BZIP2: Assert.assertTrue(codec.getClass().getCanonicalName().indexOf("BZip2") != -1); break; case DEFAULT: if (org.apache.hadoop.util.VersionInfo.getVersion().matches("\\b1\\.\\d\\.\\d")) { Assert.assertTrue(codec.getClass().getCanonicalName().indexOf("Default") != -1); } else { Assert.assertTrue(codec.getClass().getCanonicalName().indexOf("Deflate") != -1); } break; case NONE: default: Assert.assertNull(codec); break; } InputStreamReader in; if (codec == null) { in = new InputStreamReader(fsin); } else { in = new InputStreamReader(codec.createInputStream(fsin, codec.createDecompressor())); } BufferedReader textReader = new BufferedReader(in); for (int i = 1; i <= NUMBER_OF_ROWS_PER_FILE; ++i) { Assert.assertEquals(i + "," + (double) i + ",'" + i + "'", textReader.readLine()); } break; case SEQUENCE_FILE: SequenceFile.Reader sequenceReader = new SequenceFile.Reader(fs, file, conf); codec = sequenceReader.getCompressionCodec(); // Verify compression switch (compression) { case BZIP2: Assert.assertTrue(codec.getClass().getCanonicalName().indexOf("BZip2") != -1); break; case DEFAULT: Assert.assertTrue(codec.getClass().getCanonicalName().indexOf("Default") != -1); break; case NONE: default: Assert.assertNull(codec); break; } Text line = new Text(); int index = 1; while (sequenceReader.next(line)) { Assert.assertEquals(index + "," + (double) index + ",'" + index++ + "'", line.toString()); line = new Text(); } break; } } }