List of usage examples for the org.apache.hadoop.io.compress.BZip2Codec constructor
public BZip2Codec()
From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java
License:Apache License
/** * Test to write few log lines, compress using bzip2, write to disk, read back * the compressed file and verify the written lines. * * @throws IOException/* w w w .j ava2 s . c o m*/ * @throws InterruptedException */ @Test public void testBZip2Codec() throws IOException, InterruptedException { checkOutputFormat("syslog", new SyslogEntryFormat(), "BZip2Codec", new BZip2Codec()); }
From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java
License:Apache License
/** * Test to write few log lines, compress using bzip2, write to disk, read back * the compressed file and verify the written lines. * * This test uses the wrong case for the codec name. * * @throws IOException/*from www. j a va 2 s .c o m*/ * @throws InterruptedException */ @Test public void testBZip2CodecWrongCase() throws IOException, InterruptedException { checkOutputFormat("syslog", new SyslogEntryFormat(), "bzip2Codec", new BZip2Codec()); }
From source file:com.cloudera.sqoop.TestCompression.java
License:Apache License
/**
 * Verifies text-mode export output when compressed with the Hadoop bzip2 codec.
 *
 * @throws IOException if the compression round-trip fails
 */
public void testBzip2TextCompression() throws IOException {
    final BZip2Codec codec = new BZip2Codec();
    runTextCompressionTest(codec, 4);
}
From source file:com.cloudera.sqoop.TestCompression.java
License:Apache License
/**
 * Verifies SequenceFile export output when compressed with the Hadoop bzip2 codec.
 *
 * @throws Exception if the compression round-trip fails
 */
public void testBzip2SequenceFileCompression() throws Exception {
    final BZip2Codec codec = new BZip2Codec();
    runSequenceFileCompressionTest(codec, 4);
}
From source file:example.TestLineRecordReader.java
License:Apache License
@Test public void testMultipleClose() throws IOException { URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2"); assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl); File testFile = new File(testFileUrl.getFile()); Path testFilePath = new Path(testFile.getAbsolutePath()); long testFileSize = testFile.length(); Configuration conf = new Configuration(); conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); // read the data and check whether BOM is skipped FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null); LineRecordReader reader = new LineRecordReader(); reader.initialize(split, context);/* www. j ava 2 s .c om*/ //noinspection StatementWithEmptyBody while (reader.nextKeyValue()) ; reader.close(); reader.close(); BZip2Codec codec = new BZip2Codec(); codec.setConf(conf); Set<Decompressor> decompressors = new HashSet<Decompressor>(); for (int i = 0; i < 10; ++i) { decompressors.add(CodecPool.getDecompressor(codec)); } assertEquals(10, decompressors.size()); }
From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java
License:LGPL
/**
 * Wraps an input stream with bzip2 decompression.
 *
 * @param is the compressed input stream
 * @return an input stream yielding the uncompressed bytes
 * @throws IOException if the decompression stream cannot be created
 */
public static InputStream createBZip2InputStream(final InputStream is) throws IOException {
    final BZip2Codec codec = new BZip2Codec();
    return codec.createInputStream(is);
}
From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java
License:LGPL
/**
 * Wraps an output stream with bzip2 compression.
 *
 * @param os the output stream to write compressed bytes to
 * @return an output stream that compresses what is written to it
 * @throws IOException if the compression stream cannot be created
 */
public static OutputStream createBZip2OutputStream(final OutputStream os) throws IOException {
    final BZip2Codec codec = new BZip2Codec();
    return codec.createOutputStream(os);
}
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize) throws IOException { path = path.replace("\\", "/"); boolean gzip = path.endsWith(".gz"); boolean bzip2 = path.endsWith(".bz2"); InputStream stream;//w w w. j a v a 2s . com switch (fileType) { case LOCAL: path = getUpdatedFilePath(path, fileType); if (gzip) { stream = new GZIPInputStream(new FileInputStream(path)); } else if (bzip2) { stream = new BZip2CompressorInputStream(new FileInputStream(path)); } else { stream = new FileInputStream(path); } break; case HDFS: case ALLUXIO: case VIEWFS: Path pt = new Path(path); FileSystem fs = pt.getFileSystem(configuration); if (bufferSize == -1) { stream = fs.open(pt); } else { stream = fs.open(pt, bufferSize); } if (gzip) { GzipCodec codec = new GzipCodec(); stream = codec.createInputStream(stream); } else if (bzip2) { BZip2Codec codec = new BZip2Codec(); stream = codec.createInputStream(stream); } break; default: throw new UnsupportedOperationException("unsupported file system"); } return new DataInputStream(new BufferedInputStream(stream)); }
From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java
License:Apache License
/** * generate compressed files, no need to call this method. * @throws Exception//from w w w . j a v a 2s . co m */ public void generateCompressFiles() throws Exception { String pwd = new File("src/test/resources").getCanonicalPath(); String inputFile = pwd + "/data.csv"; FileInputStream input = new FileInputStream(inputFile); Configuration conf = new Configuration(); // .gz String outputFile = pwd + "/data.csv.gz"; FileOutputStream output = new FileOutputStream(outputFile); GzipCodec gzip = new GzipCodec(); gzip.setConf(conf); CompressionOutputStream outputStream = gzip.createOutputStream(output); int i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .bz2 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.bz2"; output = new FileOutputStream(outputFile); BZip2Codec bzip2 = new BZip2Codec(); bzip2.setConf(conf); outputStream = bzip2.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .snappy input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.snappy"; output = new FileOutputStream(outputFile); SnappyCodec snappy = new SnappyCodec(); snappy.setConf(conf); outputStream = snappy.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); //.lz4 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.lz4"; output = new FileOutputStream(outputFile); Lz4Codec lz4 = new Lz4Codec(); lz4.setConf(conf); outputStream = lz4.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); }
From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java
License:Apache License
/** * generate compressed files, no need to call this method. * @throws Exception/*from w w w .j a v a 2 s .c o m*/ */ public void generateCompressFiles() throws Exception { String pwd = new File("src/test/resources/csv").getCanonicalPath(); String inputFile = pwd + "/data.csv"; FileInputStream input = new FileInputStream(inputFile); Configuration conf = new Configuration(); // .gz String outputFile = pwd + "/data.csv.gz"; FileOutputStream output = new FileOutputStream(outputFile); GzipCodec gzip = new GzipCodec(); gzip.setConf(conf); CompressionOutputStream outputStream = gzip.createOutputStream(output); int i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .bz2 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.bz2"; output = new FileOutputStream(outputFile); BZip2Codec bzip2 = new BZip2Codec(); bzip2.setConf(conf); outputStream = bzip2.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .snappy input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.snappy"; output = new FileOutputStream(outputFile); SnappyCodec snappy = new SnappyCodec(); snappy.setConf(conf); outputStream = snappy.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); //.lz4 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.lz4"; output = new FileOutputStream(outputFile); Lz4Codec lz4 = new Lz4Codec(); lz4.setConf(conf); outputStream = lz4.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); }