Example usage for org.apache.hadoop.io.compress BZip2Codec BZip2Codec

List of usage examples for org.apache.hadoop.io.compress BZip2Codec BZip2Codec

Introduction

In this page you can find the example usage for org.apache.hadoop.io.compress BZip2Codec BZip2Codec.

Prototype

public BZip2Codec() 

Source Link

Document

Creates a new instance of BZip2Codec.

Usage

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

/**
 * Test to write few log lines, compress using bzip2, write to disk, read back
 * the compressed file and verify the written lines.
 *
 * @throws IOException if writing or reading back the compressed file fails
 * @throws InterruptedException if the test is interrupted while waiting on the sink
 */
@Test
public void testBZip2Codec() throws IOException, InterruptedException {
    // "BZip2Codec" is the codec name as looked up by checkOutputFormat;
    // the BZip2Codec instance is used to read the file back for verification.
    checkOutputFormat("syslog", new SyslogEntryFormat(), "BZip2Codec", new BZip2Codec());
}

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

/**
 * Test to write few log lines, compress using bzip2, write to disk, read back
 * the compressed file and verify the written lines.
 *
 * This test uses the wrong case for the codec name ("bzip2Codec") to verify
 * that codec lookup is case-insensitive.
 *
 * @throws IOException if writing or reading back the compressed file fails
 * @throws InterruptedException if the test is interrupted while waiting on the sink
 */
@Test
public void testBZip2CodecWrongCase() throws IOException, InterruptedException {
    checkOutputFormat("syslog", new SyslogEntryFormat(), "bzip2Codec", new BZip2Codec());
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

/**
 * Runs the text-file compression round-trip test with the bzip2 codec.
 *
 * @throws IOException if the compression round trip fails
 */
public void testBzip2TextCompression() throws IOException {
    // NOTE(review): the meaning of the second argument (4) is not visible
    // here — presumably an expected count; confirm against runTextCompressionTest.
    runTextCompressionTest(new BZip2Codec(), 4);
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

/**
 * Runs the SequenceFile compression round-trip test with the bzip2 codec.
 *
 * @throws Exception if the compression round trip fails
 */
public void testBzip2SequenceFileCompression() throws Exception {
    // NOTE(review): the meaning of the second argument (4) is not visible
    // here — presumably an expected count; confirm against runSequenceFileCompressionTest.
    runSequenceFileCompressionTest(new BZip2Codec(), 4);
}

From source file:example.TestLineRecordReader.java

License:Apache License

/**
 * Verifies that closing a LineRecordReader twice is safe, and that afterwards
 * the codec pool still hands out distinct decompressors (i.e. the double
 * close did not return the same decompressor to the pool twice).
 *
 * @throws IOException if the bzip2 test resource cannot be read
 */
@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Read the whole compressed file through the record reader ...
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue())
        ;
    // ... then close it twice; the second close must be a harmless no-op.
    reader.close();
    reader.close();

    // If the double close returned the reader's decompressor to the pool
    // twice, the pool would hand the same instance out more than once and
    // the set would contain fewer than 10 distinct decompressors.
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}

From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java

License:LGPL

/**
 * Create a bzip2 input stream.
 * @param is input stream with bzip2-compressed data
 * @return an uncompressed input stream
 * @throws IOException if an error occurs while creating the input stream
 */
public static InputStream createBZip2InputStream(final InputStream is) throws IOException {

    return new BZip2Codec().createInputStream(is);
}

From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java

License:LGPL

/**
 * Create a bzip2 output stream.
 * @param os the output stream to compress
 * @return a compressed output stream
 * @throws IOException if an error occurs while creating the output stream
 */
public static OutputStream createBZip2OutputStream(final OutputStream os) throws IOException {

    return new BZip2Codec().createOutputStream(os);
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Opens a {@link DataInputStream} on the given path, transparently
 * decompressing gzip ({@code .gz}) and bzip2 ({@code .bz2}) files.
 *
 * @param path       file path; backslashes are normalized to forward slashes
 * @param fileType   storage backend (LOCAL, HDFS, ALLUXIO, VIEWFS)
 * @param bufferSize read buffer size for distributed file systems; pass -1
 *                   to use the file system's default
 * @return a buffered data input stream over the (possibly decompressed) file
 * @throws IOException if the file cannot be opened
 * @throws UnsupportedOperationException if the file type is not supported
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize)
        throws IOException {
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        if (gzip) {
            stream = new GZIPInputStream(new FileInputStream(path));
        } else if (bzip2) {
            stream = new BZip2CompressorInputStream(new FileInputStream(path));
        } else {
            stream = new FileInputStream(path);
        }
        break;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        if (bufferSize == -1) {
            stream = fs.open(pt);
        } else {
            stream = fs.open(pt, bufferSize);
        }
        if (gzip) {
            GzipCodec codec = new GzipCodec();
            // Hadoop codecs are Configurable; without a Configuration the
            // codec cannot resolve its decompressor factory and may fail.
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        } else if (bzip2) {
            BZip2Codec codec = new BZip2Codec();
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        }
        break;
    default:
        throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}

From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java

License:Apache License

/**
 * Generates the compressed fixture files (.gz, .bz2, .snappy, .lz4) from
 * data.csv; no need to call this method during a normal test run.
 *
 * @throws Exception if any of the fixture files cannot be read or written
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    Configuration conf = new Configuration();

    // .gz
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    compressTo(gzip.createOutputStream(new FileOutputStream(pwd + "/data.csv.gz")), inputFile);

    // .bz2
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    compressTo(bzip2.createOutputStream(new FileOutputStream(pwd + "/data.csv.bz2")), inputFile);

    // .snappy
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    compressTo(snappy.createOutputStream(new FileOutputStream(pwd + "/data.csv.snappy")), inputFile);

    // .lz4
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    compressTo(lz4.createOutputStream(new FileOutputStream(pwd + "/data.csv.lz4")), inputFile);
}

/**
 * Copies {@code inputFile} into the given compressed output stream in chunks.
 * Both streams are closed even on failure (the previous code leaked them
 * whenever an I/O error occurred) and copying is chunked rather than
 * byte-at-a-time.
 */
private static void compressTo(CompressionOutputStream outputStream, String inputFile) throws IOException {
    try (FileInputStream input = new FileInputStream(inputFile);
            CompressionOutputStream out = outputStream) {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = input.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
    }
}

From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception/*from w w w .j  a  v  a  2  s .c o m*/
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

}