Example usage for org.apache.hadoop.io.compress BZip2Codec BZip2Codec

List of usage examples for org.apache.hadoop.io.compress BZip2Codec BZip2Codec

Introduction

In this page you can find the example usage for org.apache.hadoop.io.compress BZip2Codec BZip2Codec.

Prototype

public BZip2Codec() 

Source Link

Document

Creates a new instance of BZip2Codec.

Usage

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

/**
 * Test to write few log lines, compress using bzip2, write to disk, read back
 * the compressed file and verify the written lines.
 *
 * @throws IOException if writing or reading back the compressed file fails
 * @throws InterruptedException if the test is interrupted while waiting on the sink
 */
@Test
public void testBZip2Codec() throws IOException, InterruptedException {
    // "BZip2Codec" is the codec name as looked up by checkOutputFormat;
    // the BZip2Codec instance is used to read the file back for verification.
    checkOutputFormat("syslog", new SyslogEntryFormat(), "BZip2Codec", new BZip2Codec());
}

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

/**
 * Test to write few log lines, compress using bzip2, write to disk, read back
 * the compressed file and verify the written lines.
 *
 * This test uses the wrong case for the codec name ("bzip2Codec") to verify
 * that codec lookup is case-insensitive.
 *
 * @throws IOException if writing or reading back the compressed file fails
 * @throws InterruptedException if the test is interrupted while waiting on the sink
 */
@Test
public void testBZip2CodecWrongCase() throws IOException, InterruptedException {
    checkOutputFormat("syslog", new SyslogEntryFormat(), "bzip2Codec", new BZip2Codec());
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

/**
 * Runs the text-file compression round-trip test with the bzip2 codec.
 *
 * @throws IOException if the compression round trip fails
 */
public void testBzip2TextCompression() throws IOException {
    // NOTE(review): the meaning of the second argument (4) is not visible
    // here — presumably an expected count; confirm against runTextCompressionTest.
    runTextCompressionTest(new BZip2Codec(), 4);
}

From source file:com.cloudera.sqoop.TestCompression.java

License:Apache License

/**
 * Runs the SequenceFile compression round-trip test with the bzip2 codec.
 *
 * @throws Exception if the compression round trip fails
 */
public void testBzip2SequenceFileCompression() throws Exception {
    // NOTE(review): the meaning of the second argument (4) is not visible
    // here — presumably an expected count; confirm against runSequenceFileCompressionTest.
    runSequenceFileCompressionTest(new BZip2Codec(), 4);
}

From source file:example.TestLineRecordReader.java

License:Apache License

/**
 * Verifies that closing a LineRecordReader twice is safe, and that afterwards
 * the codec pool still hands out distinct decompressors (i.e. the double
 * close did not return the same decompressor to the pool twice).
 *
 * @throws IOException if the bzip2 test resource cannot be read
 */
@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    // Read the whole compressed file through the record reader ...
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);

    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue())
        ;
    // ... then close it twice; the second close must be a harmless no-op.
    reader.close();
    reader.close();

    // If the double close returned the reader's decompressor to the pool
    // twice, the pool would hand the same instance out more than once and
    // the set would contain fewer than 10 distinct decompressors.
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}

From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java

License:LGPL

/**
 * Create a bzip2 input stream.
 * @param is input stream with bzip2-compressed data
 * @return an uncompressed input stream
 * @throws IOException if an error occurs while creating the input stream
 */
public static InputStream createBZip2InputStream(final InputStream is) throws IOException {

    return new BZip2Codec().createInputStream(is);
}

From source file:fr.ens.biologie.genomique.eoulsan.io.HadoopCompressionCodecs.java

License:LGPL

/**
 * Create a bzip2 output stream.
 * @param os the output stream to compress
 * @return a compressed output stream
 * @throws IOException if an error occurs while creating the output stream
 */
public static OutputStream createBZip2OutputStream(final OutputStream os) throws IOException {

    return new BZip2Codec().createOutputStream(os);
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Opens a {@link DataInputStream} on the given path, transparently
 * decompressing gzip ({@code .gz}) and bzip2 ({@code .bz2}) files.
 *
 * @param path       file path; backslashes are normalized to forward slashes
 * @param fileType   storage backend (LOCAL, HDFS, ALLUXIO, VIEWFS)
 * @param bufferSize read buffer size for distributed file systems; pass -1
 *                   to use the file system's default
 * @return a buffered data input stream over the (possibly decompressed) file
 * @throws IOException if the file cannot be opened
 * @throws UnsupportedOperationException if the file type is not supported
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize)
        throws IOException {
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        if (gzip) {
            stream = new GZIPInputStream(new FileInputStream(path));
        } else if (bzip2) {
            stream = new BZip2CompressorInputStream(new FileInputStream(path));
        } else {
            stream = new FileInputStream(path);
        }
        break;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        if (bufferSize == -1) {
            stream = fs.open(pt);
        } else {
            stream = fs.open(pt, bufferSize);
        }
        if (gzip) {
            GzipCodec codec = new GzipCodec();
            // Hadoop codecs are Configurable; without a Configuration the
            // codec cannot resolve its decompressor factory and may fail.
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        } else if (bzip2) {
            BZip2Codec codec = new BZip2Codec();
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        }
        break;
    default:
        throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}

From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java

License:Apache License

/**
 * Generates the compressed fixture files (.gz, .bz2, .snappy, .lz4) from
 * data.csv; no need to call this method during a normal test run.
 *
 * @throws Exception if any of the fixture files cannot be read or written
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    Configuration conf = new Configuration();

    // .gz
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    compressTo(gzip.createOutputStream(new FileOutputStream(pwd + "/data.csv.gz")), inputFile);

    // .bz2
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    compressTo(bzip2.createOutputStream(new FileOutputStream(pwd + "/data.csv.bz2")), inputFile);

    // .snappy
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    compressTo(snappy.createOutputStream(new FileOutputStream(pwd + "/data.csv.snappy")), inputFile);

    // .lz4
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    compressTo(lz4.createOutputStream(new FileOutputStream(pwd + "/data.csv.lz4")), inputFile);
}

/**
 * Copies {@code inputFile} into the given compressed output stream in chunks.
 * Both streams are closed even on failure (the previous code leaked them
 * whenever an I/O error occurred) and copying is chunked rather than
 * byte-at-a-time.
 */
private static void compressTo(CompressionOutputStream outputStream, String inputFile) throws IOException {
    try (FileInputStream input = new FileInputStream(inputFile);
            CompressionOutputStream out = outputStream) {
        byte[] buffer = new byte[8192];
        int read;
        while ((read = input.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
    }
}

From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception/*from w w w .j  a  v  a  2  s .c o m*/
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

}