Example usage for org.apache.hadoop.io.compress GzipCodec GzipCodec

List of usage examples for org.apache.hadoop.io.compress GzipCodec GzipCodec

Introduction

On this page you can find an example usage for org.apache.hadoop.io.compress GzipCodec GzipCodec.

Prototype

GzipCodec

Source Link

Usage

From source file:fi.tkk.ics.hadoop.bam.TestFastqInputFormat.java

License:Open Source License

@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Compress the sample FASTQ data with gzip and write it to the temp file.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoFastq);
    writer.close();

    // Gzip is not splittable: constructing a reader over a split that starts
    // mid-file (offset 10) must throw the RuntimeException expected above.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null);
    FastqRecordReader reader = new FastqRecordReader(conf, split);
}

From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License:Open Source License

@Test
public void testGzCompressedInput() throws IOException {
    // Write the two sample qseq records through a gzip compression stream.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Read the whole compressed file back, starting from offset 0.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);

    // First record must round-trip through compression intact.
    boolean gotRecord = reader.next(key, fragment);
    assertTrue(gotRecord);
    assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString());
    assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());

    // Second (and last) record.
    gotRecord = reader.next(key, fragment);
    assertTrue(gotRecord);
    assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString());
    assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG",
            fragment.getSequence().toString());
}

From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License:Open Source License

@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Compress the sample qseq data with gzip and write it to the temp file.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Gzip is not splittable: constructing a reader over a split that starts
    // mid-file (offset 10) must throw the RuntimeException expected above.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Opens a buffered {@link DataInputStream} for the given path, transparently
 * decompressing it when the file name ends in {@code .gz} or {@code .bz2}.
 *
 * @param path       file path (Windows separators are normalized to '/')
 * @param fileType   storage backend (LOCAL, HDFS, ALLUXIO, VIEWFS)
 * @param bufferSize read buffer size for HDFS-like systems; -1 uses the default
 * @return a buffered data input stream over the (decompressed) file contents
 * @throws IOException if the file cannot be opened
 * @throws UnsupportedOperationException for unsupported file types
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize)
        throws IOException {
    // Normalize Windows-style separators so suffix checks and Path parsing work.
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        if (gzip) {
            stream = new GZIPInputStream(new FileInputStream(path));
        } else if (bzip2) {
            stream = new BZip2CompressorInputStream(new FileInputStream(path));
        } else {
            stream = new FileInputStream(path);
        }
        break;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        if (bufferSize == -1) {
            stream = fs.open(pt);
        } else {
            stream = fs.open(pt, bufferSize);
        }
        if (gzip) {
            GzipCodec codec = new GzipCodec();
            // Hadoop codecs are Configurable; createInputStream fails with a
            // null conf, so the codec must be configured before use.
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        } else if (bzip2) {
            BZip2Codec codec = new BZip2Codec();
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        }
        break;
    default:
        throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}

From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception/*from w  ww  .jav a2 s . c  o m*/
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

}

From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception// w w  w.j a  v a 2 s . com
 */
/**
 * Generates compressed copies (.gz, .bz2, .snappy, .lz4) of
 * src/test/resources/csv/data.csv. Utility only; no need to call this method
 * during normal test runs.
 *
 * @throws Exception if any file cannot be read, written, or compressed
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    Configuration conf = new Configuration();

    // .gz
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    compressTo(gzip, inputFile, pwd + "/data.csv.gz");

    // .bz2
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    compressTo(bzip2, inputFile, pwd + "/data.csv.bz2");

    // .snappy
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    compressTo(snappy, inputFile, pwd + "/data.csv.snappy");

    // .lz4
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    compressTo(lz4, inputFile, pwd + "/data.csv.lz4");
}

/**
 * Copies {@code inputFile} through the codec's compression stream into
 * {@code outputFile}, closing both streams even if the copy fails.
 */
private static void compressTo(org.apache.hadoop.io.compress.CompressionCodec codec, String inputFile,
        String outputFile) throws Exception {
    try (FileInputStream input = new FileInputStream(inputFile);
            CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream(outputFile))) {
        // Buffered copy instead of the previous byte-at-a-time loop.
        byte[] buffer = new byte[8192];
        int read;
        while ((read = input.read(buffer)) != -1) {
            outputStream.write(buffer, 0, read);
        }
    }
}

From source file:org.apache.flume.sink.customhdfs.TestUseRawLocalFileSystem.java

License:Apache License

/**
 * Writes one event through a gzip-compressed HDFS stream backed by the raw
 * local file system and verifies that bytes reached the file.
 */
@Test
public void testCompressedFile() throws Exception {
    String file = testFile.getCanonicalPath();
    HDFSCompressedDataStream stream = new HDFSCompressedDataStream();
    // Force RawLocalFileSystem so the stream writes directly to the local path.
    context.put("hdfs.useRawLocalFileSystem", "true");
    stream.configure(context);
    stream.open(file, new GzipCodec(), CompressionType.RECORD);
    stream.append(event);
    stream.sync();
    Assert.assertTrue(testFile.length() > 0);
    // Close the stream: the original test leaked the open file handle.
    stream.close();
}

From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java

License:Apache License

@Test
public void isThreadSafe() {

    // Readable side: thread safety depends on the compression codec of the
    // input path. Uncompressed and gzip inputs are thread safe; bzip2 is not.
    testIsThreadSafe("readable compression, no compression - thread safe", "/some/path/without.compression",
            null, null, true);

    testIsThreadSafe("readable compression, gzip compression - thread safe", "/some/compressed/path.gz", null,
            new GzipCodec(), true);

    testIsThreadSafe("readable compression, bzip2 compression - not thread safe", "/some/path/with/bzip2.bz2",
            null, new BZip2Codec(), false);

    // Writable side: thread safety depends on the configured codec class name.
    // Everything is thread safe except BZip2Codec.
    testIsThreadSafe("writable compression, no compression codec - thread safe", "/some/path", null, null,
            true);

    testIsThreadSafe("writable compression, some compression codec - thread safe", "/some/path",
            "I.am.a.nice.codec", new NotSoNiceCodec(), true);

    testIsThreadSafe("writable compression, compression codec bzip2 - not thread safe", "/some/path",
            "org.apache.hadoop.io.compress.BZip2Codec", new BZip2Codec(), false);
}

From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java

License:Apache License

@Test
public void isSplittableCodec() {

    // Only bzip2 (and uncompressed input) can be split; gzip and deflate
    // streams must be read from the beginning, so they are not splittable.
    testIsSplittableCodec("no codec - splittable", "some/innocent.file", null, true);
    testIsSplittableCodec("gzip codec - not splittable", "/gzip.gz", new GzipCodec(), false);
    testIsSplittableCodec("default codec - not splittable", "/default.deflate", new DefaultCodec(), false);
    testIsSplittableCodec("bzip2 codec - splittable", "bzip2.bz2", new BZip2Codec(), true);
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.jsonld.GZippedJsonLDQuadInputTest.java

License:Apache License

/**
 * Creates new tests for gzip-compressed JSON-LD quad input: registers the
 * {@code .jsonld.gz} extension with a {@link GzipCodec} in the parent test.
 */
public GZippedJsonLDQuadInputTest() {
    super(".jsonld.gz", new GzipCodec());
}