Example usage for org.apache.hadoop.io.compress GzipCodec GzipCodec

List of usage examples for org.apache.hadoop.io.compress GzipCodec GzipCodec

Introduction

On this page you can find an example usage for org.apache.hadoop.io.compress GzipCodec GzipCodec.

Prototype

GzipCodec

Source Link

Usage

From source file:fi.tkk.ics.hadoop.bam.TestFastqInputFormat.java

License:Open Source License

@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Compress the sample FASTQ data with gzip and write it to the temp file.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoFastq);
    writer.close();

    // Gzip is not splittable: constructing a reader over a split that starts
    // mid-file (offset 10) must throw the RuntimeException expected above.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null);
    FastqRecordReader reader = new FastqRecordReader(conf, split);
}

From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License:Open Source License

@Test
public void testGzCompressedInput() throws IOException {
    // Write the two sample qseq records through a gzip compression stream.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Read the whole compressed file back, starting from offset 0.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);

    // First record must round-trip through compression intact.
    boolean gotRecord = reader.next(key, fragment);
    assertTrue(gotRecord);
    assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString());
    assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());

    // Second (and last) record.
    gotRecord = reader.next(key, fragment);
    assertTrue(gotRecord);
    assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString());
    assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG",
            fragment.getSequence().toString());
}

From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License:Open Source License

@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Compress the sample qseq data with gzip and write it to the temp file.
    GzipCodec gzipCodec = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzipCodec.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Gzip is not splittable: constructing a reader over a split that starts
    // mid-file (offset 10) must throw the RuntimeException expected above.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);
}

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Opens a buffered {@link DataInputStream} for the given path, transparently
 * decompressing it when the file name ends in {@code .gz} or {@code .bz2}.
 *
 * @param path       file path (Windows separators are normalized to '/')
 * @param fileType   storage backend (LOCAL, HDFS, ALLUXIO, VIEWFS)
 * @param bufferSize read buffer size for HDFS-like systems; -1 uses the default
 * @return a buffered data input stream over the (decompressed) file contents
 * @throws IOException if the file cannot be opened
 * @throws UnsupportedOperationException for unsupported file types
 */
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize)
        throws IOException {
    // Normalize Windows-style separators so suffix checks and Path parsing work.
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        if (gzip) {
            stream = new GZIPInputStream(new FileInputStream(path));
        } else if (bzip2) {
            stream = new BZip2CompressorInputStream(new FileInputStream(path));
        } else {
            stream = new FileInputStream(path);
        }
        break;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        if (bufferSize == -1) {
            stream = fs.open(pt);
        } else {
            stream = fs.open(pt, bufferSize);
        }
        if (gzip) {
            GzipCodec codec = new GzipCodec();
            // Hadoop codecs are Configurable; createInputStream fails with a
            // null conf, so the codec must be configured before use.
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        } else if (bzip2) {
            BZip2Codec codec = new BZip2Codec();
            codec.setConf(configuration);
            stream = codec.createInputStream(stream);
        }
        break;
    default:
        throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}

From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception/*from w  ww  .jav a2 s . c  o m*/
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

}

From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java

License:Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception// w w  w.j a  v a 2 s . com
 */
/**
 * Generates compressed copies (.gz, .bz2, .snappy, .lz4) of
 * src/test/resources/csv/data.csv. Utility only; no need to call this method
 * during normal test runs.
 *
 * @throws Exception if any file cannot be read, written, or compressed
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    Configuration conf = new Configuration();

    // .gz
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    compressTo(gzip, inputFile, pwd + "/data.csv.gz");

    // .bz2
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    compressTo(bzip2, inputFile, pwd + "/data.csv.bz2");

    // .snappy
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    compressTo(snappy, inputFile, pwd + "/data.csv.snappy");

    // .lz4
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    compressTo(lz4, inputFile, pwd + "/data.csv.lz4");
}

/**
 * Copies {@code inputFile} through the codec's compression stream into
 * {@code outputFile}, closing both streams even if the copy fails.
 */
private static void compressTo(org.apache.hadoop.io.compress.CompressionCodec codec, String inputFile,
        String outputFile) throws Exception {
    try (FileInputStream input = new FileInputStream(inputFile);
            CompressionOutputStream outputStream = codec.createOutputStream(new FileOutputStream(outputFile))) {
        // Buffered copy instead of the previous byte-at-a-time loop.
        byte[] buffer = new byte[8192];
        int read;
        while ((read = input.read(buffer)) != -1) {
            outputStream.write(buffer, 0, read);
        }
    }
}

From source file:org.apache.flume.sink.customhdfs.TestUseRawLocalFileSystem.java

License:Apache License

/**
 * Writes one event through a gzip-compressed HDFS stream backed by the raw
 * local file system and verifies that bytes reached the file.
 */
@Test
public void testCompressedFile() throws Exception {
    String file = testFile.getCanonicalPath();
    HDFSCompressedDataStream stream = new HDFSCompressedDataStream();
    // Force RawLocalFileSystem so the stream writes directly to the local path.
    context.put("hdfs.useRawLocalFileSystem", "true");
    stream.configure(context);
    stream.open(file, new GzipCodec(), CompressionType.RECORD);
    stream.append(event);
    stream.sync();
    Assert.assertTrue(testFile.length() > 0);
    // Close the stream: the original test leaked the open file handle.
    stream.close();
}

From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java

License:Apache License

@Test
public void isThreadSafe() {

    // Readable side: thread safety depends on the compression codec of the
    // input path. Uncompressed and gzip inputs are thread safe; bzip2 is not.
    testIsThreadSafe("readable compression, no compression - thread safe", "/some/path/without.compression",
            null, null, true);

    testIsThreadSafe("readable compression, gzip compression - thread safe", "/some/compressed/path.gz", null,
            new GzipCodec(), true);

    testIsThreadSafe("readable compression, bzip2 compression - not thread safe", "/some/path/with/bzip2.bz2",
            null, new BZip2Codec(), false);

    // Writable side: thread safety depends on the configured codec class name.
    // Everything is thread safe except BZip2Codec.
    testIsThreadSafe("writable compression, no compression codec - thread safe", "/some/path", null, null,
            true);

    testIsThreadSafe("writable compression, some compression codec - thread safe", "/some/path",
            "I.am.a.nice.codec", new NotSoNiceCodec(), true);

    testIsThreadSafe("writable compression, compression codec bzip2 - not thread safe", "/some/path",
            "org.apache.hadoop.io.compress.BZip2Codec", new BZip2Codec(), false);
}

From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java

License:Apache License

@Test
public void isSplittableCodec() {

    // Only bzip2 (and uncompressed input) can be split; gzip and deflate
    // streams must be read from the beginning, so they are not splittable.
    testIsSplittableCodec("no codec - splittable", "some/innocent.file", null, true);
    testIsSplittableCodec("gzip codec - not splittable", "/gzip.gz", new GzipCodec(), false);
    testIsSplittableCodec("default codec - not splittable", "/default.deflate", new DefaultCodec(), false);
    testIsSplittableCodec("bzip2 codec - splittable", "bzip2.bz2", new BZip2Codec(), true);
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.jsonld.GZippedJsonLDQuadInputTest.java

License:Apache License

/**
 * Creates new tests for gzip-compressed JSON-LD quad input: registers the
 * {@code .jsonld.gz} extension with a {@link GzipCodec} in the parent test.
 */
public GZippedJsonLDQuadInputTest() {
    super(".jsonld.gz", new GzipCodec());
}