Example usage for org.apache.hadoop.io.compress GzipCodec createOutputStream

List of usage examples for org.apache.hadoop.io.compress GzipCodec createOutputStream

Introduction

In this page you can find the example usage for org.apache.hadoop.io.compress GzipCodec createOutputStream.

Prototype

@Override
    public CompressionOutputStream createOutputStream(OutputStream out) throws IOException 

Source Link

Usage

From source file: com.pinterest.secor.io.FileReaderWriterFactoryTest.java

License: Apache License

/**
 * Sets up the Hadoop filesystem and codec mocks needed by the delimited-text
 * writer tests. Stubs both the read and write paths of the mocked codec.
 */
private void mockDelimitedTextFileWriter(boolean isCompressed) throws Exception {
    // Intercept the static FileSystem.get(...) entry point so no real FS is touched.
    PowerMockito.mockStatic(FileSystem.class);
    FileSystem mockFs = Mockito.mock(FileSystem.class);
    Mockito.when(FileSystem.get(Mockito.any(URI.class), Mockito.any(Configuration.class)))
            .thenReturn(mockFs);

    Path targetPath = isCompressed ? new Path(PATH_GZ) : new Path(PATH);

    // Intercept `new GzipCodec()` so the code under test receives our mock codec.
    GzipCodec mockCodec = PowerMockito.mock(GzipCodec.class);
    PowerMockito.whenNew(GzipCodec.class).withNoArguments().thenReturn(mockCodec);

    FSDataInputStream rawIn = Mockito.mock(FSDataInputStream.class);
    FSDataOutputStream rawOut = Mockito.mock(FSDataOutputStream.class);
    Mockito.when(mockFs.open(targetPath)).thenReturn(rawIn);
    Mockito.when(mockFs.create(targetPath)).thenReturn(rawOut);

    // The codec wraps the raw streams in compression streams; stub both directions.
    CompressionInputStream compressedIn = Mockito.mock(CompressionInputStream.class);
    CompressionOutputStream compressedOut = Mockito.mock(CompressionOutputStream.class);
    Mockito.when(mockCodec.createInputStream(Mockito.any(InputStream.class)))
            .thenReturn(compressedIn);
    Mockito.when(mockCodec.createOutputStream(Mockito.any(OutputStream.class)))
            .thenReturn(compressedOut);
}

From source file: com.pinterest.secor.io.FileReaderWriterTest.java

License: Apache License

/**
 * Sets up the Hadoop filesystem and codec mocks needed by the delimited-text
 * reader/writer tests. Stubs both the read and write paths of the mocked codec.
 */
private void mockDelimitedTextFileReaderWriter(boolean isCompressed) throws Exception {
    // Replace the static FileSystem.get(...) factory so no real FS is created.
    PowerMockito.mockStatic(FileSystem.class);
    FileSystem mockFs = Mockito.mock(FileSystem.class);
    Mockito.when(FileSystem.get(Mockito.any(URI.class), Mockito.any(Configuration.class)))
            .thenReturn(mockFs);

    Path targetPath = isCompressed ? new Path(PATH_GZ) : new Path(PATH);

    // Route `new GzipCodec()` inside the code under test to this mock.
    GzipCodec mockCodec = PowerMockito.mock(GzipCodec.class);
    PowerMockito.whenNew(GzipCodec.class).withNoArguments().thenReturn(mockCodec);

    FSDataInputStream rawIn = Mockito.mock(FSDataInputStream.class);
    FSDataOutputStream rawOut = Mockito.mock(FSDataOutputStream.class);
    Mockito.when(mockFs.open(targetPath)).thenReturn(rawIn);
    Mockito.when(mockFs.create(targetPath)).thenReturn(rawOut);

    // Stub compression wrapping for both the input and output stream paths.
    CompressionInputStream compressedIn = Mockito.mock(CompressionInputStream.class);
    CompressionOutputStream compressedOut = Mockito.mock(CompressionOutputStream.class);
    Mockito.when(mockCodec.createInputStream(Mockito.any(InputStream.class)))
            .thenReturn(compressedIn);
    Mockito.when(mockCodec.createOutputStream(Mockito.any(OutputStream.class)))
            .thenReturn(compressedOut);
}

From source file: fi.tkk.ics.hadoop.bam.TestFastqInputFormat.java

License: Open Source License

/**
 * Writes two FASTQ records through a gzip CompressionOutputStream, then checks
 * that FastqRecordReader transparently decompresses and parses both of them.
 */
@Test
public void testGzCompressedInput() throws IOException {
    // Produce a gzip-compressed FASTQ file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoFastq);
    writer.close();

    // Read the whole compressed file back as a single split.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null);
    FastqRecordReader reader = new FastqRecordReader(conf, split);

    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString());
    assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());

    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString());
    assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG",
            fragment.getSequence().toString());
}

From source file: fi.tkk.ics.hadoop.bam.TestFastqInputFormat.java

License: Open Source License

/**
 * A gzip file is not splittable: constructing a FastqRecordReader over a split
 * that starts mid-file (offset 10) must fail with a RuntimeException.
 */
@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Produce a gzip-compressed FASTQ file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoFastq);
    writer.close();

    // Byte 10 lands inside the gzip stream; the reader constructor must reject it.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null);
    new FastqRecordReader(conf, split);
}

From source file: fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License: Open Source License

/**
 * Writes two qseq records through a gzip CompressionOutputStream, then checks
 * that QseqRecordReader transparently decompresses and parses both of them.
 */
@Test
public void testGzCompressedInput() throws IOException {
    // Produce a gzip-compressed qseq file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Read the whole compressed file back as a single split.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);

    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString());
    assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());

    assertTrue(reader.next(key, fragment));
    assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString());
    assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG",
            fragment.getSequence().toString());
}

From source file: fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java

License: Open Source License

/**
 * A gzip file is not splittable: constructing a QseqRecordReader over a split
 * that starts mid-file (offset 10) must fail with a RuntimeException.
 */
@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Produce a gzip-compressed qseq file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoQseq);
    writer.close();

    // Byte 10 lands inside the gzip stream; the reader constructor must reject it.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null);
    new QseqRecordReader(conf, split);
}

From source file: org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java

License: Apache License

/**
 * generate compressed files, no need to call this method.
 * @throws Exception/*from   w  ww  .  j  a v  a 2  s . co  m*/
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

}

From source file: org.apache.carbondata.processing.csvload.CSVInputFormatTest.java

License: Apache License

/**
 * Generates the compressed fixture files (.gz, .bz2, .snappy, .lz4) from
 * src/test/resources/csv/data.csv. Utility only; the tests read the
 * pre-generated files, so there is no need to call this method during a
 * normal run.
 *
 * @throws Exception if the source file cannot be read or a fixture cannot be written
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    Configuration conf = new Configuration();

    // .gz
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    writeCompressed(gzip, inputFile, pwd + "/data.csv.gz");

    // .bz2
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    writeCompressed(bzip2, inputFile, pwd + "/data.csv.bz2");

    // .snappy (requires the native snappy library to be available)
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    writeCompressed(snappy, inputFile, pwd + "/data.csv.snappy");

    // .lz4 (requires the native lz4 library to be available)
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    writeCompressed(lz4, inputFile, pwd + "/data.csv.lz4");
}

/**
 * Copies {@code inputFile} into {@code outputFile} through the given codec's
 * compression stream. Uses a buffered copy (the previous byte-at-a-time loop
 * made one stream call per byte) and try-with-resources so both streams are
 * closed even when the copy fails.
 *
 * <p>The codec parameter is fully qualified to avoid requiring a new import.
 */
private static void writeCompressed(org.apache.hadoop.io.compress.CompressionCodec codec,
        String inputFile, String outputFile) throws Exception {
    try (FileInputStream input = new FileInputStream(inputFile);
            CompressionOutputStream outputStream =
                    codec.createOutputStream(new FileOutputStream(outputFile))) {
        byte[] buffer = new byte[8192];
        int bytesRead;
        while ((bytesRead = input.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }
    }
}

From source file: tests.it.crs4.seal.common.TestSamInputFormat.java

License: Open Source License

/**
 * Writes two SAM records through a gzip CompressionOutputStream, then checks
 * that SamRecordReader transparently decompresses and yields both records.
 */
@Test
public void testGzCompressedInput() throws IOException {
    // Produce a gzip-compressed SAM file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoRecords);
    writer.close();

    // Read the whole compressed file back as a single split.
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoRecords.length(), null);

    SamRecordReader reader = new SamRecordReader();
    reader.initialize(split, Utils.getTaskAttemptContext(conf));

    assertTrue(reader.nextKeyValue());
    assertEquals("Read/2", reader.getCurrentValue().getAnyRead().getName());

    assertTrue(reader.nextKeyValue());
    assertEquals("Read/1", reader.getCurrentValue().getAnyRead().getName());
}

From source file: tests.it.crs4.seal.common.TestSamInputFormat.java

License: Open Source License

/**
 * A gzip file is not splittable: asking SamInputFormat for a record reader over
 * a split that starts mid-file (offset 10) must fail with a RuntimeException.
 */
@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // Produce a gzip-compressed SAM file in tempGz.
    GzipCodec gzip = new GzipCodec();
    PrintWriter writer = new PrintWriter(
            new BufferedOutputStream(gzip.createOutputStream(new FileOutputStream(tempGz))));
    writer.write(twoRecords);
    writer.close();

    // Byte 10 lands inside the gzip stream; reader creation must reject the split.
    SamInputFormat inputFormat = new SamInputFormat();
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoRecords.length(), null);
    inputFormat.createRecordReader(split, Utils.getTaskAttemptContext(conf));
}