List of usage examples for org.apache.hadoop.io.compress.GzipCodec
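Before the per-project examples below, here is a minimal round-trip sketch of the GzipCodec API (not taken from any of the listed source files; the class name, temp-file prefix, and sample text are placeholders): configure the codec, wrap a plain OutputStream with createOutputStream to write gzip data, then wrap an InputStream with createInputStream to read it back.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.GzipCodec;

public class GzipCodecRoundTrip {
    public static void main(String[] args) throws IOException {
        GzipCodec codec = new GzipCodec();
        codec.setConf(new Configuration()); // the codec consults the Configuration when creating streams

        File tmp = File.createTempFile("gzip-codec-demo", ".gz"); // placeholder temp file

        // write: wrap a plain FileOutputStream with the codec's compressing stream
        CompressionOutputStream compressedOut = codec.createOutputStream(new FileOutputStream(tmp));
        PrintWriter writer = new PrintWriter(compressedOut);
        writer.println("hello, gzip"); // placeholder payload
        writer.close();

        // read: wrap a plain FileInputStream with the codec's decompressing stream
        CompressionInputStream compressedIn = codec.createInputStream(new FileInputStream(tmp));
        BufferedReader reader = new BufferedReader(new InputStreamReader(compressedIn));
        System.out.println(reader.readLine()); // prints "hello, gzip"
        reader.close();
    }
}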
From source file:fi.tkk.ics.hadoop.bam.TestFastqInputFormat.java
License:Open Source License
@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // write gzip-compressed data
    GzipCodec codec = new GzipCodec();
    PrintWriter fastqOut = new PrintWriter(
            new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz))));
    fastqOut.write(twoFastq);
    fastqOut.close();

    // now try to read it starting from the middle
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null);
    FastqRecordReader reader = new FastqRecordReader(conf, split);
}
From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java
License:Open Source License
@Test
public void testGzCompressedInput() throws IOException {
    // write gzip-compressed data
    GzipCodec codec = new GzipCodec();
    PrintWriter qseqOut = new PrintWriter(
            new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz))));
    qseqOut.write(twoQseq);
    qseqOut.close();

    // now try to read it
    split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);

    boolean retval = reader.next(key, fragment);
    assertTrue(retval);
    assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString());
    assertEquals(
            "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());

    retval = reader.next(key, fragment);
    assertTrue(retval);
    assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString());
    assertEquals(
            "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG",
            fragment.getSequence().toString());
}
From source file:fi.tkk.ics.hadoop.bam.TestQseqInputFormat.java
License:Open Source License
@Test(expected = RuntimeException.class)
public void testCompressedSplit() throws IOException {
    // write gzip-compressed data
    GzipCodec codec = new GzipCodec();
    PrintWriter qseqOut = new PrintWriter(
            new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz))));
    qseqOut.write(twoQseq);
    qseqOut.close();

    // now try to read it starting from the middle
    split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null);
    QseqRecordReader reader = new QseqRecordReader(conf, split);
}
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
public static DataInputStream getDataInputStream(String path, FileType fileType, int bufferSize)
        throws IOException {
    path = path.replace("\\", "/");
    boolean gzip = path.endsWith(".gz");
    boolean bzip2 = path.endsWith(".bz2");
    InputStream stream;
    switch (fileType) {
    case LOCAL:
        path = getUpdatedFilePath(path, fileType);
        if (gzip) {
            stream = new GZIPInputStream(new FileInputStream(path));
        } else if (bzip2) {
            stream = new BZip2CompressorInputStream(new FileInputStream(path));
        } else {
            stream = new FileInputStream(path);
        }
        break;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        if (bufferSize == -1) {
            stream = fs.open(pt);
        } else {
            stream = fs.open(pt, bufferSize);
        }
        if (gzip) {
            GzipCodec codec = new GzipCodec();
            stream = codec.createInputStream(stream);
        } else if (bzip2) {
            BZip2Codec codec = new BZip2Codec();
            stream = codec.createInputStream(stream);
        }
        break;
    default:
        throw new UnsupportedOperationException("unsupported file system");
    }
    return new DataInputStream(new BufferedInputStream(stream));
}
From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java
License:Apache License
/**
 * generate compressed files, no need to call this method.
 * @throws Exception
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
}
From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java
License:Apache License
/**
 * generate compressed files, no need to call this method.
 * @throws Exception
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();

    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();

    // .lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
}
From source file:org.apache.flume.sink.customhdfs.TestUseRawLocalFileSystem.java
License:Apache License
@Test
public void testCompressedFile() throws Exception {
    String file = testFile.getCanonicalPath();
    HDFSCompressedDataStream stream = new HDFSCompressedDataStream();
    context.put("hdfs.useRawLocalFileSystem", "true");
    stream.configure(context);
    stream.open(file, new GzipCodec(), CompressionType.RECORD);
    stream.append(event);
    stream.sync();
    Assert.assertTrue(testFile.length() > 0);
}
From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java
License:Apache License
@Test
public void isThreadSafe() {
    testIsThreadSafe("readable compression, no compression - thread safe",
            "/some/path/without.compression", null, null, true);
    testIsThreadSafe("readable compression, gzip compression - thread safe",
            "/some/compressed/path.gz", null, new GzipCodec(), true);
    testIsThreadSafe("readable compression, bzip2 compression - not thread safe",
            "/some/path/with/bzip2.bz2", null, new BZip2Codec(), false);
    testIsThreadSafe("writable compression, no compression codec - thread safe",
            "/some/path", null, null, true);
    testIsThreadSafe("writable compression, some compression codec - thread safe",
            "/some/path", "I.am.a.nice.codec", new NotSoNiceCodec(), true);
    testIsThreadSafe("writable compression, compression codec bzip2 - not thread safe",
            "/some/path", "org.apache.hadoop.io.compress.BZip2Codec", new BZip2Codec(), false);
}
From source file:org.apache.hawq.pxf.plugins.hdfs.utilities.HdfsUtilitiesTest.java
License:Apache License
@Test
public void isSplittableCodec() {
    testIsSplittableCodec("no codec - splittable", "some/innocent.file", null, true);
    testIsSplittableCodec("gzip codec - not splittable", "/gzip.gz", new GzipCodec(), false);
    testIsSplittableCodec("default codec - not splittable", "/default.deflate", new DefaultCodec(), false);
    testIsSplittableCodec("bzip2 codec - splittable", "bzip2.bz2", new BZip2Codec(), true);
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.jsonld.GZippedJsonLDQuadInputTest.java
License:Apache License
/**
 * Creates new tests
 */
public GZippedJsonLDQuadInputTest() {
    super(".jsonld.gz", new GzipCodec());
}