List of usage examples for the org.apache.hadoop.io.compress.SnappyCodec constructor SnappyCodec()
SnappyCodec
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplog.java
License:Apache License
/**
 * Builds the {@code SequenceFile.Writer} compression option from the system property named by
 * {@code HoplogConfig.COMPRESSION}.
 *
 * <p>Recognized values, all matched case-insensitively, are {@code SNAPPY}, {@code LZ4} and
 * {@code GZ}; each selects {@code CompressionType.BLOCK} with the corresponding codec. When the
 * property is not set, the returned option uses {@code CompressionType.NONE} with no codec.
 *
 * @param logger logger used to report the selected codec at debug level
 * @return the compression option to pass when creating the sequence file writer
 * @throws IllegalStateException if the property is set to a value that is not recognized
 */
private static Option withCompression(Logger logger) {
  String prop = System.getProperty(HoplogConfig.COMPRESSION);
  if (prop == null) {
    return SequenceFile.Writer.compression(CompressionType.NONE, null);
  }

  CompressionCodec codec;
  if (prop.equalsIgnoreCase("SNAPPY")) {
    codec = new SnappyCodec();
  } else if (prop.equalsIgnoreCase("LZ4")) {
    codec = new Lz4Codec();
  } else if (prop.equalsIgnoreCase("GZ")) {
    // Matched case-insensitively like the other codec names; the exact value "GZ" that was
    // accepted before is still accepted.
    codec = new GzipCodec();
  } else {
    throw new IllegalStateException("Unsupported codec: " + prop);
  }

  if (logger.isDebugEnabled()) {
    logger.debug("{}Using compression codec " + codec, logPrefix);
  }
  return SequenceFile.Writer.compression(CompressionType.BLOCK, codec);
}
From source file:org.apache.carbondata.hadoop.csv.CSVInputFormatTest.java
License:Apache License
/** * generate compressed files, no need to call this method. * @throws Exception//from w ww .j a va 2s .c o m */ public void generateCompressFiles() throws Exception { String pwd = new File("src/test/resources").getCanonicalPath(); String inputFile = pwd + "/data.csv"; FileInputStream input = new FileInputStream(inputFile); Configuration conf = new Configuration(); // .gz String outputFile = pwd + "/data.csv.gz"; FileOutputStream output = new FileOutputStream(outputFile); GzipCodec gzip = new GzipCodec(); gzip.setConf(conf); CompressionOutputStream outputStream = gzip.createOutputStream(output); int i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .bz2 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.bz2"; output = new FileOutputStream(outputFile); BZip2Codec bzip2 = new BZip2Codec(); bzip2.setConf(conf); outputStream = bzip2.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .snappy input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.snappy"; output = new FileOutputStream(outputFile); SnappyCodec snappy = new SnappyCodec(); snappy.setConf(conf); outputStream = snappy.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); //.lz4 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.lz4"; output = new FileOutputStream(outputFile); Lz4Codec lz4 = new Lz4Codec(); lz4.setConf(conf); outputStream = lz4.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); }
From source file:org.apache.carbondata.processing.csvload.CSVInputFormatTest.java
License:Apache License
/** * generate compressed files, no need to call this method. * @throws Exception// w ww . j a v a 2s.c o m */ public void generateCompressFiles() throws Exception { String pwd = new File("src/test/resources/csv").getCanonicalPath(); String inputFile = pwd + "/data.csv"; FileInputStream input = new FileInputStream(inputFile); Configuration conf = new Configuration(); // .gz String outputFile = pwd + "/data.csv.gz"; FileOutputStream output = new FileOutputStream(outputFile); GzipCodec gzip = new GzipCodec(); gzip.setConf(conf); CompressionOutputStream outputStream = gzip.createOutputStream(output); int i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .bz2 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.bz2"; output = new FileOutputStream(outputFile); BZip2Codec bzip2 = new BZip2Codec(); bzip2.setConf(conf); outputStream = bzip2.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); // .snappy input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.snappy"; output = new FileOutputStream(outputFile); SnappyCodec snappy = new SnappyCodec(); snappy.setConf(conf); outputStream = snappy.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); //.lz4 input = new FileInputStream(inputFile); outputFile = pwd + "/data.csv.lz4"; output = new FileOutputStream(outputFile); Lz4Codec lz4 = new Lz4Codec(); lz4.setConf(conf); outputStream = lz4.createOutputStream(output); i = -1; while ((i = input.read()) != -1) { outputStream.write(i); } outputStream.close(); input.close(); }
From source file:org.apache.ignite.internal.processors.hadoop.HadoopSnappyTest.java
License:Apache License
/** * Internal check routine./*w w w. j a v a2 s . c om*/ * * @throws Throwable If failed. */ public static void checkSnappy() throws Throwable { try { byte[] expBytes = new byte[BYTE_SIZE]; byte[] actualBytes = new byte[BYTE_SIZE]; for (int i = 0; i < expBytes.length; i++) expBytes[i] = (byte) ThreadLocalRandom.current().nextInt(16); SnappyCodec codec = new SnappyCodec(); codec.setConf(new Configuration()); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (CompressionOutputStream cos = codec.createOutputStream(baos)) { cos.write(expBytes); cos.flush(); } try (CompressionInputStream cis = codec .createInputStream(new ByteArrayInputStream(baos.toByteArray()))) { int read = cis.read(actualBytes, 0, actualBytes.length); assert read == actualBytes.length; } assert Arrays.equals(expBytes, actualBytes); } catch (Throwable e) { System.out.println("Snappy check failed:"); System.out .println("### NativeCodeLoader.isNativeCodeLoaded: " + NativeCodeLoader.isNativeCodeLoaded()); System.out .println("### SnappyCompressor.isNativeCodeLoaded: " + SnappyCompressor.isNativeCodeLoaded()); throw e; } }
From source file:org.commoncrawl.util.CrawlLogSplitter.java
License:Open Source License
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileStatus arcFiles[] = fs.globStatus(new Path("crawl/checkpoint_data/CrawlLog_*")); for (FileStatus candidate : arcFiles) { if (candidate.getLen() > SPLIT_SIZE) { candidateList.add(candidate.getPath()); }/*from w w w.j a v a 2 s . c om*/ } LOG.info("Found:" + candidateList.size() + " oversized candidates"); Path tempOutputDir = new Path(conf.get("mapred.temp.dir", ".")); while (candidateList.size() != 0) { Path candidateName = candidateList.first(); candidateList.remove(candidateName); LOG.info("Processing Candidate:" + candidateName); long fileSize = fs.getFileStatus(candidateName).getLen(); //get crawl log filename components ArrayList<Path> splitItems = new ArrayList<Path>(); int index = 0; Path outputPart = buildIncrementalPathGivenPathAndIndex(tempOutputDir, candidateName.getName(), index); LOG.info("Initial Output Path is:" + outputPart); fs.delete(outputPart, false); // create reader SequenceFile.Reader reader = new SequenceFile.Reader(fs, candidateName, conf); ValueBytes sourceVB = reader.createValueBytes(); DataOutputBuffer sourceKeyData = new DataOutputBuffer(); try { // ok create temp file SequenceFile.Writer activeWriter = SequenceFile.createWriter(fs, conf, outputPart, Text.class, CrawlURL.class, CompressionType.BLOCK, new SnappyCodec()); // add to split items array splitItems.add(outputPart); try { long recordsWritten = 0; while (reader.nextRawKey(sourceKeyData) != -1) { reader.nextRawValue(sourceVB); long lengthPreWrite = activeWriter.getLength(); activeWriter.appendRaw(sourceKeyData.getData(), 0, sourceKeyData.getLength(), sourceVB); if (++recordsWritten % 10000 == 0) { LOG.info("Write 10000 records"); } long lengthPostWrite = activeWriter.getLength(); if (lengthPostWrite != lengthPreWrite) { if (lengthPostWrite >= IDEAL_SIZE) { LOG.info("Hit Split Point. 
Flushing File:" + outputPart); activeWriter.close(); outputPart = buildIncrementalPathGivenPathAndIndex(tempOutputDir, candidateName.getName(), ++index); LOG.info("Creating New File:" + outputPart); activeWriter = SequenceFile.createWriter(fs, conf, outputPart, Text.class, CrawlURL.class, CompressionType.BLOCK, new SnappyCodec()); splitItems.add(outputPart); } } sourceKeyData.reset(); } } finally { activeWriter.close(); } } finally { reader.close(); } LOG.info("Rewrote Source:" + candidateName + " into:" + splitItems.size() + " split files"); for (Path splitItem : splitItems) { Path destPath = new Path("crawl/checkpoint_data", splitItem.getName()); LOG.info("Moving:" + splitItem + " to:" + destPath); fs.rename(splitItem, destPath); } Path sourceMoveLocation = new Path("crawl/checkpoint_data_split", candidateName.getName()); LOG.info("Moving SOURCE:" + candidateName + " to:" + sourceMoveLocation); fs.rename(candidateName, sourceMoveLocation); } }
From source file:org.pentaho.hadoop.shim.common.CommonSnappyShim.java
License:Apache License
/**
 * Wraps the supplied base input stream in an InputStream that uses the snappy codec.
 *
 * <p>While the codec and its configuration are being created, the calling thread's context
 * class loader is switched to this class's class loader; the previous loader is always restored
 * before the method returns.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param in         the base input stream to wrap around
 * @return an InputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or the codec stream cannot be created
 */
public InputStream getSnappyInputStream(int bufferSize, InputStream in) throws Exception {
  if (!isHadoopSnappyAvailable()) {
    throw new Exception("Hadoop-snappy does not seem to be available");
  }

  final Thread currentThread = Thread.currentThread();
  final ClassLoader previousLoader = currentThread.getContextClassLoader();
  currentThread.setContextClassLoader(getClass().getClassLoader());
  try {
    SnappyCodec codec = new SnappyCodec();
    Configuration codecConf = new Configuration();
    codecConf.set(IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, Integer.toString(bufferSize));
    codec.setConf(codecConf);
    return codec.createInputStream(in);
  } finally {
    currentThread.setContextClassLoader(previousLoader);
  }
}
From source file:org.pentaho.hadoop.shim.common.CommonSnappyShim.java
License:Apache License
/**
 * Wraps the supplied base output stream in an OutputStream that uses the snappy codec.
 *
 * <p>While the codec and its configuration are being created, the calling thread's context
 * class loader is switched to this class's class loader; the previous loader is always restored
 * before the method returns.
 *
 * @param bufferSize the buffer size for the codec to use (in bytes)
 * @param out        the base output stream to wrap around
 * @return an OutputStream that uses the Snappy codec
 * @throws Exception if snappy is not available or the codec stream cannot be created
 */
public OutputStream getSnappyOutputStream(int bufferSize, OutputStream out) throws Exception {
  if (!isHadoopSnappyAvailable()) {
    throw new Exception("Hadoop-snappy does not seem to be available");
  }

  final Thread currentThread = Thread.currentThread();
  final ClassLoader previousLoader = currentThread.getContextClassLoader();
  currentThread.setContextClassLoader(getClass().getClassLoader());
  try {
    SnappyCodec codec = new SnappyCodec();
    Configuration codecConf = new Configuration();
    codecConf.set(IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, Integer.toString(bufferSize));
    codec.setConf(codecConf);
    return codec.createOutputStream(out);
  } finally {
    currentThread.setContextClassLoader(previousLoader);
  }
}