List of usage examples for org.apache.hadoop.io.compress.CompressionCodecFactory#getCodecByName
public CompressionCodec getCodecByName(String codecName)
From source file:com.flipkart.fdp.migration.distcp.core.MirrorUtils.java
License:Apache License
public static OutputStream getCodecOutputStream(Configuration conf, String codecName, OutputStream out) throws IOException { CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf); String codecClassName = codecName; CodecType codecType = CodecType.getCodecType(codecName); if (codecType != null) { codecClassName = codecType.getIOCompressionCodecs(); }//from ww w . j a v a 2 s. co m System.out.println("codec class : " + codecClassName); CompressionCodec codec = compressionCodecs.getCodecByName(codecClassName); if (codec == null) { return out; } System.out.println("Getting OutputStream : " + codec.getDefaultExtension()); System.out.println("Getting OutputStream : " + codec); Compressor compressor = codec.createCompressor(); return codec.createOutputStream(out, compressor); }
From source file:com.jeffy.hdfs.compression.FileCompressor.java
License:Apache License
/** * @param args/* w w w .ja v a 2s . c o m*/ * ?????? * ???? * @throws IOException */ public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); //?? CompressionCodecFactory factory = new CompressionCodecFactory(conf); // For example for the 'GzipCodec' codec class name the alias are 'gzip' and 'gzipcodec'. CompressionCodec codec = factory.getCodecByName(args[0]); if (codec == null) {//??? System.err.println("Comperssion codec not found for " + args[0]); System.exit(1); } String ext = codec.getDefaultExtension(); Compressor compressor = null; try { //?CodecPool?Compressor compressor = CodecPool.getCompressor(codec); for (int i = 1; i < args.length; i++) { String filename = args[i] + ext; System.out.println("Compression the file " + filename); try (FileSystem outFs = FileSystem.get(URI.create(filename), conf); FileSystem inFs = FileSystem.get(URI.create(args[i]), conf); InputStream in = inFs.open(new Path(args[i]))) {// //Compressor? CompressionOutputStream out = codec.createOutputStream(outFs.create(new Path(filename)), compressor); //????? IOUtils.copy(in, out); out.finish();//?finish()?flush()??? compressor.reset(); //???????java.io.IOException: write beyond end of stream } } } finally {//?Compressor?? CodecPool.returnCompressor(compressor); } }
From source file:org.apache.flink.formats.sequencefile.SequenceFileWriterFactory.java
License:Apache License
private CompressionCodec getCompressionCodec(Configuration conf, String compressionCodecName) { checkNotNull(conf);//from w w w. ja v a 2s . co m checkNotNull(compressionCodecName); if (compressionCodecName.equals(NO_COMPRESSION)) { return null; } CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); CompressionCodec codec = codecFactory.getCodecByName(compressionCodecName); if (codec == null) { throw new RuntimeException("Codec " + compressionCodecName + " not found."); } return codec; }
From source file:org.apache.flink.streaming.connectors.fs.SequenceFileWriter.java
License:Apache License
@Override public void open(FileSystem fs, Path path) throws IOException { super.open(fs, path); if (keyClass == null) { throw new IllegalStateException("Key Class has not been initialized."); }/*from w w w .ja v a 2 s. co m*/ if (valueClass == null) { throw new IllegalStateException("Value Class has not been initialized."); } CompressionCodec codec = null; if (!compressionCodecName.equals("None")) { CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration()); codec = codecFactory.getCodecByName(compressionCodecName); if (codec == null) { throw new RuntimeException("Codec " + compressionCodecName + " not found."); } } // the non-deprecated constructor syntax is only available in recent hadoop versions... writer = SequenceFile.createWriter(new Configuration(), getStream(), keyClass, valueClass, compressionType, codec); }
From source file:org.springframework.data.hadoop.serialization.CompressionUtils.java
License:Apache License
/** * Resolve compression alias (such as 'snappy' or 'gzip') to Hadoop {@link CompressionCodec codec}. * /*from www . jav a 2s . com*/ * @param conf Hadoop configuration to use. * @param compressionAlias <ul> * <li>The short class name (without the package) of the compression codec that is specified within Hadoop * configuration (under <i>io.compression.codecs</i> prop). If the short class name ends with 'Codec', then there * are two aliases for the codec - the complete short class name and the short class name without the 'Codec' * ending. For example for the 'GzipCodec' codec class name the alias are 'gzip' and 'gzipcodec' (case insensitive). * If the codec is configured to be used by Hadoop this is the preferred way instead of passing the codec canonical * name.</li> * <li>The canonical class name of the compression codec that is specified within Hadoop configuration (under * <i>io.compression.codecs</i> prop) or is present on the classpath.</li> * </ul> * * @return The codec to be used to compress the data on the fly while storing it onto HDFS, if the * <code>compressionAlias</code> property is specified; <code>null</code> otherwise. 
* * @throws IllegalArgumentException if the codec class name could not be resolved * @throws RuntimeException if the codec class could not be instantiated */ public static CompressionCodec getHadoopCompression(Configuration conf, String compressionAlias) { if (!StringUtils.hasText(compressionAlias) || "none".equalsIgnoreCase(compressionAlias)) { return null; } final CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); // Find codec by canonical class name or by codec alias as specified in Hadoop configuration CompressionCodec compression = codecFactory.getCodecByName(compressionAlias); // If the codec is not configured within Hadoop try to load it from the classpath if (compression == null) { Class<?> compressionClass = ClassUtils.resolveClassName(compressionAlias, SerializationWriter.class.getClassLoader()); // Instantiate codec and initialize it from configuration // org.apache.hadoop.util.ReflectionUtils design is specific to Hadoop env :) compression = (CompressionCodec) ReflectionUtils.newInstance(compressionClass, conf); } return compression; }