Example usage for org.apache.hadoop.io.compress CompressionCodecFactory getCodecByName

Introduction

On this page you can find example usage of the org.apache.hadoop.io.compress CompressionCodecFactory method getCodecByName.

Prototype

public CompressionCodec getCodecByName(String codecName) 

Document

Find the relevant compression codec for the codec's canonical class name or by codec alias.
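
A minimal sketch of the lookup on its own, assuming the default Hadoop configuration (where GzipCodec is registered); the codec name "gzip" and the in-memory stream are illustrative:

import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class GetCodecByNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);

        // 'gzip', 'gzipcodec' and the canonical class name all resolve to GzipCodec
        CompressionCodec codec = factory.getCodecByName("gzip");
        if (codec == null) {
            throw new IllegalArgumentException("No codec registered under the name 'gzip'");
        }

        // Wrap any OutputStream to compress data on the fly
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (OutputStream out = codec.createOutputStream(buffer)) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
        System.out.println("Compressed to " + buffer.size() + " bytes; default extension: " + codec.getDefaultExtension());
    }
}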

Usage

From source file:com.flipkart.fdp.migration.distcp.core.MirrorUtils.java

License:Apache License

public static OutputStream getCodecOutputStream(Configuration conf, String codecName, OutputStream out)
        throws IOException {
    CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
    String codecClassName = codecName;
    CodecType codecType = CodecType.getCodecType(codecName);
    if (codecType != null) {
        codecClassName = codecType.getIOCompressionCodecs();
    }
    System.out.println("codec class : " + codecClassName);
    CompressionCodec codec = compressionCodecs.getCodecByName(codecClassName);

    if (codec == null) {
        return out;
    }

    System.out.println("Getting OutputStream : " + codec.getDefaultExtension());
    System.out.println("Getting OutputStream : " + codec);
    Compressor compressor = codec.createCompressor();
    return codec.createOutputStream(out, compressor);
}

From source file:com.jeffy.hdfs.compression.FileCompressor.java

License:Apache License

/**
 * Compresses each input file with the codec named by the first argument,
 * writing the result next to the original with the codec's default extension.
 *
 * @param args args[0] is the compression codec name or alias (e.g. "gzip");
 *             the remaining arguments are the paths of the files to compress
 * @throws IOException if a file cannot be read or written
 */
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Build a codec factory from the Hadoop configuration
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    // For example, for the 'GzipCodec' class name the aliases are 'gzip' and 'gzipcodec'.
    CompressionCodec codec = factory.getCodecByName(args[0]);
    if (codec == null) { // no codec registered under the given name
        System.err.println("Compression codec not found for " + args[0]);
        System.exit(1);
    }
    String ext = codec.getDefaultExtension();
    Compressor compressor = null;
    try {
        // Borrow a Compressor for this codec from the CodecPool
        compressor = CodecPool.getCompressor(codec);
        for (int i = 1; i < args.length; i++) {
            String filename = args[i] + ext;
            System.out.println("Compression the file " + filename);
            try (FileSystem outFs = FileSystem.get(URI.create(filename), conf);
                    FileSystem inFs = FileSystem.get(URI.create(args[i]), conf);
                    InputStream in = inFs.open(new Path(args[i]))) {
                // Wrap the target file's output stream with the pooled compressor
                CompressionOutputStream out = codec.createOutputStream(outFs.create(new Path(filename)),
                        compressor);
                // Copy the input file into the compressed output stream
                IOUtils.copy(in, out);
                out.finish(); // finish() completes the compressed output without closing the underlying stream
                compressor.reset(); // reset before reusing the pooled compressor, otherwise: java.io.IOException: write beyond end of stream
            }
        }
    } finally { // Always return the Compressor to the pool
        CodecPool.returnCompressor(compressor);
    }
}
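
The program above takes the codec name or alias as its first argument, followed by the files to compress; an illustrative invocation would be FileCompressor gzip /data/input.txt, which writes /data/input.txt.gz using the extension reported by getDefaultExtension().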

From source file:org.apache.flink.formats.sequencefile.SequenceFileWriterFactory.java

License:Apache License

private CompressionCodec getCompressionCodec(Configuration conf, String compressionCodecName) {
    checkNotNull(conf);
    checkNotNull(compressionCodecName);

    if (compressionCodecName.equals(NO_COMPRESSION)) {
        return null;
    }

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodecByName(compressionCodecName);
    if (codec == null) {
        throw new RuntimeException("Codec " + compressionCodecName + " not found.");
    }
    return codec;
}

From source file:org.apache.flink.streaming.connectors.fs.SequenceFileWriter.java

License:Apache License

@Override
public void open(FileSystem fs, Path path) throws IOException {
    super.open(fs, path);
    if (keyClass == null) {
        throw new IllegalStateException("Key Class has not been initialized.");
    }
    if (valueClass == null) {
        throw new IllegalStateException("Value Class has not been initialized.");
    }

    CompressionCodec codec = null;

    if (!compressionCodecName.equals("None")) {
        CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
        codec = codecFactory.getCodecByName(compressionCodecName);
        if (codec == null) {
            throw new RuntimeException("Codec " + compressionCodecName + " not found.");
        }
    }

    // the non-deprecated constructor syntax is only available in recent hadoop versions...
    writer = SequenceFile.createWriter(new Configuration(), getStream(), keyClass, valueClass, compressionType,
            codec);
}

From source file:org.springframework.data.hadoop.serialization.CompressionUtils.java

License:Apache License

/**
 * Resolve compression alias (such as 'snappy' or 'gzip') to Hadoop {@link CompressionCodec codec}.
 *
 * @param conf Hadoop configuration to use.
 * @param compressionAlias <ul>
 * <li>The short class name (without the package) of the compression codec that is specified within Hadoop
 * configuration (under <i>io.compression.codecs</i> prop). If the short class name ends with 'Codec', then there
 * are two aliases for the codec - the complete short class name and the short class name without the 'Codec'
 * ending. For example, for the 'GzipCodec' codec class name the aliases are 'gzip' and 'gzipcodec' (case insensitive).
 * If the codec is configured to be used by Hadoop this is the preferred way instead of passing the codec canonical
 * name.</li>
 * <li>The canonical class name of the compression codec that is specified within Hadoop configuration (under
 * <i>io.compression.codecs</i> prop) or is present on the classpath.</li>
 * </ul>
 * 
 * @return The codec to be used to compress the data on the fly while storing it onto HDFS, if the
 * <code>compressionAlias</code> property is specified; <code>null</code> otherwise.
 * 
 * @throws IllegalArgumentException if the codec class name could not be resolved
 * @throws RuntimeException if the codec class could not be instantiated
 */
public static CompressionCodec getHadoopCompression(Configuration conf, String compressionAlias) {
    if (!StringUtils.hasText(compressionAlias) || "none".equalsIgnoreCase(compressionAlias)) {
        return null;
    }

    final CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);

    // Find codec by canonical class name or by codec alias as specified in Hadoop configuration
    CompressionCodec compression = codecFactory.getCodecByName(compressionAlias);

    // If the codec is not configured within Hadoop try to load it from the classpath
    if (compression == null) {
        Class<?> compressionClass = ClassUtils.resolveClassName(compressionAlias,
                SerializationWriter.class.getClassLoader());

        // Instantiate codec and initialize it from configuration
        // org.apache.hadoop.util.ReflectionUtils design is specific to Hadoop env :)
        compression = (CompressionCodec) ReflectionUtils.newInstance(compressionClass, conf);
    }

    return compression;
}
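
A minimal sketch of calling the resolver above; the codec names are illustrative and assume the standard codecs shipped with Hadoop:

Configuration conf = new Configuration();

// Resolve by alias (preferred when the codec is listed under io.compression.codecs)
CompressionCodec gzip = CompressionUtils.getHadoopCompression(conf, "gzip");

// Resolve by canonical class name; if it is not configured, the class is loaded from the classpath
CompressionCodec deflate = CompressionUtils.getHadoopCompression(conf,
        "org.apache.hadoop.io.compress.DefaultCodec");

// Blank text or "none" (case insensitive) yields null, meaning the data is stored uncompressed
CompressionCodec none = CompressionUtils.getHadoopCompression(conf, "none");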