Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_TYPE

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_TYPE

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_TYPE.

Prototype

String COMPRESS_TYPE

To view the source code for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_TYPE, use the link below:

Click Source Link

Document

Type of compression: NONE, RECORD, or BLOCK.

Usage

From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java

License:Apache License

/**
 * Writes the compression settings derived from {@code compressionCodec} into
 * {@code config} for each output format handled here: the generic Hive/MapReduce
 * compress flags, DWRF, ORC, RCFile/Text, Parquet and SequenceFile.
 *
 * @param config           configuration that is modified in place
 * @param compressionCodec codec to apply; any value other than
 *                         {@code HiveCompressionCodec.NONE} turns the compress flags on
 */
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    // Generic on/off switches, under both the legacy and the current key
    boolean compressionEnabled = HiveCompressionCodec.NONE != compressionCodec;
    config.setBoolean(COMPRESSRESULT.varname, compressionEnabled);
    config.setBoolean("mapred.output.compress", compressionEnabled);
    config.setBoolean(FileOutputFormat.COMPRESS, compressionEnabled);

    String orcKindName = compressionCodec.getOrcCompressionKind().name();

    // DWRF
    config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, orcKindName);
    config.set(HIVE_ORC_COMPRESSION.varname, orcKindName);

    // ORC
    config.set(OrcTableProperties.COMPRESSION.getPropName(), orcKindName);

    // RCFile and Text: clear the codec keys when the codec defines none
    if (!compressionCodec.getCodec().isPresent()) {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    } else {
        String codecName = compressionCodec.getCodec().get().getName();
        config.set("mapred.output.compression.codec", codecName);
        config.set(FileOutputFormat.COMPRESS_CODEC, codecName);
    }

    // Parquet
    String parquetCodecName = compressionCodec.getParquetCompressionCodec().name();
    config.set(ParquetOutputFormat.COMPRESSION, parquetCodecName);

    // SequenceFile: always BLOCK, regardless of the chosen codec
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}

From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java

License:Apache License

/**
 * Writes the compression settings derived from {@code compressionCodec} into
 * {@code config} for each output format handled here: the generic Hive/MapReduce
 * compress flags, DWRF, ORC, RCFile/Text, Parquet and SequenceFile.
 *
 * @param config           configuration that is modified in place
 * @param compressionCodec codec to apply; any value other than
 *                         {@code HiveCompressionCodec.NONE} turns the compress flags on
 */
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    // Generic on/off switches, under both the legacy and the current key
    boolean compressionEnabled = HiveCompressionCodec.NONE != compressionCodec;
    config.setBoolean(COMPRESSRESULT.varname, compressionEnabled);
    config.setBoolean("mapred.output.compress", compressionEnabled);
    config.setBoolean(FileOutputFormat.COMPRESS, compressionEnabled);

    String orcKindName = compressionCodec.getOrcCompressionKind().name();

    // DWRF
    com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION, orcKindName);

    // ORC
    OrcConf.COMPRESS.setString(config, orcKindName);

    // RCFile and Text: clear the codec keys when the codec defines none
    if (!compressionCodec.getCodec().isPresent()) {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    } else {
        String codecName = compressionCodec.getCodec().get().getName();
        config.set("mapred.output.compression.codec", codecName);
        config.set(FileOutputFormat.COMPRESS_CODEC, codecName);
    }

    // Parquet
    String parquetCodecName = compressionCodec.getParquetCompressionCodec().name();
    config.set(ParquetOutputFormat.COMPRESSION, parquetCodecName);

    // SequenceFile: always BLOCK, regardless of the chosen codec
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}

From source file:nl.naward04.hadoop.country.Country.java

License:Apache License

/**
 * Configures and runs the "Find the country based on domain name or IP address."
 * job: input is read with {@code WarcInputFormat}, mapped by {@code CountryLookup},
 * and written as a gzip-compressed, block-compressed SequenceFile of
 * {@code Text} keys and {@code Text} values.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path;
 *             any further arguments are ignored
 * @return 0 if the job completes successfully, 1 if it fails, 2 if fewer than
 *         two arguments were supplied
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
    // when the input/output paths are missing.
    if (args == null || args.length < 2) {
        System.err.println("Usage: Country <input path> <output path>");
        return 2;
    }

    Configuration conf = this.getConf();

    // Set compress type to compress BLOCKs (not RECORDs)
    // https://hadoop.apache.org/docs/r2.4.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // http://hadoop.apache.org/docs/r2.4.0/api/org/apache/hadoop/io/SequenceFile.html
    conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK");

    Job job = Job.getInstance(conf, "Find the country based on domain name or IP address.");
    job.setJarByClass(Country.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(CountryLookup.class);
    job.setInputFormatClass(WarcInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Enable gzip compression of the job output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // Execute job and return status
    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:nl.naward05.hadoop.MergeFiles.java

License:Apache License

/**
 * Configures and runs the "Merge countries and songs" job: two SequenceFile
 * inputs are reduced by {@code MergeReducer} and written as a gzip-compressed,
 * block-compressed SequenceFile of {@code Text} keys and {@code LongWritable}
 * values.
 *
 * @param args {@code args[0]} and {@code args[1]} are the two input paths and
 *             {@code args[2]} the output path; any further arguments are ignored
 * @return 0 if the job completes successfully, 1 if it fails, 2 if fewer than
 *         three arguments were supplied
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
    // when one of the three required paths is missing.
    if (args == null || args.length < 3) {
        System.err.println("Usage: MergeFiles <input path 1> <input path 2> <output path>");
        return 2;
    }

    Configuration conf = this.getConf();

    // Set compress type to compress BLOCKs (not RECORDs)
    // https://hadoop.apache.org/docs/r2.4.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml
    // http://hadoop.apache.org/docs/r2.4.0/api/org/apache/hadoop/io/SequenceFile.html
    conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK");

    Job job = Job.getInstance(conf, "Merge countries and songs");
    job.setJarByClass(MergeFiles.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileInputFormat.addInputPath(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    job.setReducerClass(MergeReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Enable gzip compression of the job output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // Execute job and return status
    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:nl.surfsara.warcexamples.hadoop.rr.RR.java

License:Apache License

/**
 * Configures and runs the "Record Recognizer" job: input is read with
 * {@code WarcInputFormat}, mapped by {@code RRMapper}, and written as a
 * gzip-compressed, block-compressed SequenceFile of {@code Text} keys and
 * {@code Text} values.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path;
 *             any further arguments are ignored
 * @return 0 if the job completes successfully, 1 if it fails, 2 if fewer than
 *         two arguments were supplied
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
    // when the input/output paths are missing.
    if (args == null || args.length < 2) {
        System.err.println("Usage: RR <input path> <output path>");
        return 2;
    }

    Configuration conf = this.getConf();

    // Compress SequenceFile output per block rather than per record
    conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK");

    Job job = Job.getInstance(conf, "Record Recognizer");
    job.setJarByClass(RR.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(RRMapper.class);
    job.setInputFormatClass(WarcInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Enable gzip compression of the job output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // Execute job and return status
    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java

License:Apache License

/**
 * Adds the FileInputFormat- and FileOutputFormat-related setting keys to
 * {@code mrSettingsToRetain}, in a fixed order (input keys first).
 */
private static void populateMRSettingsToRetain() {

    // FileInputFormat
    final String[] inputFormatKeys = {
        FileInputFormat.INPUT_DIR,
        FileInputFormat.SPLIT_MAXSIZE,
        FileInputFormat.SPLIT_MINSIZE,
        FileInputFormat.PATHFILTER_CLASS,
        FileInputFormat.NUM_INPUT_FILES,
        FileInputFormat.INPUT_DIR_RECURSIVE,
    };

    // FileOutputFormat
    final String[] outputFormatKeys = {
        MRConfiguration.OUTPUT_BASENAME,
        FileOutputFormat.COMPRESS,
        FileOutputFormat.COMPRESS_CODEC,
        FileOutputFormat.COMPRESS_TYPE,
        FileOutputFormat.OUTDIR,
        FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER,
    };

    for (String key : inputFormatKeys) {
        mrSettingsToRetain.add(key);
    }
    for (String key : outputFormatKeys) {
        mrSettingsToRetain.add(key);
    }
}