Example usage for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS constant.

Prototype

public static final String COMPRESS = "mapreduce.output.fileoutputformat.compress";

Document

Configuration option: should output be compressed?
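
Before the examples, a minimal sketch of enabling this option in a driver; the job name and output path are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressedOutputDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compressed-output");
        // Equivalent to job.getConfiguration().setBoolean(FileOutputFormat.COMPRESS, true)
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setOutputPath(job, new Path("/tmp/compressed-out"));
        // ... set input format, mapper and reducer, then submit with job.waitForCompletion(true)
    }
}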

Usage

From source file: com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License: Apache License

/**
 * Set whether the output of the job is compressed.
 *
 * @param job
 *            the job to modify
 * @param compress
 *            should the output of the job be compressed?
 */
public static void setCompressOutput(Job job, boolean compress) {
    job.getConfiguration().setBoolean(FileOutputFormat.COMPRESS, compress);
}
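
A typical call site, assuming a hypothetical driver:

Job job = Job.getInstance(new Configuration(), "roam-recognition");
NewFileOutputFormat.setCompressOutput(job, true);
// The flag can be read back directly from the configuration:
boolean on = job.getConfiguration().getBoolean(FileOutputFormat.COMPRESS, false);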

From source file: com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java

License: Apache License

/**
 * Is the job output compressed?
 * 
 * @param job
 *            the Job to look in
 * @return <code>true</code> if the job output should be compressed,
 *         <code>false</code> otherwise
 */
public static boolean getCompressOutput(JobContext job) {
    return job.getConfiguration().getBoolean(FileOutputFormat.COMPRESS, false);
}
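
This flag is usually consumed together with FileOutputFormat.getOutputCompressorClass when the output stream is opened. A minimal sketch modeled on what Hadoop's own TextOutputFormat does (fileOut stands in for the raw FSDataOutputStream and is hypothetical here):

if (FileOutputFormat.getCompressOutput(job)) {
    // Resolve the configured codec, falling back to gzip.
    Class<? extends CompressionCodec> codecClass =
            FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
    CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
    // Wrap the raw file stream; the file name would also get codec.getDefaultExtension().
    OutputStream out = codec.createOutputStream(fileOut);
}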

From source file: com.facebook.presto.hive.HdfsConfigurationUpdater.java

License: Apache License

public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);
    // For DWRF
    config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name());
    config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name());
    // For ORC
    config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name());
    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}
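
Note that the method writes both the deprecated mapred.output.compress key and its mapreduce.output.fileoutputformat.compress replacement (FileOutputFormat.COMPRESS), so the setting is honored whether downstream code reads the old or the new property name.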

From source file: io.druid.indexer.UtilsCompressionTest.java

License: Apache License

@Before
public void setUp() throws IOException {
    jobConfig = new Configuration();
    mockJobContext = EasyMock.createMock(JobContext.class);
    EasyMock.expect(mockJobContext.getConfiguration()).andReturn(jobConfig).anyTimes();
    EasyMock.replay(mockJobContext);

    jobConfig.setBoolean(FileOutputFormat.COMPRESS, true);
    jobConfig.set(FileOutputFormat.COMPRESS_CODEC, CODEC_CLASS);
    Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(mockJobContext,
            DEFAULT_COMPRESSION_CODEC);
    codec = ReflectionUtils.newInstance(codecClass, jobConfig);

    tmpFile = tmpFolder.newFile(TMP_FILE_NAME + codec.getDefaultExtension());
    tmpPathWithExtension = new Path(tmpFile.getAbsolutePath());
    tmpPathWithoutExtension = new Path(tmpFile.getParent(), TMP_FILE_NAME);
    defaultFileSystem = tmpPathWithoutExtension.getFileSystem(jobConfig);
}
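
The test exercises the same lookup shown in the sketch above: with FileOutputFormat.COMPRESS set to true and FileOutputFormat.COMPRESS_CODEC naming a codec class, getOutputCompressorClass resolves that class, and the codec's default extension is appended to the expected file name.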

From source file: io.prestosql.plugin.hive.HdfsConfigurationInitializer.java

License: Apache License

public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) {
    boolean compression = compressionCodec != HiveCompressionCodec.NONE;
    config.setBoolean(COMPRESSRESULT.varname, compression);
    config.setBoolean("mapred.output.compress", compression);
    config.setBoolean(FileOutputFormat.COMPRESS, compression);
    // For DWRF
    com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION,
            compressionCodec.getOrcCompressionKind().name());
    // For ORC
    OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name());
    // For RCFile and Text
    if (compressionCodec.getCodec().isPresent()) {
        config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName());
        config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName());
    } else {
        config.unset("mapred.output.compression.codec");
        config.unset(FileOutputFormat.COMPRESS_CODEC);
    }
    // For Parquet
    config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name());
    // For SequenceFile
    config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString());
}
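
This is the same method after Presto's rename to prestosql: the raw DWRF and ORC property keys have been replaced by com.facebook.hive.orc.OrcConf and OrcConf.COMPRESS setters, while the FileOutputFormat.COMPRESS handling is unchanged.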

From source file: org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java

License: Apache License

@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}
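
The TextOutputFormat check reflects a well-known pitfall: Gzip-, Snappy-, and DefaultCodec-compressed text files are not splittable, so a single large output file can only be consumed by one map task in a downstream job, whereas LZO (when indexed) and container formats such as SequenceFile support splitting.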

From source file: org.apache.ignite.internal.processors.hadoop.impl.HadoopAbstractWordCountTest.java

License: Apache License

/**
 * Reads the whole text file into a String, with lines sorted.
 *
 * @param fileName Name of the file to read.
 * @return Content of the file as String value.
 * @throws Exception If could not read the file.
 */
protected String readAndSortFile(String fileName, Configuration conf) throws Exception {
    final List<String> list = new ArrayList<>();

    final boolean snappyDecode = conf != null && conf.getBoolean(FileOutputFormat.COMPRESS, false);

    if (snappyDecode) {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(new Path(fileName)))) {
            Text key = new Text();

            IntWritable val = new IntWritable();

            while (reader.next(key, val))
                list.add(key + "\t" + val);
        }
    } else {
        try (InputStream is0 = igfs.open(new IgfsPath(fileName))) {
            BufferedReader reader = new BufferedReader(new InputStreamReader(is0));

            String line;

            while ((line = reader.readLine()) != null)
                list.add(line);
        }
    }

    Collections.sort(list);

    return Joiner.on('\n').join(list) + "\n";
}
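
Here the flag effectively doubles as a format hint: when the word-count job under test wrote compressed output, it did so as a SequenceFile, so the reader switches to SequenceFile.Reader instead of merely decompressing a text stream.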

From source file: org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java

License: Apache License

private static void populateMRSettingsToRetain() {

    // FileInputFormat
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
    mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
    mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

    // FileOutputFormat
    mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
    mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
    mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}
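
Retaining FileOutputFormat.COMPRESS together with COMPRESS_CODEC and COMPRESS_TYPE ensures that a job's output-compression settings survive Pig's translation of an MR configuration into a Tez one, instead of being dropped with the other MR-specific keys.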