List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS
String COMPRESS
To view the source code for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS, click the Source Link.
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/** * Set whether the output of the job is compressed. * /*from w w w .j a va 2s .co m*/ * @param job * the job to modify * @param compress * should the output of the job be compressed? */ public static void setCompressOutput(Job job, boolean compress) { job.getConfiguration().setBoolean(FileOutputFormat.COMPRESS, compress); }
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Reports whether the job's output is configured to be compressed.
 *
 * @param job
 *            the job context whose configuration is consulted
 * @return the boolean stored under {@code FileOutputFormat.COMPRESS};
 *         <code>false</code> when that key is not set
 */
public static boolean getCompressOutput(JobContext job) {
    // Value reported when the configuration carries no explicit setting.
    final boolean whenUnset = false;
    return job.getConfiguration().getBoolean(FileOutputFormat.COMPRESS, whenUnset);
}
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF/*from ww w . j a v a2s .c o m*/ config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name()); config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name()); // For ORC config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:io.druid.indexer.UtilsCompressionTest.java
License:Apache License
@Before public void setUp() throws IOException { jobConfig = new Configuration(); mockJobContext = EasyMock.createMock(JobContext.class); EasyMock.expect(mockJobContext.getConfiguration()).andReturn(jobConfig).anyTimes(); EasyMock.replay(mockJobContext);//ww w. j a v a 2 s . co m jobConfig.setBoolean(FileOutputFormat.COMPRESS, true); jobConfig.set(FileOutputFormat.COMPRESS_CODEC, CODEC_CLASS); Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(mockJobContext, DEFAULT_COMPRESSION_CODEC); codec = ReflectionUtils.newInstance(codecClass, jobConfig); tmpFile = tmpFolder.newFile(TMP_FILE_NAME + codec.getDefaultExtension()); tmpPathWithExtension = new Path(tmpFile.getAbsolutePath()); tmpPathWithoutExtension = new Path(tmpFile.getParent(), TMP_FILE_NAME); defaultFileSystem = tmpPathWithoutExtension.getFileSystem(jobConfig); }
From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF//w ww .j a v a 2 s . co m com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION, compressionCodec.getOrcCompressionKind().name()); // For ORC OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java
License:Apache License
/**
 * Inspects the job configuration held by {@code context} and collects
 * compression-related suggestions for map output and for job output.
 *
 * @param jobAnalysisEntity
 *            the entity under analysis; this implementation does not read it
 *            and works from {@code context.getJobconf()} instead
 * @return an INFO-level {@code COMPRESS} result carrying the advice text and the
 *         suggested settings, or {@code null} when there is nothing to suggest
 */
@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());

    // Map output only crosses the network when there is at least one reducer.
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set ").append(MAP_OUTPUT_COMPRESS).append(" to true to reduce network IO.\n");
        } else {
            // Default to "" so an unset codec key cannot cause a NullPointerException;
            // this mirrors how the job-output codec is read further down.
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC, "");
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for ").append(MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append("Please set ").append(FileOutputFormat.COMPRESS)
                .append(" to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    // Callers receive null when no advice was generated.
    return null;
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopAbstractWordCountTest.java
License:Apache License
/**
 * Reads every record of a file, sorts the records, and returns them joined
 * with newlines (with a trailing newline).
 *
 * <p>When {@code conf} is non-null and has {@code FileOutputFormat.COMPRESS}
 * set, the file is read as a {@code SequenceFile} of {@code Text} keys and
 * {@code IntWritable} values, each record rendered as {@code key<TAB>value}.
 * Otherwise the file is opened through {@code igfs} and read line by line.
 *
 * @param fileName Name of the file to read.
 * @param conf Configuration deciding how the file is read; may be {@code null}.
 * @return Sorted content of the file as String value.
 * @throws Exception If could not read the file.
 */
protected String readAndSortFile(String fileName, Configuration conf) throws Exception {
    final List<String> list = new ArrayList<>();

    final boolean readAsSequenceFile = conf != null && conf.getBoolean(FileOutputFormat.COMPRESS, false);

    if (readAsSequenceFile) {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
            SequenceFile.Reader.file(new Path(fileName)))) {
            Text key = new Text();
            IntWritable val = new IntWritable();

            while (reader.next(key, val)) {
                list.add(key + "\t" + val);
            }
        }
    }
    else {
        // Decode explicitly as UTF-8 instead of the platform default charset so the
        // result does not vary between machines; the reader is closed with the stream.
        try (InputStream is0 = igfs.open(new IgfsPath(fileName));
             BufferedReader reader = new BufferedReader(
                 new InputStreamReader(is0, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;

            while ((line = reader.readLine()) != null) {
                list.add(line);
            }
        }
    }

    Collections.sort(list);

    return Joiner.on('\n').join(list) + "\n";
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java
License:Apache License
private static void populateMRSettingsToRetain() { // FileInputFormat mrSettingsToRetain.add(FileInputFormat.INPUT_DIR); mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE); mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE); mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS); mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES); mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE); // FileOutputFormat mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME); mrSettingsToRetain.add(FileOutputFormat.COMPRESS); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE); mrSettingsToRetain.add(FileOutputFormat.OUTDIR); mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER); }