List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat COMPRESS_CODEC
String COMPRESS_CODEC
To view the source code for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC, click the Source Link below.
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/** * Set the {@link CompressionCodec} to be used to compress job outputs. * /* w w w . j a va 2 s . co m*/ * @param job * the job to modify * @param codecClass * the {@link CompressionCodec} to be used to compress the job * outputs */ public static void setOutputCompressorClass(Job job, Class<? extends CompressionCodec> codecClass) { setCompressOutput(job, true); job.getConfiguration().setClass(FileOutputFormat.COMPRESS_CODEC, codecClass, CompressionCodec.class); }
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Resolves the {@link CompressionCodec} class configured for compressing job outputs.
 *
 * @param job          the {@link JobContext} whose configuration is consulted
 * @param defaultValue the codec class to return when none is configured
 * @return the configured codec class, or {@code defaultValue} if the property is unset
 * @throws IllegalArgumentException if a codec class name is configured but cannot be loaded
 */
public static Class<? extends CompressionCodec> getOutputCompressorClass(JobContext job,
        Class<? extends CompressionCodec> defaultValue) {
    Configuration conf = job.getConfiguration();
    String name = conf.get(FileOutputFormat.COMPRESS_CODEC);
    if (name == null) {
        // Nothing configured: fall back to the caller-supplied default.
        return defaultValue;
    }
    try {
        return conf.getClassByName(name).asSubclass(CompressionCodec.class);
    } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
    }
}
From source file:com.cloudera.oryx.computation.common.JobStep.java
License:Open Source License
protected final Target compressedTextOutput(Configuration conf, String outputPathKey) { // The way this is used, it doesn't seem like we can just set the object in getConf(). Need // to set the copy in the MRPipeline directly? conf.setClass(FileOutputFormat.COMPRESS_CODEC, GzipCodec.class, CompressionCodec.class); conf.setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class); return To.textFile(Namespaces.toPath(outputPathKey)); }
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF//ww w .j a va 2 s . c om config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name()); config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name()); // For ORC config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:com.facebook.presto.hive.RcFileFileWriterFactory.java
License:Apache License
/**
 * Creates an optimized RCFile writer for the given path, or {@link Optional#empty()} when
 * this factory does not apply (optimized writer disabled, non-RCFile output format, or an
 * unrecognized SerDe).
 *
 * @param path             target file path in HDFS
 * @param inputColumnNames column names in the order the caller will supply data
 * @param storageFormat    table storage format; must be RCFile output with a columnar SerDe
 * @param schema           table properties; column names/types are read from it
 * @param configuration    job configuration; the output compression codec is read from it
 * @param session          connector session (user, query id, session properties)
 * @return a writer for the file, or empty if this factory cannot handle the request
 * @throws PrestoException if the file cannot be created
 */
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    // Pick the value encoding from the SerDe: binary columnar vs. text columnar.
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    // Absent codec name means uncompressed output.
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    // For each file column, the position of the matching input column (-1 if absent).
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        // When validation is enabled, supply a factory that re-opens the written file so the
        // writer can verify it back after close.
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        // On rollback, delete the partially-written file (non-recursive).
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes,
                codecName, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
From source file:io.druid.indexer.UtilsCompressionTest.java
License:Apache License
/**
 * Builds the test fixture: a mocked {@link JobContext} backed by a real {@link Configuration}
 * with compression enabled, the codec resolved from that configuration, and temp-file paths
 * with and without the codec's default extension.
 *
 * @throws IOException if the temp file or filesystem cannot be created
 */
@Before
public void setUp() throws IOException {
    jobConfig = new Configuration();
    // Record/replay order matters: the expectation must be set before replay().
    mockJobContext = EasyMock.createMock(JobContext.class);
    EasyMock.expect(mockJobContext.getConfiguration()).andReturn(jobConfig).anyTimes();
    EasyMock.replay(mockJobContext);
    jobConfig.setBoolean(FileOutputFormat.COMPRESS, true);
    jobConfig.set(FileOutputFormat.COMPRESS_CODEC, CODEC_CLASS);
    // Resolve the codec exactly the way production code would, via the mocked context.
    Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(mockJobContext,
            DEFAULT_COMPRESSION_CODEC);
    codec = ReflectionUtils.newInstance(codecClass, jobConfig);
    // Two views of the same temp file: with and without the codec extension.
    tmpFile = tmpFolder.newFile(TMP_FILE_NAME + codec.getDefaultExtension());
    tmpPathWithExtension = new Path(tmpFile.getAbsolutePath());
    tmpPathWithoutExtension = new Path(tmpFile.getParent(), TMP_FILE_NAME);
    defaultFileSystem = tmpPathWithoutExtension.getFileSystem(jobConfig);
}
From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF/* ww w. ja va 2 s. c om*/ com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION, compressionCodec.getOrcCompressionKind().name()); // For ORC OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java
License:Apache License
@Override public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) { StringBuilder sb = new StringBuilder(); List<String> optSettings = new ArrayList<>(); JobConf jobconf = new JobConf(context.getJobconf()); if (jobconf.getLong(NUM_REDUCES, 0) > 0) { if (!jobconf.getCompressMapOutput()) { optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS)); sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n"); } else {//from w ww . jav a 2 s .c o m String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC); if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) { optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC)); sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC) .append("\n"); } } } if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) { optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS)); sb.append( "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n"); } else { String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, ""); String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, ""); if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec") || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) { sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat"); sb.append(" as this will cause the output files to be unsplittable. "); sb.append("Please use LZO instead or "); sb.append("use a container file format such as SequenceFileOutputFormat.\n"); } } if (sb.length() > 0) { return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(), optSettings); } return null; }
From source file:org.apache.ignite.internal.processors.hadoop.impl.examples.HadoopWordCount2.java
License:Apache License
/**
 * Installs task classes and related formats into the job configuration as requested.
 *
 * @param job               job configuration to change
 * @param setMapper         whether to set the mapper and input format classes
 * @param setCombiner       whether to set the combiner class
 * @param setReducer        whether to set the reducer and output format classes
 * @param outputCompression whether to switch output to a snappy-compressed sequence file
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner) {
        job.setCombinerClass(HadoopWordCount2Combiner.class);
    }

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        // Compressed output replaces any TextOutputFormat chosen above.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
        SequenceFileOutputFormat.setCompressOutput(job, true);
        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}
From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java
License:Apache License
/**
 * Registers the MapReduce configuration keys that must be carried over when translating
 * an MR job configuration to Tez. Split into input-side, output-side, and committer keys.
 */
private static void populateMRSettingsToRetain() {
    // FileInputFormat
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
    mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
    mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
    mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
    mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);

    // FileOutputFormat
    mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
    mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
    mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
    // Marker file ("_SUCCESS") behavior of the output committer.
    mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}