Usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_TYPE
String COMPRESS_TYPE
To view the source code for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_TYPE, click the Source Link.
From source file:com.facebook.presto.hive.HdfsConfigurationUpdater.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF// ww w. j a v a 2 s . c o m config.set(HIVE_ORC_DEFAULT_COMPRESS.varname, compressionCodec.getOrcCompressionKind().name()); config.set(HIVE_ORC_COMPRESSION.varname, compressionCodec.getOrcCompressionKind().name()); // For ORC config.set(OrcTableProperties.COMPRESSION.getPropName(), compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:io.prestosql.plugin.hive.HdfsConfigurationInitializer.java
License:Apache License
public static void configureCompression(Configuration config, HiveCompressionCodec compressionCodec) { boolean compression = compressionCodec != HiveCompressionCodec.NONE; config.setBoolean(COMPRESSRESULT.varname, compression); config.setBoolean("mapred.output.compress", compression); config.setBoolean(FileOutputFormat.COMPRESS, compression); // For DWRF// ww w. j a v a 2 s . c om com.facebook.hive.orc.OrcConf.setVar(config, HIVE_ORC_COMPRESSION, compressionCodec.getOrcCompressionKind().name()); // For ORC OrcConf.COMPRESS.setString(config, compressionCodec.getOrcCompressionKind().name()); // For RCFile and Text if (compressionCodec.getCodec().isPresent()) { config.set("mapred.output.compression.codec", compressionCodec.getCodec().get().getName()); config.set(FileOutputFormat.COMPRESS_CODEC, compressionCodec.getCodec().get().getName()); } else { config.unset("mapred.output.compression.codec"); config.unset(FileOutputFormat.COMPRESS_CODEC); } // For Parquet config.set(ParquetOutputFormat.COMPRESSION, compressionCodec.getParquetCompressionCodec().name()); // For SequenceFile config.set(FileOutputFormat.COMPRESS_TYPE, BLOCK.toString()); }
From source file:nl.naward04.hadoop.country.Country.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); // Set compress type to compress BLOCKs (not RECORDs) // https://hadoop.apache.org/docs/r2.4.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml // http://hadoop.apache.org/docs/r2.4.0/api/org/apache/hadoop/io/SequenceFile.html conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK"); Job job = Job.getInstance(conf, "Find the country based on domain name or IP address."); job.setJarByClass(Country.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(CountryLookup.class); job.setInputFormatClass(WarcInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Enable compression FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // Execute job and return status return job.waitForCompletion(true) ? 0 : 1; }
From source file:nl.naward05.hadoop.MergeFiles.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); // Set compress type to compress BLOCKs (not RECORDs) // https://hadoop.apache.org/docs/r2.4.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml // http://hadoop.apache.org/docs/r2.4.0/api/org/apache/hadoop/io/SequenceFile.html conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK"); Job job = Job.getInstance(conf, "Merge countries and songs"); job.setJarByClass(MergeFiles.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileInputFormat.addInputPath(job, new Path(args[1])); FileOutputFormat.setOutputPath(job, new Path(args[2])); job.setReducerClass(MergeReducer.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); // Enable compression FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // Execute job and return status return job.waitForCompletion(true) ? 0 : 1; }
From source file:nl.surfsara.warcexamples.hadoop.rr.RR.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = this.getConf(); conf.set(FileOutputFormat.COMPRESS_TYPE, "BLOCK"); Job job = Job.getInstance(conf, "Record Recognizer"); job.setJarByClass(RR.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(RRMapper.class); job.setInputFormatClass(WarcInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // job.setOutputValueClass(LongWritable.class); // job.setReducerClass(LongSumReducer.class); // Enable compression FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // Execute job and return status return job.waitForCompletion(true) ? 0 : 1; }
From source file:org.apache.pig.backend.hadoop.executionengine.tez.util.MRToTezHelper.java
License:Apache License
private static void populateMRSettingsToRetain() { // FileInputFormat mrSettingsToRetain.add(FileInputFormat.INPUT_DIR); mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE); mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE); mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS); mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES); mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE); // FileOutputFormat mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME); mrSettingsToRetain.add(FileOutputFormat.COMPRESS); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC); mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE); mrSettingsToRetain.add(FileOutputFormat.OUTDIR); mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER); }