List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setCompressOutput
public static void setCompressOutput(Job job, boolean compress)
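The examples below are drawn from real projects. As a baseline, here is a minimal sketch of the call in a job driver (the class name, job name, and output path are illustrative, not from any of the sources below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressedOutputDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "compressed-output-demo");
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        // Enable compression of the job's final output and choose the codec.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        // A real driver would also configure mapper, reducer, input paths,
        // and key/value classes before calling job.waitForCompletion(true).
    }
}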
From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License:Apache License
/**
 * Enable output compression using the deflate codec and
 * specify its level.
 */
public static void setDeflateLevel(Job job, int level) {
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().setInt(DEFLATE_LEVEL_KEY, level);
}
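A caller would invoke this helper on the Job before submission; a hypothetical call site (the level value is illustrative; deflate levels range from 1, fastest, to 9, best compression):

PigAvroOutputFormat.setDeflateLevel(job, 6); // hypothetical usage; 6 trades speed against size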
From source file:com.metamx.milano.pig.MilanoStoreFunc.java
License:Apache License
/**
 * This does the setup for the mapper/reducer side.
 *
 * @param location The output path.
 * @param job      The job config.
 *
 * @throws IOException Currently not thrown, but is part of the overridden signature.
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    FileOutputFormat.setOutputPath(job, new Path(location));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    Properties props = getUDFProps();
    job.getConfiguration().set("com.metamx.milano.proto.descriptor.base64",
            (String) props.get("milano.pig.proto.schema.base64"));
}
From source file:com.mozilla.main.ReadHBaseWriteHdfs.java
License:LGPL
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");

    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);

    Scan scan = new Scan();
    scan.addFamily("data".getBytes());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class,
            Text.class, Text.class, job);

    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        System.out.println("DONE");
    }
    return 0;
}
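One caveat on this driver: mapred.job.queue.name is the old-style key. It still works through Hadoop's deprecation mapping, but on Hadoop 2.x the same setting would normally be written as follows (a sketch against the same conf object):

conf.set("mapreduce.job.queuename", "prod"); // Hadoop 2.x replacement for mapred.job.queue.name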
From source file:com.mozilla.pig.storage.SeqFileMultiStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(this.keyClass);
    job.setOutputValueClass(this.valueClass); // assumed: the source repeated setOutputKeyClass here

    Configuration conf = job.getConfiguration();
    if ("true".equals(conf.get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = conf.get("output.compression.codec");
        FileOutputFormat.setOutputCompressorClass(job,
                PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
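Seen from the caller's side, the two configuration keys read above are what switch compression on. A minimal sketch of a driver that exercises this path (only the two property names come from the snippet; the job name and codec choice are illustrative):

Configuration conf = new Configuration();
conf.set("output.compression.enabled", "true"); // read by setStoreLocation above
conf.set("output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
Job job = Job.getInstance(conf, "seqfile-store-demo"); // illustrative job name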
From source file:com.netease.news.text.SequenceFilesFromDirectory.java
License:Apache License
private int runMapReduce(Path input, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {
    int chunkSizeInMB = 64;
    if (hasOption(CHUNK_SIZE_OPTION[0])) {
        chunkSizeInMB = Integer.parseInt(getOption(CHUNK_SIZE_OPTION[0]));
    }

    String keyPrefix = null;
    if (hasOption(KEY_PREFIX_OPTION[0])) {
        keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    }

    // Prepare Job for submission.
    Job job = prepareJob(input, output, MultipleTextFileInputFormat.class,
            SequenceFilesFromDirectoryMapper.class, Text.class, Text.class,
            SequenceFileOutputFormat.class, "SequenceFilesFromDirectory");

    Configuration jobConfig = job.getConfiguration();
    if (keyPrefix != null) { // Configuration.set rejects null values
        jobConfig.set(KEY_PREFIX_OPTION[0], keyPrefix);
    }

    FileSystem fs = FileSystem.get(jobConfig);
    FileStatus fsFileStatus = fs.getFileStatus(input);
    String inputDirList = HadoopUtil.buildDirList(fs, fsFileStatus);
    jobConfig.set(BASE_INPUT_PATH, input.toString());
    long chunkSizeInBytes = chunkSizeInMB * 1024L * 1024L; // multiply as long to avoid int overflow

    // Set the max split locations, otherwise we get nasty debug stuff.
    jobConfig.set("mapreduce.job.max.split.locations", String.valueOf(MAX_JOB_SPLIT_LOCATIONS));

    FileInputFormat.setInputPaths(job, inputDirList);
    // Needs to be a multiple of the block size, or no split happens.
    FileInputFormat.setMaxInputSplitSize(job, chunkSizeInBytes);
    FileOutputFormat.setCompressOutput(job, true);

    boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : -1;
}
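Because this job writes sequence files, the compression granularity could also be pinned down explicitly. A hedged addition, not part of the original code (BLOCK generally compresses sequence files better than the RECORD default):

// Optional: compress whole blocks of records rather than individual records.
SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);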
From source file:com.placeiq.piqconnect.InitialVectorGenerator.java
License:Apache License
private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);
    return job;
}
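Note that this job enables compression without naming a codec. In that case the output format falls back to a per-format default when it opens its record writer, roughly like this (sketch of what the framework does internally; the GzipCodec default shown is TextOutputFormat's):

// Inside the output format: resolve the configured codec, falling back to a default.
Class<? extends CompressionCodec> codecClass =
        FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);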
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.placeiq.piqconnect.Runner.java
License:Apache License
private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);
    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);
    setCompression(job);
    return job;
}
From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java
License:Apache License
public Job build() throws Exception {
    // init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    // map
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    // custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    // reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    // output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);
    return job;
}
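Lz4Codec may require the native Hadoop library to be loaded, unlike GzipCodec, which has a pure-Java fallback. A defensive variant of the codec choice could guard on native support (the GzipCodec fallback is an assumption, not part of the original job):

Class<? extends CompressionCodec> codec =
        NativeCodeLoader.isNativeCodeLoaded() ? Lz4Codec.class : GzipCodec.class;
FileOutputFormat.setOutputCompressorClass(job, codec);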
From source file:com.talis.hadoop.rdf.collation.QuadsCollater.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = getConf();
    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    FileOutputFormat.setCompressOutput(job, true);

    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(CollationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(QuadWritable.class);
    job.setReducerClass(CollationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(QuadArrayWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (LOG.isDebugEnabled()) {
        Utils.log(job, LOG);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
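The mapred.* keys set at the top of this driver are the Hadoop 1.x names; they still work via Hadoop's deprecation table, but the Hadoop 2.x equivalents would be (a sketch against the same configuration object):

configuration.setBoolean("mapreduce.map.output.compress", true);
configuration.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
configuration.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec");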