Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setCompressOutput

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setCompressOutput.

Prototype

public static void setCompressOutput(Job job, boolean compress) 

Document

Set whether the output of the job is compressed.
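
A minimal sketch of the call in context (the job name, output path, and codec below are illustrative, not part of the prototype):

    Job job = Job.getInstance(new Configuration(), "compression-example");
    FileOutputFormat.setOutputPath(job, new Path("/tmp/compressed-output"));
    FileOutputFormat.setCompressOutput(job, true);
    // if no codec is named, the configured default applies
    // (mapreduce.output.fileoutputformat.compress.codec, DefaultCodec by default)
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);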

Usage

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java

License:Apache License

/**
 * Enable output compression using the deflate codec and
 * specify its level.
 */
public static void setDeflateLevel(Job job, int level) {
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().setInt(DEFLATE_LEVEL_KEY, level);
}
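
A possible call site for this helper, with an illustrative level (deflate accepts 1 through 9, trading speed for size):

    // assumes a Job already configured for an Avro store
    PigAvroOutputFormat.setDeflateLevel(job, 6);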

From source file:com.metamx.milano.pig.MilanoStoreFunc.java

License:Apache License

/**
 * This does the setup for the mapper/reducer side.
 *
 * @param location The output path.
 * @param job      The job config.
 *
 * @throws IOException Currently not thrown, but is part of the overridden signature.
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    FileOutputFormat.setOutputPath(job, new Path(location));
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    Properties props = getUDFProps();

    job.getConfiguration().set("com.metamx.milano.proto.descriptor.base64",
            (String) props.get("milano.pig.proto.schema.base64"));
}
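
Gzip output is compact but not splittable, so each output file can only be consumed by a single downstream map task. If splittability matters more than speed, a splittable codec can be swapped in; a sketch, not part of the original source:

    // BZip2Codec is splittable, at the cost of slower compression
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);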

From source file:com.mozilla.main.ReadHBaseWriteHdfs.java

License:LGPL

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");
    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);
    Scan scan = new Scan();
    scan.addFamily("data".getBytes());

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class, Text.class,
            Text.class, job);

    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        System.out.println("DONE");
    }

    return 0;
}
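
mapred.job.queue.name is an old-style property key. It still works through Hadoop's deprecation mapping, but on Hadoop 2 and later the equivalent is:

    conf.set("mapreduce.job.queuename", "prod");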

From source file:com.mozilla.pig.storage.SeqFileMultiStorage.java

License:Apache License

@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(this.keyClass);
    job.setOutputValueClass(this.valueClass);
    Configuration conf = job.getConfiguration();
    if ("true".equals(conf.get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = conf.get("output.compression.codec");
        FileOutputFormat.setOutputCompressorClass(job,
                PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
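
Because this store function reads output.compression.enabled and output.compression.codec from the job configuration, a caller enables compression by setting those properties before the store runs; the codec value below is illustrative:

    Configuration conf = job.getConfiguration();
    conf.set("output.compression.enabled", "true");
    conf.set("output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");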

From source file:com.netease.news.text.SequenceFilesFromDirectory.java

License:Apache License

private int runMapReduce(Path input, Path output)
        throws IOException, ClassNotFoundException, InterruptedException {

    int chunkSizeInMB = 64;
    if (hasOption(CHUNK_SIZE_OPTION[0])) {
        chunkSizeInMB = Integer.parseInt(getOption(CHUNK_SIZE_OPTION[0]));
    }

    String keyPrefix = null;
    if (hasOption(KEY_PREFIX_OPTION[0])) {
        keyPrefix = getOption(KEY_PREFIX_OPTION[0]);
    }

    // Prepare Job for submission.
    Job job = prepareJob(input, output, MultipleTextFileInputFormat.class,
            SequenceFilesFromDirectoryMapper.class, Text.class, Text.class, SequenceFileOutputFormat.class,
            "SequenceFilesFromDirectory");

    Configuration jobConfig = job.getConfiguration();
    jobConfig.set(KEY_PREFIX_OPTION[0], keyPrefix);
    FileSystem fs = FileSystem.get(jobConfig);
    FileStatus fsFileStatus = fs.getFileStatus(input);
    String inputDirList = HadoopUtil.buildDirList(fs, fsFileStatus);
    jobConfig.set(BASE_INPUT_PATH, input.toString());

    long chunkSizeInBytes = chunkSizeInMB * 1024L * 1024; // long arithmetic avoids int overflow for sizes >= 2048 MB

    // set the max split locations, otherwise we get nasty debug stuff
    jobConfig.set("mapreduce.job.max.split.locations", String.valueOf(MAX_JOB_SPLIT_LOCATIONS));

    FileInputFormat.setInputPaths(job, inputDirList);
    // need to set this to a multiple of the block size, or no split happens
    FileInputFormat.setMaxInputSplitSize(job, chunkSizeInBytes);
    FileOutputFormat.setCompressOutput(job, true);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }
    return 0;
}
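
This job enables compression without naming a codec, so the output format falls back to the configured default (DefaultCodec unless mapreduce.output.fileoutputformat.compress.codec says otherwise). For SequenceFile output it is common to pick the codec and block compression explicitly; a sketch:

    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);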

From source file:com.placeiq.piqconnect.InitialVectorGenerator.java

License:Apache License

private Job buildJob() throws Exception {
    Configuration conf = getConf();
    conf.setLong("numberOfNodes", numberOfNodes);

    Job job = new Job(conf, "data-piqid.piqconnect.ConCmptIVGen_Stage1");
    job.setJarByClass(InitialVectorGenerator.class);
    job.setMapperClass(_Mapper.class);
    job.setReducerClass(_Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, pathBitmask);
    FileOutputFormat.setOutputPath(job, pathVector);
    FileOutputFormat.setCompressOutput(job, true);

    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}
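
setCompression(job) is a project-local helper whose body is not shown on this page. Given the SequenceFile output format above, a plausible implementation (an assumption, not the actual PlaceIQ code) would name a codec and block compression:

    private static void setCompression(Job job) {
        // hypothetical sketch of the helper that is not shown here
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    }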

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);

    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.sirius.hadoop.job.onlinetime.OnlineTimeJob.java

License:Apache License

public Job build() throws Exception {
    // init
    Job job = Job.getInstance(getConf(), "onlinetime");
    job.setJarByClass(OnlineTimeJob.class);

    // mapper
    job.setMapperClass(StatusMapper.class);
    job.setMapOutputKeyClass(StatusKey.class);
    job.setMapOutputValueClass(OnlineRecord.class);

    //custom partition
    job.setPartitionerClass(StatusKeyPartitioner.class);

    //reduce
    job.setGroupingComparatorClass(StatusKeyGroupComparator.class);
    job.setReducerClass(StatusReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    //input
    FileInputFormat.setInputPaths(job, new Path("/subscriber_status/subscriber_status.json"));

    //output
    FileOutputFormat.setOutputPath(job, out);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, Lz4Codec.class);

    return job;
}
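
Whether Lz4Codec works depends on how the cluster's Hadoop native libraries were built. If LZ4 is unavailable, another fast codec can be substituted; an illustrative swap, not from the original source:

    // SnappyCodec is a common alternative fast codec
    FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);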

From source file:com.talis.hadoop.rdf.collation.QuadsCollater.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    FileOutputFormat.setCompressOutput(job, true);

    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(CollationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(QuadWritable.class);

    job.setReducerClass(CollationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(QuadArrayWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (LOG.isDebugEnabled()) {
        Utils.log(job, LOG);
    }

    return job.waitForCompletion(true) ? 0 : 1;
}
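
The mapred.* keys used above are old-style names that still work through Hadoop's deprecation mapping; the Hadoop 2 equivalents are:

    configuration.setBoolean("mapreduce.map.output.compress", true);
    configuration.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
    configuration.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.GzipCodec");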