Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setCompressOutput

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setCompressOutput

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setCompressOutput.

Prototype

public static void setCompressOutput(Job job, boolean compress) 

Source Link

Document

Set whether the output of the job is compressed.

Usage

From source file:org.opencb.hpg.bigdata.tools.sequence.Fastq2AvroMR.java

License:Apache License

/**
 * Configures the "Fastq2AvroMR" MapReduce job and blocks until it finishes.
 *
 * @param input     path passed to {@code FileInputFormat} as the job input
 * @param output    path passed to {@code FileOutputFormat} as the job output
 * @param codecName name of the output compression codec; when {@code null},
 *                  no output compression settings are applied
 * @return 0 if {@code waitForCompletion} reports success, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public static int run(String input, String output, String codecName) throws Exception {
    Job job = Job.getInstance(new Configuration(), "Fastq2AvroMR");
    job.setJarByClass(Fastq2AvroMR.class);

    // The Avro schema setters run before the plain job setters so that the
    // latter can override whatever configuration the schema setters apply.
    AvroJob.setOutputKeySchema(job, Read.SCHEMA$);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setMapOutputValueSchema(job, Read.SCHEMA$);

    // Input side.
    Path inputPath = new Path(input);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(FastqInputFormatMODIF.class);

    // Output side; compression is only configured when a codec was named.
    Path outputPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outputPath);
    final boolean compress = codecName != null;
    if (compress) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, CompressionUtils.getHadoopCodec(codecName));
    }
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    // Intermediate (map output) key/value types.
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(AvroValue.class);

    job.setMapperClass(Fastq2GaMapper.class);
    job.setReducerClass(Fastq2GaReducer.class);

    boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}

From source file:org.springframework.data.hadoop.mapreduce.JobFactoryBean.java

License:Apache License

/**
 * Builds the Hadoop {@code Job} held by this factory bean from the bean's
 * properties.
 *
 * <p>The job is created from a configuration derived from {@code configuration}
 * and {@code properties}; each optional property (mapper, reducer, formats,
 * key/value classes, paths, compression, ...) is applied only when it has been
 * set. The finished job is finally handed to {@code processJob}.
 *
 * @throws Exception if creating or configuring the job fails
 */
@SuppressWarnings("rawtypes")
public void afterPropertiesSet() throws Exception {
    final Configuration cfg = ConfigurationUtils.createFrom(configuration, properties);

    buildGenericOptions(cfg);

    // When a user is configured, create the job inside a doAs() of a proxy
    // user layered on top of the current login user; otherwise create it directly.
    if (StringUtils.hasText(user)) {
        UserGroupInformation ugi = UserGroupInformation.createProxyUser(user,
                UserGroupInformation.getLoginUser());
        ugi.doAs(new PrivilegedExceptionAction<Void>() {

            @Override
            public Void run() throws Exception {
                job = new Job(cfg);
                return null;
            }
        });
    } else {
        job = new Job(cfg);
    }

    // Class loader used to resolve the class names below: the bean class
    // loader if present, otherwise Spring's default class loader.
    ClassLoader loader = (beanClassLoader != null ? beanClassLoader
            : org.springframework.util.ClassUtils.getDefaultClassLoader());

    // With an explicit jar, register it on the job configuration and switch
    // both the configuration and the class resolution below to the loader
    // returned by ExecutionUtils.createParentLastClassLoader.
    if (jar != null) {
        JobConf conf = (JobConf) job.getConfiguration();
        conf.setJar(jar.getURI().toString());
        loader = ExecutionUtils.createParentLastClassLoader(jar, beanClassLoader, cfg);
        conf.setClassLoader(loader);
    }

    // Mapper and reducer are set first so their key/value types can be
    // auto-detected; explicit key/value settings further down are applied afterwards.
    if (mapper != null) {
        Class<? extends Mapper> mapperClass = resolveClass(mapper, loader, Mapper.class);
        job.setMapperClass(mapperClass);
        configureMapperTypesIfPossible(job, mapperClass);
    }

    if (reducer != null) {
        Class<? extends Reducer> reducerClass = resolveClass(reducer, loader, Reducer.class);
        job.setReducerClass(reducerClass);
        configureReducerTypesIfPossible(job, reducerClass);
    }

    // Remaining optional settings: each one is applied only if configured.
    if (StringUtils.hasText(name)) {
        job.setJobName(name);
    }
    if (combiner != null) {
        job.setCombinerClass(resolveClass(combiner, loader, Reducer.class));
    }
    if (groupingComparator != null) {
        job.setGroupingComparatorClass(resolveClass(groupingComparator, loader, RawComparator.class));
    }
    if (inputFormat != null) {
        job.setInputFormatClass(resolveClass(inputFormat, loader, InputFormat.class));
    }
    if (mapKey != null) {
        job.setMapOutputKeyClass(resolveClass(mapKey, loader, Object.class));
    }
    if (mapValue != null) {
        job.setMapOutputValueClass(resolveClass(mapValue, loader, Object.class));
    }
    if (numReduceTasks != null) {
        job.setNumReduceTasks(numReduceTasks);
    }
    if (key != null) {
        job.setOutputKeyClass(resolveClass(key, loader, Object.class));
    }
    if (value != null) {
        job.setOutputValueClass(resolveClass(value, loader, Object.class));
    }
    if (outputFormat != null) {
        job.setOutputFormatClass(resolveClass(outputFormat, loader, OutputFormat.class));
    }
    if (partitioner != null) {
        job.setPartitionerClass(resolveClass(partitioner, loader, Partitioner.class));
    }
    if (sortComparator != null) {
        job.setSortComparatorClass(resolveClass(sortComparator, loader, RawComparator.class));
    }
    if (StringUtils.hasText(workingDir)) {
        job.setWorkingDirectory(new Path(workingDir));
    }
    if (jarClass != null) {
        job.setJarByClass(jarClass);
    }

    // File-based input and output locations.
    if (!CollectionUtils.isEmpty(inputPaths)) {
        for (String path : inputPaths) {
            FileInputFormat.addInputPath(job, new Path(path));
        }
    }

    if (StringUtils.hasText(outputPath)) {
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
    }

    // Output compression: the on/off flag and the codec class are independent
    // settings; each is applied only when it was provided.
    if (compressOutput != null) {
        FileOutputFormat.setCompressOutput(job, compressOutput);
    }

    if (codecClass != null) {
        FileOutputFormat.setOutputCompressorClass(job,
                resolveClass(codecClass, loader, CompressionCodec.class));
    }

    processJob(job);
}

From source file:org.talend.components.simplefileio.runtime.sinks.AvroHdfsFileSink.java

License:Open Source License

/**
 * Applies the parent configuration, then sets up compressed Avro output.
 *
 * @param job    job being configured
 * @param sample sample key/value pair; the schema of its key datum becomes
 *               the job's Avro output key schema
 */
@Override
protected void configure(Job job, KV<AvroKey<IndexedRecord>, NullWritable> sample) {
    super.configure(job, sample);

    // The sample is only consulted for the schema of its key datum.
    AvroJob.setOutputKeySchema(job, sample.getKey().datum().getSchema());

    // Turn output compression on and select the snappy codec constant for Avro.
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
}

From source file:pignlproc.storage.AbstractNTriplesStorer.java

License:Apache License

/**
 * Sets the output location of the job and decides whether its output is
 * compressed.
 *
 * <p>If the job configuration has {@code output.compression.enabled} set to
 * {@code "true"}, compression is switched on with the codec class named by
 * {@code output.compression.codec}. Otherwise the codec is chosen from the
 * location's suffix: {@code .bz2}/{@code .bz} selects BZip2, {@code .gz}
 * selects Gzip, and anything else disables compression.
 *
 * @param location output path for the job
 * @param job      job to configure
 * @throws IOException declared by the overridden method
 * @throws RuntimeException if the configured codec class cannot be found;
 *         the original {@link ClassNotFoundException} is attached as the cause
 */
@SuppressWarnings("unchecked")
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    // An empty separator: nothing is inserted between key and value on output.
    job.getConfiguration().set("mapred.textoutputformat.separator", "");
    FileOutputFormat.setOutputPath(job, new Path(location));
    if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = job.getConfiguration().get("output.compression.codec");
        try {
            FileOutputFormat.setOutputCompressorClass(job,
                    (Class<? extends CompressionCodec>) Class.forName(codec));
        } catch (ClassNotFoundException e) {
            // Chain the cause so the original stack trace is not lost.
            throw new RuntimeException("Class not found: " + codec, e);
        }
    } else if (location.endsWith(".bz2") || location.endsWith(".bz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
    } else if (location.endsWith(".gz")) {
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        FileOutputFormat.setCompressOutput(job, false);
    }
}

From source file:pl.edu.icm.coansys.commons.pig.udf.RichSequenceFileLoader.java

License:Open Source License

/**
 * Configures the output key/value classes, output path and output
 * compression of the given job.
 *
 * <p>When the job configuration has {@code output.compression.enabled} set to
 * {@code "true"}, the codec named by {@code output.compression.codec} is used;
 * otherwise the decision is delegated to {@code setCompression}, which works
 * from the location path.
 *
 * @param location output path for the job
 * @param job      job to configure
 * @throws IOException declared by the overridden method
 */
@SuppressWarnings("unchecked")
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    ensureUDFContext(job.getConfiguration());
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);

    Path target = new Path(location);
    FileOutputFormat.setOutputPath(job, target);

    boolean explicitlyEnabled = "true".equals(job.getConfiguration().get("output.compression.enabled"));
    if (!explicitlyEnabled) {
        // Lets storing to a directory ending with ".gz" or ".bz2" work.
        setCompression(target, job);
        return;
    }

    FileOutputFormat.setCompressOutput(job, true);
    String codecName = job.getConfiguration().get("output.compression.codec");
    Class<? extends CompressionCodec> codecClass = PigContext.resolveClassName(codecName)
            .asSubclass(CompressionCodec.class);
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
}

From source file:pl.edu.icm.coansys.commons.pig.udf.RichSequenceFileLoader.java

License:Open Source License

/**
 * Enables or disables output compression depending on whether a codec can be
 * looked up for the given path.
 *
 * @param path path handed to the codec factory for the lookup
 * @param job  job whose output compression settings are updated
 */
private void setCompression(Path path, Job job) {
    CompressionCodec matched = new CompressionCodecFactory(job.getConfiguration()).getCodec(path);

    // A null lookup result means no codec applies: compression is switched off.
    boolean compress = matched != null;
    FileOutputFormat.setCompressOutput(job, compress);
    if (compress) {
        FileOutputFormat.setOutputCompressorClass(job, matched.getClass());
    }
}

From source file:uk.ac.cam.eng.extraction.hadoop.extraction.ExtractorJob.java

License:Apache License

/**
 * Creates the "Rule extraction" job with its mapper, combiner, reducer,
 * key/value classes and sequence-file input/output formats, and with output
 * compression switched on.
 *
 * @param conf configuration the job is created from; this method sets the
 *             map and reduce child JVM option properties on it first
 * @return the configured job (not yet submitted)
 * @throws IOException if the job cannot be created
 */
public static Job getJob(Configuration conf) throws IOException {
    // Child JVM options are written into the caller's configuration object.
    final String mapperJvmOpts = "-Xmx200m";
    final String reducerJvmOpts = "-Xmx4096m";
    conf.set("mapred.map.child.java.opts", mapperJvmOpts);
    conf.set("mapred.reduce.child.java.opts", reducerJvmOpts);

    final Job extractionJob = new Job(conf, "Rule extraction");
    extractionJob.setJarByClass(ExtractorJob.class);

    // Map output and final output use the same key and value classes.
    final Class<RuleWritable> keyClass = RuleWritable.class;
    final Class<RuleInfoWritable> valueClass = RuleInfoWritable.class;
    extractionJob.setMapOutputKeyClass(keyClass);
    extractionJob.setMapOutputValueClass(valueClass);
    extractionJob.setOutputKeyClass(keyClass);
    extractionJob.setOutputValueClass(valueClass);

    // The reducer class also serves as the combiner.
    extractionJob.setMapperClass(ExtractorMapper.class);
    extractionJob.setReducerClass(ExtractorReducer.class);
    extractionJob.setCombinerClass(ExtractorReducer.class);

    extractionJob.setInputFormatClass(SequenceFileInputFormat.class);
    extractionJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(extractionJob, true);

    return extractionJob;
}

From source file:wikiduper.clir.rp.TextDocnoMappingBuilder.java

License:Apache License

/**
 * Runs the docno-mapping job over the configured collection and writes the
 * resulting mapping file.
 *
 * <p>The job writes uncompressed text output with a single reducer into a
 * randomly named temporary directory; its {@code part-r-00000} file is then
 * passed to {@code writeMappingData} together with the target mapping path.
 * The temporary directory is removed afterwards, whether or not the job or
 * the mapping write succeeded.
 *
 * @param args command-line arguments parsed by
 *             {@code DocnoMapping.BuilderUtils.parseDefaultOptions}
 * @return 0 on success; -1 if the options could not be parsed or the
 *         MapReduce job reported failure
 * @throws IOException if file system access or writing the mapping fails
 * @throws RuntimeException wrapping any exception thrown while waiting for
 *         the job to complete
 */
@Override
public int run(String[] args) throws IOException {
    DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
    LOG.info("Arguments: " + Arrays.toString(args));
    if (options == null) {
        return -1;
    }

    // Temp directory.
    String tmpDir = "tmp-" + TextDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000);

    LOG.info("Tool name: " + TextDocnoMappingBuilder.class.getCanonicalName());
    LOG.info(" - input path: " + options.collection);
    LOG.info(" - output file: " + options.docnoMapping);

    Job job = Job.getInstance(getConf());

    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(TextDocnoMappingBuilder.class);

    // A single reducer, so all output ends up in one part file (read below).
    job.setNumReduceTasks(1);

    // Skip entries whose name starts with an underscore.
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    // Note: Gov2 and Wt10g raw collections are organized into sub-directories.
    // Only one level of sub-directories is expanded here.
    Path collectionPath = new Path(options.collection);
    for (FileStatus status : fs.listStatus(collectionPath, filter)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath(), filter)) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }
    FileOutputFormat.setOutputPath(job, new Path(tmpDir));
    FileOutputFormat.setCompressOutput(job, false);

    // NOTE(review): options.inputFormat is logged but TextInputFormat is what
    // is actually used -- confirm this is intended.
    job.setInputFormatClass(TextInputFormat.class); //options.inputFormat);
    LOG.info("Input format : " + options.inputFormat);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpDir), true);

    boolean succeeded;
    try {
        succeeded = job.waitForCompletion(true);
    } catch (InterruptedException e) {
        // Restore the interrupt flag before propagating, so callers can see it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    try {
        if (!succeeded) {
            // Without this check a failed job would fall through to reading a
            // part file that may not exist and could still report success.
            LOG.error("Job failed; no mapping written to " + options.docnoMapping);
            return -1;
        }
        writeMappingData(new Path(tmpDir + "/part-r-00000"), new Path(options.docnoMapping), fs);
    } finally {
        // Always remove the temporary job output, on success and on failure.
        fs.delete(new Path(tmpDir), true);
    }

    return 0;
}