Usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setCompressOutput
public static void setCompressOutput(Job job, boolean compress)
From source file:org.opencb.hpg.bigdata.tools.sequence.Fastq2AvroMR.java
License:Apache License
public static int run(String input, String output, String codecName) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Fastq2AvroMR"); job.setJarByClass(Fastq2AvroMR.class); // We call setOutputSchema first so we can override the configuration // parameters it sets AvroJob.setOutputKeySchema(job, Read.SCHEMA$); job.setOutputValueClass(NullWritable.class); AvroJob.setMapOutputValueSchema(job, Read.SCHEMA$); // point to input data FileInputFormat.setInputPaths(job, new Path(input)); job.setInputFormatClass(FastqInputFormatMODIF.class); // set the output format FileOutputFormat.setOutputPath(job, new Path(output)); if (codecName != null) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, CompressionUtils.getHadoopCodec(codecName)); }//from w w w . ja v a 2 s.co m job.setOutputFormatClass(AvroKeyOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(AvroValue.class); job.setMapperClass(Fastq2GaMapper.class); job.setReducerClass(Fastq2GaReducer.class); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:org.springframework.data.hadoop.mapreduce.JobFactoryBean.java
License:Apache License
@SuppressWarnings("rawtypes") public void afterPropertiesSet() throws Exception { final Configuration cfg = ConfigurationUtils.createFrom(configuration, properties); buildGenericOptions(cfg);//from w w w .ja v a 2s.c om if (StringUtils.hasText(user)) { UserGroupInformation ugi = UserGroupInformation.createProxyUser(user, UserGroupInformation.getLoginUser()); ugi.doAs(new PrivilegedExceptionAction<Void>() { @Override public Void run() throws Exception { job = new Job(cfg); return null; } }); } else { job = new Job(cfg); } ClassLoader loader = (beanClassLoader != null ? beanClassLoader : org.springframework.util.ClassUtils.getDefaultClassLoader()); if (jar != null) { JobConf conf = (JobConf) job.getConfiguration(); conf.setJar(jar.getURI().toString()); loader = ExecutionUtils.createParentLastClassLoader(jar, beanClassLoader, cfg); conf.setClassLoader(loader); } // set first to enable auto-detection of K/V to skip the key/value types to be specified if (mapper != null) { Class<? extends Mapper> mapperClass = resolveClass(mapper, loader, Mapper.class); job.setMapperClass(mapperClass); configureMapperTypesIfPossible(job, mapperClass); } if (reducer != null) { Class<? 
extends Reducer> reducerClass = resolveClass(reducer, loader, Reducer.class); job.setReducerClass(reducerClass); configureReducerTypesIfPossible(job, reducerClass); } if (StringUtils.hasText(name)) { job.setJobName(name); } if (combiner != null) { job.setCombinerClass(resolveClass(combiner, loader, Reducer.class)); } if (groupingComparator != null) { job.setGroupingComparatorClass(resolveClass(groupingComparator, loader, RawComparator.class)); } if (inputFormat != null) { job.setInputFormatClass(resolveClass(inputFormat, loader, InputFormat.class)); } if (mapKey != null) { job.setMapOutputKeyClass(resolveClass(mapKey, loader, Object.class)); } if (mapValue != null) { job.setMapOutputValueClass(resolveClass(mapValue, loader, Object.class)); } if (numReduceTasks != null) { job.setNumReduceTasks(numReduceTasks); } if (key != null) { job.setOutputKeyClass(resolveClass(key, loader, Object.class)); } if (value != null) { job.setOutputValueClass(resolveClass(value, loader, Object.class)); } if (outputFormat != null) { job.setOutputFormatClass(resolveClass(outputFormat, loader, OutputFormat.class)); } if (partitioner != null) { job.setPartitionerClass(resolveClass(partitioner, loader, Partitioner.class)); } if (sortComparator != null) { job.setSortComparatorClass(resolveClass(sortComparator, loader, RawComparator.class)); } if (StringUtils.hasText(workingDir)) { job.setWorkingDirectory(new Path(workingDir)); } if (jarClass != null) { job.setJarByClass(jarClass); } if (!CollectionUtils.isEmpty(inputPaths)) { for (String path : inputPaths) { FileInputFormat.addInputPath(job, new Path(path)); } } if (StringUtils.hasText(outputPath)) { FileOutputFormat.setOutputPath(job, new Path(outputPath)); } if (compressOutput != null) { FileOutputFormat.setCompressOutput(job, compressOutput); } if (codecClass != null) { FileOutputFormat.setOutputCompressorClass(job, resolveClass(codecClass, loader, CompressionCodec.class)); } processJob(job); }
From source file:org.talend.components.simplefileio.runtime.sinks.AvroHdfsFileSink.java
License:Open Source License
/**
 * Applies the superclass configuration, then prepares the job for compressed Avro output.
 *
 * <p>The output key schema is taken from the datum of the sample's key, output compression
 * is switched on, and {@code AvroJob.CONF_OUTPUT_CODEC} is set to
 * {@code DataFileConstants.SNAPPY_CODEC}.
 *
 * @param job    the job being configured
 * @param sample a sample key/value pair whose key datum supplies the schema
 */
@Override
protected void configure(Job job, KV<AvroKey<IndexedRecord>, NullWritable> sample) {
    super.configure(job, sample);

    // Schema comes straight from the sample record.
    AvroJob.setOutputKeySchema(job, sample.getKey().datum().getSchema());

    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
}
From source file:pignlproc.storage.AbstractNTriplesStorer.java
License:Apache License
@SuppressWarnings("unchecked") @Override//from w w w. j av a 2 s . c o m public void setStoreLocation(String location, Job job) throws IOException { job.getConfiguration().set("mapred.textoutputformat.separator", ""); FileOutputFormat.setOutputPath(job, new Path(location)); if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = job.getConfiguration().get("output.compression.codec"); try { FileOutputFormat.setOutputCompressorClass(job, (Class<? extends CompressionCodec>) Class.forName(codec)); } catch (ClassNotFoundException e) { throw new RuntimeException("Class not found: " + codec); } } else { if (location.endsWith(".bz2") || location.endsWith(".bz")) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); } else if (location.endsWith(".gz")) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } else { FileOutputFormat.setCompressOutput(job, false); } } }
From source file:pl.edu.icm.coansys.commons.pig.udf.RichSequenceFileLoader.java
License:Open Source License
@SuppressWarnings("unchecked") @Override//ww w . jav a 2 s . c o m public void setStoreLocation(String location, Job job) throws IOException { ensureUDFContext(job.getConfiguration()); job.setOutputKeyClass(keyClass); job.setOutputValueClass(valueClass); FileOutputFormat.setOutputPath(job, new Path(location)); if ("true".equals(job.getConfiguration().get("output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = job.getConfiguration().get("output.compression.codec"); FileOutputFormat.setOutputCompressorClass(job, PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class)); } else { // This makes it so that storing to a directory ending with ".gz" or // ".bz2" works. setCompression(new Path(location), job); } }
From source file:pl.edu.icm.coansys.commons.pig.udf.RichSequenceFileLoader.java
License:Open Source License
/**
 * Turns output compression on or off depending on whether a codec can be found for the path.
 *
 * <p>The lookup uses a {@code CompressionCodecFactory} built from the job's configuration.
 * If it returns a codec, compression is enabled with that codec's class; if it returns
 * {@code null}, compression is disabled.
 *
 * @param path the path handed to the codec factory
 * @param job  the job to configure
 */
private void setCompression(Path path, Job job) {
    CompressionCodec matched =
            new CompressionCodecFactory(job.getConfiguration()).getCodec(path);

    if (matched == null) {
        FileOutputFormat.setCompressOutput(job, false);
        return;
    }

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, matched.getClass());
}
From source file:uk.ac.cam.eng.extraction.hadoop.extraction.ExtractorJob.java
License:Apache License
/**
 * Builds the "Rule extraction" job.
 *
 * <p>Before the job is created, {@code mapred.map.child.java.opts} is set to
 * {@code -Xmx200m} and {@code mapred.reduce.child.java.opts} to {@code -Xmx4096m} on the
 * supplied configuration, so the caller's {@code conf} object is modified. The job reads
 * and writes sequence files, uses {@code ExtractorReducer} as both combiner and reducer,
 * and has output compression enabled.
 *
 * @param conf the configuration the job is created from (modified by this call)
 * @return the configured job; it is not submitted here
 * @throws IOException if the job cannot be created
 */
public static Job getJob(Configuration conf) throws IOException {
    // Child JVM heap sizes for map and reduce tasks.
    conf.set("mapred.map.child.java.opts", "-Xmx200m");
    conf.set("mapred.reduce.child.java.opts", "-Xmx4096m");

    Job extraction = new Job(conf, "Rule extraction");
    extraction.setJarByClass(ExtractorJob.class);

    // Key/value types, identical for map output and final output.
    extraction.setMapOutputKeyClass(RuleWritable.class);
    extraction.setMapOutputValueClass(RuleInfoWritable.class);
    extraction.setOutputKeyClass(RuleWritable.class);
    extraction.setOutputValueClass(RuleInfoWritable.class);

    // Processing classes.
    extraction.setMapperClass(ExtractorMapper.class);
    extraction.setReducerClass(ExtractorReducer.class);
    extraction.setCombinerClass(ExtractorReducer.class);

    // I/O formats and compression.
    extraction.setInputFormatClass(SequenceFileInputFormat.class);
    extraction.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(extraction, true);

    return extraction;
}
From source file:wikiduper.clir.rp.TextDocnoMappingBuilder.java
License:Apache License
/**
 * Runs a single-reducer job over the collection and writes the resulting docno mapping.
 *
 * <p>Every entry of {@code options.collection} whose name does not start with {@code "_"}
 * is added as an input path; directories are expanded one level. The job writes
 * uncompressed text output to a temporary directory. On success, {@code part-r-00000} from
 * that directory is passed to {@code writeMappingData} together with
 * {@code options.docnoMapping}, and the temporary directory is deleted.
 *
 * @param args command-line arguments, parsed by
 *             {@code DocnoMapping.BuilderUtils.parseDefaultOptions}
 * @return {@code 0} on success; {@code -1} if the options cannot be parsed or the job
 *         reports failure
 * @throws IOException      if a file system operation fails
 * @throws RuntimeException if running the job throws; the original exception is the cause
 */
@Override
public int run(String[] args) throws IOException {
    DocnoMapping.DefaultBuilderOptions options = DocnoMapping.BuilderUtils.parseDefaultOptions(args);
    // NOTE(review): looks like leftover debug output on stdout; consider routing through LOG.
    System.out.println("WTF WTF args: " + Arrays.toString(args));
    if (options == null) {
        return -1;
    }

    // Temp directory.
    String tmpDir = "tmp-" + TextDocnoMappingBuilder.class.getSimpleName() + "-" + random.nextInt(10000);
    Path tmpPath = new Path(tmpDir);

    LOG.info("Tool name: " + TextDocnoMappingBuilder.class.getCanonicalName());
    LOG.info(" - input path: " + options.collection);
    LOG.info(" - output file: " + options.docnoMapping);

    Job job = Job.getInstance(getConf());
    FileSystem fs = FileSystem.get(job.getConfiguration());

    job.setJarByClass(TextDocnoMappingBuilder.class);
    job.setNumReduceTasks(1);

    // Skip entries whose name starts with "_".
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    // Note: Gov2 and Wt10g raw collections are organized into sub-directories.
    Path collectionPath = new Path(options.collection);
    for (FileStatus status : fs.listStatus(collectionPath, filter)) {
        if (status.isDirectory()) {
            for (FileStatus s : fs.listStatus(status.getPath(), filter)) {
                FileInputFormat.addInputPath(job, s.getPath());
            }
        } else {
            FileInputFormat.addInputPath(job, status.getPath());
        }
    }
    FileOutputFormat.setOutputPath(job, tmpPath);
    FileOutputFormat.setCompressOutput(job, false);

    // The input format is fixed to TextInputFormat; options.inputFormat is only logged.
    job.setInputFormatClass(TextInputFormat.class);
    LOG.info("Input format : " + options.inputFormat);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    LOG.info("Here1\n");
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(tmpPath, true);

    boolean succeeded;
    try {
        succeeded = job.waitForCompletion(true);
    } catch (InterruptedException e) {
        // Restore the interrupt flag before wrapping, so callers can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (!succeeded) {
        // A failed job has no usable part file: clean up and report failure instead of
        // trying to read the output and returning success.
        LOG.error("Job failed; no docno mapping written to " + options.docnoMapping);
        fs.delete(tmpPath, true);
        return -1;
    }

    writeMappingData(new Path(tmpDir + "/part-r-00000"), new Path(options.docnoMapping), fs);
    fs.delete(tmpPath, true);
    return 0;
}