Usage examples for org.apache.hadoop.mapred.JobConf#setOutputCommitter, which registers the OutputCommitter implementation a MapReduce job uses to manage task/job output commit:
public void setOutputCommitter(Class<? extends OutputCommitter> theClass)
From source file:com.hazelcast.jet.hadoop.impl.WriteHdfsPTest.java
License:Open Source License
/**
 * End-to-end test: writes an IMap of IntWritable pairs to HDFS via a Jet
 * pipeline sink, reads the files back via a Jet HDFS source, and verifies
 * that every entry round-trips.
 * NOTE(review): outputFormatClass/inputFormatClass appear to be test
 * parameterization fields declared elsewhere in the class — confirm.
 */
@Test public void testWriteFile() throws Exception {
    int messageCount = 320;
    String mapName = randomMapName();
    // Two-member cluster so the sink runs distributed.
    JetInstance instance = createJetMember();
    createJetMember();
    // Seed the source map with messageCount identity entries (i -> i).
    Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed()
            .collect(toMap(IntWritable::new, IntWritable::new));
    instance.getMap(mapName).putAll(map);
    Path path = getPath();
    // Configure the write job: committer, key/value types, output format.
    JobConf conf = new JobConf();
    conf.setOutputFormat(outputFormatClass);
    conf.setOutputCommitter(FileOutputCommitter.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    if (outputFormatClass.equals(LazyOutputFormat.class)) {
        // LazyOutputFormat needs a concrete delegate format.
        LazyOutputFormat.setOutputFormatClass(conf, TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(conf, path);
    Pipeline p = Pipeline.create();
    p.drawFrom(Sources.map(mapName)).drainTo(HdfsSinks.hdfs(conf))
            // we use higher value to increase the race chance for LazyOutputFormat
            .setLocalParallelism(8);
    Future<Void> future = instance.newJob(p).getFuture();
    assertCompletesEventually(future);
    // Read the written files back through the HDFS source.
    JobConf readJobConf = new JobConf();
    readJobConf.setInputFormat(inputFormatClass);
    FileInputFormat.addInputPath(readJobConf, path);
    p = Pipeline.create();
    p.drawFrom(HdfsSources.hdfs(readJobConf)).drainTo(Sinks.list("results"));
    future = instance.newJob(p).getFuture();
    assertCompletesEventually(future);
    // Every written entry must have been read back.
    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}
From source file:com.hazelcast.jet.impl.connector.hadoop.WriteHdfsPTest.java
License:Open Source License
/**
 * End-to-end test using the core DAG API: writes an IMap of IntWritable
 * pairs to HDFS via a writeHdfs vertex, reads the files back via a
 * readHdfs vertex, and verifies that every entry round-trips.
 * NOTE(review): outputFormatClass/inputFormatClass appear to be test
 * parameterization fields declared elsewhere in the class — confirm.
 */
@Test public void testWriteFile() throws Exception {
    int messageCount = 20;
    String mapName = randomMapName();
    // Two-member cluster so the job runs distributed.
    JetInstance instance = createJetMember();
    createJetMember();
    // Seed the source map with messageCount identity entries (i -> i).
    Map<IntWritable, IntWritable> map = IntStream.range(0, messageCount).boxed()
            .collect(toMap(IntWritable::new, IntWritable::new));
    instance.getMap(mapName).putAll(map);
    // Write DAG: map reader -> HDFS writer.
    DAG dag = new DAG();
    Vertex producer = dag.newVertex("producer", readMap(mapName)).localParallelism(1);
    Path path = getPath();
    // Configure the write job: committer, key/value types, output format.
    JobConf conf = new JobConf();
    conf.setOutputFormat(outputFormatClass);
    conf.setOutputCommitter(FileOutputCommitter.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(conf, path);
    Vertex consumer = dag.newVertex("consumer", writeHdfs(conf)).localParallelism(4);
    dag.edge(between(producer, consumer));
    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);
    // Read DAG: HDFS reader -> IList writer.
    dag = new DAG();
    JobConf readJobConf = new JobConf();
    readJobConf.setInputFormat(inputFormatClass);
    FileInputFormat.addInputPath(readJobConf, path);
    producer = dag.newVertex("producer", readHdfs(readJobConf)).localParallelism(8);
    consumer = dag.newVertex("consumer", writeList("results")).localParallelism(1);
    dag.edge(between(producer, consumer));
    future = instance.newJob(dag).execute();
    assertCompletesEventually(future);
    // Every written entry must have been read back.
    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setUpMultipleOutputs(JobConf job, byte[] resultIndexes, byte[] resultDimsUnknwon, String[] outputs, OutputInfo[] outputInfos, boolean inBlockRepresentation, boolean mayContainCtable) throws Exception { if (resultIndexes.length != outputs.length) throw new Exception("number of outputs and result indexes does not match"); if (outputs.length != outputInfos.length) throw new Exception("number of outputs and outputInfos indexes does not match"); job.set(RESULT_INDEXES_CONFIG, MRJobConfiguration.getIndexesString(resultIndexes)); job.set(RESULT_DIMS_UNKNOWN_CONFIG, MRJobConfiguration.getIndexesString(resultDimsUnknwon)); job.setStrings(OUTPUT_MATRICES_DIRS_CONFIG, outputs); job.setOutputCommitter(MultipleOutputCommitter.class); for (int i = 0; i < outputs.length; i++) { MapReduceTool.deleteFileIfExistOnHDFS(new Path(outputs[i]), job); if (mayContainCtable && resultDimsUnknwon[i] == (byte) 1) { setOutputInfo(job, i, outputInfos[i], false); } else {/* w w w. ja v a 2 s.c o m*/ setOutputInfo(job, i, outputInfos[i], inBlockRepresentation); } MultipleOutputs.addNamedOutput(job, Integer.toString(i), outputInfos[i].outputFormatClass, outputInfos[i].outputKeyClass, outputInfos[i].outputValueClass); } job.setOutputFormat(NullOutputFormat.class); // configure temp output Path tempOutputPath = new Path(constructTempOutputFilename()); FileOutputFormat.setOutputPath(job, tempOutputPath); MapReduceTool.deleteFileIfExistOnHDFS(tempOutputPath, job); }
From source file:com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java
License:Apache License
/**
 * Installs this composite adapter as the job's output format and wires up
 * the committer, key/value holder classes, and serialization support.
 * Also serializes the per-output descriptor array into the conf so the
 * child outputs can be reconstructed at task time.
 */
private void set(JobConf conf) throws Exception {
    // TODO: Should this call be passed to outputs?
    conf.setOutputFormat(this.getClass());
    conf.setOutputCommitter(CompositeCommitter.class);
    conf.setOutputKeyClass(JsonHolderDefault.class);
    conf.setOutputValueClass(JsonHolderDefault.class);
    HadoopSerializationDefault.register(conf);
    // We do not support speculative execution at this time.
    conf.setSpeculativeExecution(false);
    // write out the input adapter args array
    ConfUtil.writeConfArray(conf, ConfSetter.CONFOUTOPTIONS_NAME, descriptors);
}
From source file:com.liveramp.hank.cascading.DomainBuilderTap.java
License:Apache License
public void sinkConfInit(FlowProcess<JobConf> process, JobConf conf) { super.sinkConfInit(process, conf); // Output Format conf.setOutputFormat(this.outputFormatClass); // Output Committer conf.setOutputCommitter(DomainBuilderOutputCommitter.class); // Set this tap's Domain name locally in the conf if (conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) { throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName + " but it was previously set to " + conf.get(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME)); } else {/*from w ww .j ava 2s . c o m*/ conf.set(DomainBuilderAbstractOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName); } }
From source file:com.liveramp.hank.hadoop.AbstractHadoopDomainBuilder.java
License:Apache License
/**
 * Applies the configuration shared by all Hank domain-builder jobs:
 * Hank-specific conf properties, the domain-builder committer, the
 * temporary output path for the given version, the configured output
 * format, and one reduce task per partition.
 *
 * @param properties    domain-builder settings (output format, paths, conf keys)
 * @param versionNumber domain version being built
 * @param numPartitions number of partitions, i.e. number of reducers
 * @param conf          job configuration to mutate
 * @throws IOException declared for compatibility with callers
 */
private void configureJobCommon(DomainBuilderProperties properties, int versionNumber, int numPartitions,
        JobConf conf) throws IOException {
    // Hank specific configuration
    properties.setJobConfProperties(conf, versionNumber);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Output path (set to tmp output path)
    FileOutputFormat.setOutputPath(conf, new Path(properties.getTmpOutputPath(versionNumber)));
    // Output format
    conf.setOutputFormat(properties.getOutputFormatClass());
    // Num reduce tasks: one reducer per partition
    conf.setNumReduceTasks(numPartitions);
}
From source file:com.rapleaf.hank.cascading.DomainBuilderTap.java
License:Apache License
@Override public void sinkInit(JobConf conf) throws IOException { super.sinkInit(conf); // Output Format conf.setOutputFormat(this.outputFormatClass); // Output Committer conf.setOutputCommitter(DomainBuilderOutputCommitter.class); // Set this tap's Domain name locally in the conf if (conf.get(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME) != null) { throw new RuntimeException("Trying to set domain name configuration parameter to " + domainName + " but it was previously set to " + conf.get(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME)); } else {/* w w w . j a v a 2 s .c o m*/ conf.set(DomainBuilderOutputFormat.CONF_PARAM_HANK_DOMAIN_NAME, domainName); } }
From source file:com.rapleaf.hank.hadoop.AbstractHadoopDomainBuilder.java
License:Apache License
/**
 * Applies the configuration shared by all Hank domain-builder jobs:
 * Hank-specific conf properties, the domain-builder committer, the
 * temporary output path for the given version, the configured output
 * format, and one reduce task per partition.
 *
 * @param properties    domain-builder settings (output format, paths, conf keys)
 * @param versionNumber domain version being built
 * @param numPartitions number of partitions, i.e. number of reducers
 * @param conf          job configuration to mutate
 */
private void configureJobCommon(DomainBuilderProperties properties, int versionNumber, int numPartitions,
        JobConf conf) {
    // Hank specific configuration
    properties.setJobConfProperties(conf, versionNumber);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Output path (set to tmp output path)
    FileOutputFormat.setOutputPath(conf, new Path(properties.getTmpOutputPath(versionNumber)));
    // Output format
    conf.setOutputFormat(properties.getOutputFormatClass());
    // Num reduce tasks: one reducer per partition
    conf.setNumReduceTasks(numPartitions);
}
From source file:com.rapleaf.hank.hadoop.HadoopDomainBuilder.java
License:Apache License
/**
 * Builds a complete JobConf for a Hank domain-build MapReduce job: the
 * caller's input format and mapper feed key/partition pairs to
 * DomainBuilderReducer, partitioned by DomainBuilderPartitioner, committed
 * through DomainBuilderOutputCommitter into the version's tmp output path.
 *
 * @param inputPath        input path(s) for the job
 * @param inputFormatClass input format used to read inputPath
 * @param mapperClass      mapper emitting KeyAndPartitionWritableComparable/ValueWritable
 * @param versionNumber    domain version being built
 * @param properties       domain-builder settings (output format, paths, conf keys)
 * @return a fully configured JobConf ready to submit
 */
public static final JobConf createJobConfiguration(String inputPath,
        Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass, int versionNumber,
        DomainBuilderProperties properties) {
    JobConf conf = new JobConf();
    // Input specification
    conf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(conf, inputPath);
    // Mapper class and key/value classes
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    conf.setMapOutputValueClass(ValueWritable.class);
    // Reducer class and key/value classes
    conf.setReducerClass(DomainBuilderReducer.class);
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);
    // Output format
    conf.setOutputFormat(properties.getOutputFormatClass());
    // Output path (set to tmp output path)
    FileOutputFormat.setOutputPath(conf, new Path(properties.getTmpOutputPath(versionNumber)));
    // Partitioner routes each record to its domain partition's reducer
    conf.setPartitionerClass(DomainBuilderPartitioner.class);
    // Output Committer
    conf.setOutputCommitter(DomainBuilderOutputCommitter.class);
    // Hank specific configuration
    properties.setJobConfProperties(conf, versionNumber);
    return conf;
}
From source file:com.ricemap.spateDB.operations.FileMBR.java
License:Apache License
/**
 * Computes the minimum bounding Prism (MBR) of all shapes in a file.
 * If the file is globally indexed, the MBR is read directly from the
 * global index; otherwise a MapReduce job is launched to compute it and
 * the result is parsed from the job's text output.
 *
 * NOTE(review): the original javadoc claimed this "counts the exact number
 * of lines in a file" — that does not match the code, which computes an MBR.
 *
 * @param fs         file system containing the file
 * @param file       path of the file to scan
 * @param stockShape shape instance used to parse records
 * @param background if true, submit the job asynchronously and return null
 * @return the file's MBR, or null when background is true
 * @throws IOException on file-system or job-submission failure
 */
public static <S extends Shape> Prism fileMBRMapReduce(FileSystem fs, Path file, S stockShape,
        boolean background) throws IOException {
    // Quickly get file MBR if it is globally indexed
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(fs, file);
    if (globalIndex != null) {
        // Return the MBR of the global index.
        // Compute file size by adding up sizes of all files assuming they are
        // not compressed
        long totalLength = 0;
        for (Partition p : globalIndex) {
            Path filePath = new Path(file, p.filename);
            if (fs.exists(filePath))
                totalLength += fs.getFileStatus(filePath).getLen();
        }
        sizeOfLastProcessedFile = totalLength;
        return globalIndex.getMBR();
    }
    JobConf job = new JobConf(FileMBR.class);
    Path outputPath;
    FileSystem outFs = FileSystem.get(job);
    // Pick a random sibling path that does not already exist for job output.
    do {
        outputPath = new Path(file.toUri().getPath() + ".mbr_" + (int) (Math.random() * 1000000));
    } while (outFs.exists(outputPath));
    job.setJobName("FileMBR");
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Prism.class);
    // Map emits partial MBRs; Reduce (also used as combiner) merges them.
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Reduce.class);
    // Oversubscribe map tasks relative to cluster capacity.
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setInputFormat(ShapeInputFormat.class);
    SpatialSite.setShapeClass(job, stockShape.getClass());
    job.setOutputFormat(TextOutputFormat.class);
    ShapeInputFormat.setInputPaths(job, file);
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputCommitter(MBROutputCommitter.class);
    // Submit the job
    if (background) {
        // Fire-and-forget: caller can poll lastSubmittedJob.
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
        return null;
    } else {
        lastSubmittedJob = JobClient.runJob(job);
        Counters counters = lastSubmittedJob.getCounters();
        Counter inputBytesCounter = counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
        FileMBR.sizeOfLastProcessedFile = inputBytesCounter.getValue();
        // Read job result: parse the MBR from the first non-empty part file.
        FileStatus[] results = outFs.listStatus(outputPath);
        Prism mbr = new Prism();
        for (FileStatus fileStatus : results) {
            if (fileStatus.getLen() > 0 && fileStatus.getPath().getName().startsWith("part-")) {
                LineReader lineReader = new LineReader(outFs.open(fileStatus.getPath()));
                Text text = new Text();
                if (lineReader.readLine(text) > 0) {
                    mbr.fromText(text);
                }
                lineReader.close();
            }
        }
        // Clean up the temporary job output.
        outFs.delete(outputPath, true);
        return mbr;
    }
}