List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
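Job.getConfiguration() exposes the Configuration that backs the job; properties written through it before submission travel with the job and are visible to every task. Note that Job copies the Configuration it is constructed with, so late changes must go through getConfiguration() rather than the original object. A minimal sketch of the typical pattern (the custom property key is a placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "example-job");

// Job.getInstance copies conf, so later settings must go through the copy
// that job.getConfiguration() returns, not the original conf object.
job.getConfiguration().set("example.custom.property", "value"); // placeholder key
job.getConfiguration().setLong("mapreduce.task.timeout", 600000); // standard Hadoop key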
From source file:com.datasalt.pangool.tuplemr.TupleMRBuilder.java
License:Apache License
public Job createJob() throws IOException, TupleMRException {
    failIfNull(tupleReducer, "Need to set a group handler");
    failIfEmpty(multipleInputs.getMultiInputs(), "Need to add at least one input");
    failIfNull(outputFormat, "Need to set output format");
    failIfNull(outputKeyClass, "Need to set outputKeyClass");
    failIfNull(outputValueClass, "Need to set outputValueClass");
    failIfNull(outputPath, "Need to set outputPath");

    // perform a deep copy of the Configuration
    this.conf = new Configuration(this.conf);

    TupleMRConfig tupleMRConf = buildConf();
    // Serialize PangoolConf in Hadoop Configuration
    instanceFilesCreated.addAll(TupleMRConfig.set(tupleMRConf, conf));
    Job job = (jobName == null) ? new Job(conf) : new Job(conf, jobName);

    if (tupleMRConf.getRollupFrom() != null) {
        job.setReducerClass(RollupReducer.class);
    } else {
        job.setReducerClass(SimpleReducer.class);
    }

    if (tupleCombiner != null) {
        job.setCombinerClass(SimpleCombiner.class); // not rollup by now
        // Set Combiner Handler
        String uniqueName = UUID.randomUUID().toString() + '.' + "combiner-handler.dat";
        try {
            InstancesDistributor.distribute(tupleCombiner, uniqueName, job.getConfiguration());
            instanceFilesCreated.add(uniqueName);
            job.getConfiguration().set(SimpleCombiner.CONF_COMBINER_HANDLER, uniqueName);
        } catch (URISyntaxException e1) {
            throw new TupleMRException(e1);
        }
    }

    // Set Tuple Reducer
    try {
        String uniqueName = UUID.randomUUID().toString() + '.' + "group-handler.dat";
        InstancesDistributor.distribute(tupleReducer, uniqueName, job.getConfiguration());
        instanceFilesCreated.add(uniqueName);
        job.getConfiguration().set(SimpleReducer.CONF_REDUCER_HANDLER, uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }

    // Enabling serialization
    TupleSerialization.enableSerialization(job.getConfiguration());
    job.setJarByClass((jarByClass != null) ? jarByClass : tupleReducer.getClass());
    job.setMapOutputKeyClass(DatumWrapper.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setPartitionerClass(TupleHashPartitioner.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    job.setSortComparatorClass(SortComparator.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    FileOutputFormat.setOutputPath(job, outputPath);
    instanceFilesCreated.addAll(multipleInputs.configureJob(job));
    instanceFilesCreated.addAll(namedOutputs.configureJob(job));

    // Configure a {@link ProxyOutputFormat} for Pangool's Multiple Outputs to
    // work: {@link PangoolMultipleOutput}
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        InstancesDistributor.distribute(outputFormat, uniqueName, conf);
        instanceFilesCreated.add(uniqueName);
    } catch (URISyntaxException e1) {
        throw new TupleMRException(e1);
    }
    job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
    job.setOutputFormatClass(ProxyOutputFormat.class);
    return job;
}
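The pattern above recurs throughout Pangool: serialize a handler instance under a unique file name, ship it to the tasks, and record that name under a well-known key in the job's Configuration so the tasks can deserialize it. A hedged sketch of the same idea using only the stock Hadoop API (the staging directory, file-name suffix, and confKey are illustrative, not Pangool's actual layout):

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.UUID;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

// Serialize a handler, ship it to the tasks via the distributed cache, and
// record its name in the job's Configuration so tasks can locate it.
public static void distribute(Job job, Serializable handler, String confKey) throws IOException {
    String uniqueName = UUID.randomUUID().toString() + ".handler.dat";
    Path file = new Path("/tmp", uniqueName); // illustrative staging directory
    FileSystem fs = FileSystem.get(job.getConfiguration());
    try (ObjectOutputStream out = new ObjectOutputStream(fs.create(file))) {
        out.writeObject(handler);
    }
    job.addCacheFile(file.toUri());                  // ship alongside the job
    job.getConfiguration().set(confKey, uniqueName); // record the lookup name
}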
From source file:com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java
License:Apache License
public void assertRun(Job job) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(job.getConfiguration());
    HadoopUtils.deleteIfExists(fs, FileOutputFormat.getOutputPath(job));
    // Close input writers first
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        Object in = entry.getValue();
        if (in instanceof SequenceFile.Writer) {
            ((SequenceFile.Writer) in).close();
        } else if (in instanceof TupleFile.Writer) {
            ((TupleFile.Writer) in).close();
        }
    }
    job.waitForCompletion(true);
    Assert.assertTrue(job.isSuccessful());
}
From source file:com.datasalt.utils.mapred.counter.MapRedCounter.java
License:Apache License
protected static Job buildMapRedCounterJobWithoutCombiner(String name,
        @SuppressWarnings("rawtypes") Class<? extends OutputFormat> outputFormat, String outPath,
        Configuration conf) throws IOException {

    Job job = new Job(conf, name);
    Path output = new Path(outPath);
    HadoopUtils.deleteIfExists(FileSystem.get(conf), output);
    job.setJarByClass(MapRedCounter.class);
    job.setReducerClass(MapRedCountReducer.class);
    job.setMapOutputKeyClass(CounterKey.class);
    job.setMapOutputValueClass(CounterValue.class);
    job.setOutputFormatClass(outputFormat);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // Secondary sorting configuration.
    job.setGroupingComparatorClass(CounterKey.IdGroupComparator.class);
    job.setPartitionerClass(CounterKey.IdGroupPartitioner.class);

    FileOutputFormat.setOutputPath(job, output);
    String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
    try {
        DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
        job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
        job.setOutputFormatClass(ProxyOutputFormat.class);

        // Multioutput configuration
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterKey.class, LongWritable.class);
        PangoolMultipleOutputs.addNamedOutput(job, Outputs.COUNTDISTINCTFILE.toString(),
                new HadoopOutputFormat(SequenceFileOutputFormat.class), CounterDistinctKey.class,
                LongPairWritable.class);
    } catch (URISyntaxException e) {
        e.printStackTrace();
        throw new IOException(e);
    }
    return job;
}
From source file:com.datasalt.utils.mapred.crossproduct.CrossProductMapRed.java
License:Apache License
public Job getJob() throws IOException {
    if (job == null) {
        MultiJoiner multiJoiner = new MultiJoiner(name, conf);
        multiJoiner.setReducer(CrossProductReducer.class);
        multiJoiner.setOutputKeyClass(CrossProductPair.class);
        multiJoiner.setOutputValueClass(NullWritable.class);
        multiJoiner.setOutputFormat(outputFormat);
        multiJoiner.setOutputPath(outputPath);
        multiJoiner.setJarByClass((jarByClass != null) ? jarByClass : leftInputMapper);
        Job job = multiJoiner
                .addChanneledInput(SECOND_CHANNEL_IN_REDUCER, leftInputPath, Object.class, leftInputFormat,
                        leftInputMapper)
                .addChanneledInput(FIRST_CHANNEL_IN_REDUCER, rightInputPath, Object.class, rightInputFormat,
                        rightInputMapper)
                .getJob();
        /*
         * Outputs
         */
        String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat";
        try {
            DCUtils.serializeToDC(new HadoopOutputFormat(SequenceFileOutputFormat.class), uniqueName, conf);
            job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName);
            job.setOutputFormatClass(ProxyOutputFormat.class);
            PangoolMultipleOutputs.addNamedOutput(job, EXTRA_OUTPUT,
                    new HadoopOutputFormat(SequenceFileOutputFormat.class), CrossProductExtraKey.class,
                    CrossProductPair.class);
        } catch (URISyntaxException e) {
            throw new IOException(e);
        }
        this.job = job;
    }
    return job;
}
From source file:com.david.mos.out.FileOutputFormat.java
License:Apache License
/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job The job to modify
 * @param outputDir the {@link Path} of the output directory for
 *          the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
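Because makeQualified resolves against the job's FileSystem, even a relative output path is stored fully qualified in the Configuration. A small illustrative call site (the path and cluster URI are placeholders; the stock org.apache.hadoop.mapreduce.lib.output.FileOutputFormat is assumed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

Job job = Job.getInstance(new Configuration(), "output-path-example");
FileOutputFormat.setOutputPath(job, new Path("out")); // relative path
// Reading it back through the job's Configuration yields the qualified form,
// e.g. hdfs://namenode:8020/user/alice/out on a typical HDFS setup.
Path qualified = FileOutputFormat.getOutputPath(job);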
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Set a PathFilter to be applied to the input paths for the map-reduce job.
 *
 * @param job the job to modify
 * @param filter the PathFilter class to use for filtering the input paths.
 */
public static void setInputPathFilter(Job job, Class<? extends PathFilter> filter) {
    job.getConfiguration().setClass(PATHFILTER_CLASS, filter, PathFilter.class);
}
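The filter class is instantiated reflectively on the job side, so it needs a no-arg constructor. A minimal sketch of a PathFilter that skips underscore-prefixed files such as _SUCCESS markers (the class name is illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Accept everything except files whose names start with an underscore.
public class NoUnderscoreFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return !path.getName().startsWith("_");
    }
}

// Driver side: FileInputFormat.setInputPathFilter(job, NoUnderscoreFilter.class);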
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Set the minimum input split size.
 * @param job the job to modify
 * @param size the minimum size
 */
public static void setMinInputSplitSize(Job job, long size) {
    job.getConfiguration().setLong(SPLIT_MINSIZE, size);
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Set the maximum split size.
 * @param job the job to modify
 * @param size the maximum split size
 */
public static void setMaxInputSplitSize(Job job, long size) {
    job.getConfiguration().setLong(SPLIT_MAXSIZE, size);
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Set the array of {@link Path}s as the list of inputs
 * for the map-reduce job.
 *
 * @param job The job to modify
 * @param inputPaths the {@link Path}s of the input directories/files
 *          for the map-reduce job.
 */
public static void setInputPaths(Job job, Path... inputPaths) throws IOException {
    Configuration conf = job.getConfiguration();
    Path path = inputPaths[0].getFileSystem(conf).makeQualified(inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for (int i = 1; i < inputPaths.length; i++) {
        str.append(StringUtils.COMMA_STR);
        path = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]);
        str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set(INPUT_DIR, str.toString());
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for
 *          the map-reduce job.
 */
public static void addInputPath(Job job, Path path) throws IOException {
    Configuration conf = job.getConfiguration();
    path = path.getFileSystem(conf).makeQualified(path);
    String dirStr = StringUtils.escapeString(path.toString());
    String dirs = conf.get(INPUT_DIR);
    conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
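All of these helpers ultimately write properties into the Configuration returned by job.getConfiguration(). A hedged end-to-end driver sketch using the stock org.apache.hadoop.mapreduce.lib classes (paths, sizes, and the job name are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "split-tuning-example");

// Each helper below writes a property into job.getConfiguration():
FileInputFormat.setInputPaths(job, new Path("/data/a"), new Path("/data/b"));
FileInputFormat.addInputPath(job, new Path("/data/c"));        // appends to the list
FileInputFormat.setMinInputSplitSize(job, 64L * 1024 * 1024);  // 64 MB floor
FileInputFormat.setMaxInputSplitSize(job, 256L * 1024 * 1024); // 256 MB ceiling
FileOutputFormat.setOutputPath(job, new Path("/out/example"));

// The stored values can be read back through the Configuration, e.g.:
// job.getConfiguration().get("mapreduce.input.fileinputformat.inputdir")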