List of usage examples for org.apache.hadoop.mapred JobConf setClass
public void setClass(String name, Class<?> theClass, Class<?> xface)
Sets the value of the <code>name</code> property to the name of <code>theClass</code>, which must implement the given interface <code>xface</code>.
. From source file:com.alexholmes.hadooputils.sort.Sort.java
License:Apache License
/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks, or null to use the cluster default
 * @param numReduceTasks    number of reduce tasks, or null to derive one from the
 *                          cluster's max reduce capacity
 * @param sampler           sampler for total-order partitioning, or null to skip sampling
 * @param codecClass        the compression codec for compressing final outputs, or null
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs, or null
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files (only applies when the output codec is LZOP)
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    // Explicit task counts win; otherwise reducers default to ~90% of cluster capacity,
    // optionally overridden per-host via "test.sort.reduces_per_host".
    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    // Identity map; all ordering work happens in the shuffle/sort and the reducer.
    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        // If the input is a single file, write the partition file next to it (in its parent dir).
        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);

        // Ship the partition file to tasks via the distributed cache, symlinked
        // under the fragment name "_sortPartitioning".
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        // Only index when the final outputs were actually written with the LZOP codec.
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/** * Set a PathFilter to be applied to the input paths for the map-reduce job. * * @param filter the PathFilter class use for filtering the input paths. *//*from w w w . ja v a 2 s. c o m*/ public static void setInputPathFilter(JobConf conf, Class<? extends PathFilter> filter) { conf.setClass("mapred.input.pathFilter.class", filter, PathFilter.class); }
From source file:com.ebay.erl.mobius.core.JobSetup.java
License:Apache License
/** * Setup the output path of the given <code>job</code> to * <code>outputFolder</code> with the given * <code>outputFormat</code>. *///from ww w .j a va2 s. c o m public static void setupOutputs(JobConf job, Path outputFolder, Class<? extends FileOutputFormat> outputFormat) throws IOException { FileOutputFormat.setOutputPath(job, outputFolder); job.setClass("mapred.output.format.class", outputFormat, FileOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Tuple.class); ensureOutputDelete(outputFolder, job); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setInputInfo(JobConf job, byte input, InputInfo inputinfo, int brlen, int bclen, ConvertTarget target) {/*w w w. j a v a2 s .c o m*/ Class<? extends Converter> converterClass = getConverterClass(inputinfo, brlen, bclen, target); job.setClass(INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input, converterClass, Converter.class); job.setClass(INPUT_KEY_CLASS_PREFIX_CONFIG + input, inputinfo.inputKeyClass, Writable.class); job.setClass(INPUT_VALUE_CLASS_PREFIX_CONFIG + input, inputinfo.inputValueClass, Writable.class); }
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setOutputInfo(JobConf job, int i, OutputInfo outputinfo, boolean sourceInBlock) throws DMLRuntimeException { Class<? extends Converter> converterClass; if (sourceInBlock) { if (outputinfo.outputValueClass.equals(MatrixCell.class)) converterClass = BinaryBlockToBinaryCellConverter.class; else if (outputinfo.outputValueClass.equals(Text.class)) converterClass = BinaryBlockToTextCellConverter.class; else if (outputinfo.outputValueClass.equals(MatrixBlock.class)) converterClass = IdenticalConverter.class; else if (outputinfo.outputValueClass.equals(IntWritable.class)) converterClass = WeightedCellToSortInputConverter.class; else if (outputinfo.outputValueClass.equals(WeightedPair.class)) converterClass = IdenticalConverter.class; else/* ww w . j a v a2 s .com*/ converterClass = IdenticalConverter.class; } else { if (outputinfo.outputValueClass.equals(MatrixCell.class)) converterClass = IdenticalConverter.class; else if (outputinfo.outputValueClass.equals(Text.class)) converterClass = BinaryCellToTextConverter.class; else if (outputinfo.outputValueClass.equals(IntWritable.class)) converterClass = WeightedCellToSortInputConverter.class; else if (outputinfo.outputValueClass.equals(WeightedPair.class)) converterClass = IdenticalConverter.class; else throw new DMLRuntimeException("unsupported conversion: " + outputinfo.outputValueClass); // converterClass=IdenticalConverter.class; } job.setClass(OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i, converterClass, Converter.class); }
From source file:com.ibm.bi.dml.runtime.matrix.sort.CompactInputFormat.java
License:Open Source License
public static void setKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass, Class<? extends Writable> valueClass) { job.setClass(KEY_CLASS, keyClass, WritableComparable.class); job.setClass(VALUE_CLASS, valueClass, Writable.class); }
From source file:com.ibm.bi.dml.runtime.matrix.sort.PickFromCompactInputFormat.java
License:Open Source License
@SuppressWarnings("rawtypes") public static void setKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass, Class<? extends Writable> valueClass) { // job.setClass(KEY_CLASS, keyClass, WritableComparable.class); job.setClass(VALUE_CLASS, valueClass, Writable.class); }
From source file:com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java
License:Open Source License
public static void setTargetKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass, Class<? extends Writable> valueClass) { job.setClass(TARGET_KEY_CLASS, keyClass, WritableComparable.class); job.setClass(TARGET_VALUE_CLASS, valueClass, Writable.class); }
From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java
License:Apache License
public void setSequential(JobConf conf) throws Exception { registerSerializers(conf);//from www .j a v a 2s . co m // For an expression, the location is the final file name Path outPath = new Path(location); FileSystem fs = outPath.getFileSystem(conf); outPath = outPath.makeQualified(fs); if (fs.exists(outPath)) { // TODO: Jaql currently has overwrite semantics; add flag to control this if (fs.isFile(outPath)) { fs.delete(outPath, false); } else { // Look for a map-reduce output directory FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() { boolean onlyOne = true; public boolean accept(Path path) { String name = path.getName(); if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) { return false; } if (onlyOne) { onlyOne = false; return true; } return false; } }); if (nonMR.length > 0) { throw new IOException( "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath()); } fs.delete(outPath, true); } } // In sequential mode, we will write directly to the output file // and bypass the _temporary directory and rename of the standard // FileOutputCommitter by using our own DirectFileOutputCommitter. FileOutputFormat.setOutputPath(conf, outPath.getParent()); conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class); }
From source file:com.ricemap.spateDB.operations.RangeQuery.java
License:Apache License
/**
 * Performs a range query using MapReduce.
 *
 * @param fs             file system holding the input file
 * @param inputFile      path of the (possibly spatially indexed) input
 * @param userOutputPath output path, or null to use (and later delete) a
 *                       generated temporary path
 * @param queryShape     the query range
 * @param shape          shape type stored in the input file
 * @param overwrite      whether an existing output path may be deleted
 * @param background     when true the job is submitted without waiting
 * @param query          query descriptor (type and field to aggregate on)
 * @return the number of matching records, or -1 when run in the background
 * @throws IOException if job setup or execution fails
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);
    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        // Generate an unused temporary output path next to the input.
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }
    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // Set query range for filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    // Reflect on the queried field of the shape to pick the map output key type;
    // falls back to NullWritable when the field is absent or of another type.
    Class<?> OutputKey = NullWritable.class;
    try {
        Class<?> c = shape.getClass();
        Field f = c.getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            OutputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            OutputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            OutputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(OutputKey);

    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // } else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        // Result count is taken from the number of map output records.
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}