List of usage examples for org.apache.hadoop.mapred.JobConf#setJarByClass
public void setJarByClass(Class cls)
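setJarByClass(Class) tells the MapReduce framework which jar to ship to the cluster: Hadoop locates the jar that contains the given class and submits it with the job, so that task JVMs can load the user's mapper, reducer, and supporting classes. Before the examples, here is a minimal, self-contained sketch of the typical pattern using the classic org.apache.hadoop.mapred API; the driver class name (MyJobDriver), the identity mapper/reducer, and the command-line input/output arguments are illustrative assumptions, not taken from the examples that follow.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

// Hypothetical driver class; any class packaged in the job jar works.
public class MyJobDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        conf.setJobName("my-job");
        // Point the framework at the jar containing this class so the
        // job code can be distributed to the cluster nodes.
        conf.setJarByClass(MyJobDriver.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}

The same call appears near the top of each driver below, immediately after the JobConf is constructed.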
From source file:edu.ucsb.cs.sort.length.LengthSortMain.java
License:Apache License
/**
 * Sets the job configurations, including the mapper and reducer classes,
 * to do the sorting based on vector lengths.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(LengthSortMain.class.getSimpleName());
    job.setJarByClass(LengthSortMain.class);
    job.setMapperClass(LengthSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(LengthRangePartitioner.class);

    job.setReducerClass(LengthSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Vector Lengths", -1);
}
From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java
License:Apache License
/**
 * Main method sets the job configurations, including the mapper and reducer
 * classes, to do the sorting.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(MaxwRangePartitioner.class);

    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By infinity-Norm", -1);
}
From source file:edu.ucsb.cs.sort.norm.NormSortMain.java
License:Apache License
/**
 * Main method sets the job configurations, including the mapper and reducer
 * classes, to do the sorting. Some of the produced partitions might be
 * merged later to reflect the number of partitions chosen by the user.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName("NormSort");
    job.setJarByClass(NormSortMain.class);
    job.setMapperClass(NormSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(NormRangePartitioner.class);

    job.setReducerClass(NormSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By p-norm", -1);
}
From source file:edu.ucsb.cs.sort.signature.SigSortMain.java
License:Apache License
/**
 * Sets the job configurations, including the mapper and reducer classes,
 * to do the sorting based on signatures.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(SigSortMain.class.getSimpleName());
    job.setJarByClass(SigSortMain.class);
    job.setMapperClass(SigSortMapper.class);
    job.setMapOutputKeyClass(BitSignature.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setPartitionerClass(SigRangePartitioner.class);

    job.setReducerClass(SigSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Signature Bytes", -1);
}
From source file:edu.umd.cloud9.webgraph.driver.BuildIndexableAnchorCollection.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf());
    FileSystem fs = FileSystem.get(conf);

    String collectionPath = DriverUtil.argValue(args, DriverUtil.CL_INPUT);
    String outputPath = DriverUtil.argValue(args, DriverUtil.CL_OUTPUT);
    String docnoMappingClass = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING_CLASS);
    String docnoMapping = DriverUtil.argValue(args, DriverUtil.CL_DOCNO_MAPPING);
    int numReducers = Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_NUMBER_OF_REDUCERS));
    if (DriverUtil.argExists(args, DriverUtil.CL_MAX_LENGTH)) {
        conf.setInt("Cloud9.maxContentLength",
                Integer.parseInt(DriverUtil.argValue(args, DriverUtil.CL_MAX_LENGTH)));
    }
    conf.set("Cloud9.DocnoMappingClass", docnoMappingClass);

    LOG.info("Tool name: BuildIndexableAnchorCollection");
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno-mapping class: " + docnoMappingClass);
    LOG.info(" - docno-mapping file: " + docnoMapping);
    if (args.length == 6) {
        LOG.info(" - maximum content length: " + conf.getInt("Cloud9.maxContentLength", 0));
    }

    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setJobName("BuildIndexableAnchorCollection");
    conf.setJarByClass(BuildIndexableAnchorCollection.class);
    conf.setNumMapTasks(100);
    conf.setNumReduceTasks(numReducers);
    DistributedCache.addCacheFile(new URI(docnoMapping), conf);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, new Path(collectionPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IndexableAnchorText.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    fs.delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);
    return 0;
}
From source file:eu.stratosphere.myriad.driver.hadoop.MyriadDriverHadoopJob.java
License:Apache License
private JobConf createJobConf() {
    // create job
    JobConf conf = new JobConf(getConf());
    conf.setJarByClass(MyriadDriverHadoopJob.class);
    conf.setJobName(String.format("%s", this.parameters.getDGenName()));

    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(MyriadInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // input format configuration
    MyriadInputFormat.setDriverJobParameters(conf, this.parameters);
    // output format configuration
    FileOutputFormat.setOutputPath(conf, new Path(this.parameters.getJobOutputPath()));

    return conf;
}
From source file:findstableweatherstate.FindStableWeatherState.java
public String call() throws Exception {
    Path firstOutputPath = new Path("input/firstOutput");
    Path secondOutputPath = new Path("input/secondOutput");
    long startTime, stopTime, elapsedTime;

    JobConf job = new JobConf();
    job.setJarByClass(getClass());
    job.setJobName("invertedindex");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(JoinReducer.class);
    MultipleInputs.addInputPath(job, new Path(getInputPathStation()), TextInputFormat.class,
            StationMapper.class);
    MultipleInputs.addInputPath(job, new Path(getInputPathReadings()), TextInputFormat.class,
            ReadingsMapper.class);
    FileOutputFormat.setOutputPath(job, firstOutputPath);

    JobConf job2 = new JobConf();
    job2.setJarByClass(getClass());
    job2.setJobName("secondJob");
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(Text.class);
    //job2.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    FileInputFormat.setInputPaths(job2, firstOutputPath);
    job2.setMapperClass(CalculateMinMaxTemperatureMapper.class);
    job2.setReducerClass(CalculateMaxMinTemperatureReducer.class);
    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(job2, secondOutputPath);
    }

    JobConf job3 = new JobConf();
    job3.setJarByClass(getClass());
    job3.setJobName("thirdJob");
    job3.setOutputKeyClass(Text.class);
    job3.setOutputValueClass(Text.class);
    job3.setMapOutputKeyClass(DoubleWritable.class);
    job3.setMapOutputValueClass(Text.class);
    //job2.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    FileInputFormat.setInputPaths(job3, secondOutputPath);
    job3.setMapperClass(SortStateMapper.class);
    job3.setReducerClass(SortStateReducer.class);
    if (getOutputPath() != null) {
        FileOutputFormat.setOutputPath(job3, new Path(getOutputPath()));
    }

    startTime = System.currentTimeMillis();
    JobClient.runJob(job);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** First Job : " + elapsedTime / 1000);

    startTime = System.currentTimeMillis();
    JobClient.runJob(job2);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** Second Job : " + elapsedTime / 1000);

    startTime = System.currentTimeMillis();
    JobClient.runJob(job3);
    stopTime = System.currentTimeMillis();
    elapsedTime = stopTime - startTime;
    System.out.println("******************** Third Job : " + elapsedTime / 1000);

    return "";
}
From source file:FormatStorage1.MergeFileUtil.java
License:Open Source License
public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);

    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);

    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport)
                    || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:FormatStorage1.MergeFileUtil.java
License:Open Source License
public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil");
    job.setJarByClass(MergeFileUtil.class);

    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    for (FileStatus status : fss) {
        if (status.isDir()) {
            throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString());
        }
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    long fl = 512 * 1024 * 1024;
    int reduces = (int) (wholesize / fl + 1);
    job.setNumReduceTasks(reduces);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);

    job.setInputFormat(MergeIFormatInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport)
                    || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
From source file:FormatStorage1.MergeFileUtil1.java
License:Open Source License
public static void run(String inputdir, String outputdir, Configuration conf) throws IOException {
    JobConf job = new JobConf(conf);
    job.setJobName("MergeFileUtil1");
    job.setJarByClass(MergeFileUtil1.class);

    FileSystem fs = FileSystem.get(job);
    if (fs.exists(new Path(outputdir))) {
        throw new IOException("outputdir: " + outputdir + " exist!!!");
    }

    FileStatus[] fss = fs.listStatus(new Path(inputdir));
    if (fss == null || fss.length <= 0) {
        throw new IOException("no input files");
    }

    IFormatDataFile ifdf = new IFormatDataFile(job);
    ifdf.open(fss[0].getPath().toString());
    job.set("ifdf.head.info", ifdf.fileInfo().head().toStr());
    ifdf.close();

    long wholesize = 0;
    for (FileStatus status : fss) {
        wholesize += status.getLen();
    }

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputdir);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(IRecord.class);

    job.setMapperClass(MergeMap.class);

    job.setInputFormat(CombineFormatStorageFileInputFormat.class);
    job.setOutputFormat(MergeIFormatOutputFormat1.class);

    JobClient jc = new JobClient(job);
    RunningJob rjob = jc.submitJob(job);
    try {
        String lastReport = "";
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rjob.isComplete()) {
            Thread.sleep(1000);

            int mapProgress = Math.round(rjob.mapProgress() * 100);
            int reduceProgress = Math.round(rjob.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

            if (!report.equals(lastReport)
                    || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.err.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }
        LOG.info(rjob.getJobState());
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}