List of usage examples for org.apache.hadoop.mapred.JobConf#setJobName
public void setJobName(String name)
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
/**
 * Configures the projections-generation job and passes it to {@code JobSubmitter.run}.
 *
 * <p>Both {@code INPUT_DIR} and {@code OUTPUT_DIR} are removed if they exist. A seed
 * sequence file {@code INPUT_DIR/file} is then written containing one {@code (i, i)}
 * record for every configured reducer. The job runs {@code IdentityMapper} followed by
 * {@code ProjectionReducer}.
 *
 * @param job the job configuration to populate and submit
 * @throws IOException if a file system operation fails or is propagated by the helpers
 */
public static void main(JobConf job) throws IOException {
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    // nBits is "D" and nFeatures is "K" in the original author's notation.
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    int nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    int nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY,
            ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(inputPath)) {
        fs.delete(inputPath, true);
    }

    // Seed input: one (i, i) record per reducer.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    try {
        for (int i = 0; i < nReducers; i++) {
            writer.append(new IntWritable(i), new IntWritable(i));
        }
    } finally {
        // Close even when an append fails so the underlying stream is not leaked.
        writer.close();
    }

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { JobConf job = new JobConf(SignaturesGenerator.class); new GenericOptionsParser(job, args); job.setJobName(SignaturesGenerator.class.getSimpleName()); int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE); setParameters();//w w w .j av a 2s . c om FileSystem fs = FileSystem.get(job); prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR)); Path outputPath = new Path(OUTPUT_DIR); if (fs.exists(outputPath)) fs.delete(outputPath); FileInputFormat.setInputPaths(job, INPUT_DIR); // Path(INPUT_DIR)); FileOutputFormat.setOutputPath(job, outputPath); // FileOutputFormat.setCompressOutput(job, false); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.set("mapred.child.java.opts", "-Xmx2048m"); job.setInt("mapred.map.max.attempts", 10); job.setInt("mapred.reduce.max.attempts", 10); job.setInt("mapred.task.timeout", 6000000); job.setMapperClass(SigMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BitSignature.class); job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(BitSignature.class); JobSubmitter.run(job, "LSH", -1); }
From source file:edu.ucsb.cs.partitioning.cosine.CosineAllPartitionMain.java
License:Apache License
/** * Job3: Core Cosine partitioning with skipping based on partition maximum * vectors length, size and weight./* w w w. j a v a 2s .co m*/ */ public static JobConf runCosinePartition(JobConf job, String[] args) throws IOException { new GenericOptionsParser(job, args); job.setJobName(Partitioner.class.getSimpleName() + " + " + CosineAllPartitionMain.class.getSimpleName()); job.setJarByClass(CosineAllPartitionMain.class); job = setMapReduce(job, CosineAllPartMapper.class, IdentityReducer.class); job = setInputOutput(job, new Path(Partitioner.OUTPUT_DIR), interPath); JobSubmitter.run(job, "Cosine Partitioning", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); FileSystem.get(job).delete(new Path(Partitioner.OUTPUT_DIR), true); return job; }
From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java
License:Apache License
/** * Job3: Core Cosine partitioning on sorted weights vectors. *///from w w w. j a v a2 s . co m public static void runCosinePartition(JobConf job, String[] args, Class jobSpawner, Class mapper) throws IOException { new GenericOptionsParser(job, args); job.setJobName(Partitioner.class.getSimpleName() + " + " + jobSpawner.getSimpleName()); job = setMapReduce(job, mapper, IdentityReducer.class); job = setInputOutput(job, new Path(Partitioner.OUTPUT_DIR), interPath); JobSubmitter.run(job, "Cosine Partitioning", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); // FileSystem.get(job).delete(new Path(Partitioner.OUTPUT_DIR), true); }
From source file:edu.ucsb.cs.partitioning.jaccard.JaccardCoarsePartitionMain.java
License:Apache License
public static void main(String[] args) throws IOException { runSort(args, "lengthsort"); JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(JaccardCoarsePartitionMain.class.getSimpleName()); job.setJarByClass(JaccardCoarsePartitionMain.class); ///*from ww w. j a v a2s .c o m*/ // set input & output & threshold & numPartitions // String inputDir = PartDriver.INPUT_DIR; //String inputDir = SortDriver.OUTPUT_DIR; FileSystem.get(job).delete(new Path(PartDriver.OUTPUT_DIR), true); float threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE); int nPartitions = job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE); // // run regular java program // System.out.println(JobSubmitter.stars() + "\n Running Sequential Job: jaccard coarse 1D partitioning " + "\n Threshold: " + threshold); FileSystem hdfs = produceStaticParitions(inputDir, PartDriver.OUTPUT_DIR, nPartitions); produceSkipList(true, threshold, nPartitions, hdfs, job); Collector.printJaccardStatistics(job, PartDriver.OUTPUT_DIR); }
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { JobConf job = new JobConf(); job.setJarByClass(LshPartitionMain.class); job.setJobName(LshPartitionMain.class.getSimpleName()); GenericOptionsParser gop = new GenericOptionsParser(job, args); args = gop.getRemainingArgs();/*from w w w. j av a 2s . c o m*/ job.setMapperClass(LshMapper.class); job.setMapOutputKeyClass(IntArrayWritable.class); // signatures job.setMapOutputValueClass(LongWritable.class); // doc IDs job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE)); job.setReducerClass(LshReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); String inputDir = args[0]; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input directory not set."); } FileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path("lsh-jaccard-buckets"); FileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024, job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE), job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); writeLsh(job, outputPath.getFileSystem(job), lshTable); run(job); }
From source file:edu.ucsb.cs.preprocessing.sequence.SeqWriter.java
License:Apache License
/**
 * Runs a map-only MR job that converts an input directory of numeric valued records to
 * Hadoop sequence format. The input is assumed to be text of the form
 * [id feature weight ..].
 *
 * <p>{@code HashPagesDriver.IDS_FILE2} and {@code OUTPUT_DIR} are deleted before the job
 * is passed to {@code JobSubmitter.run}.
 *
 * @throws IOException if a file system operation fails or is propagated by the helpers
 */
public static void writeSequence() throws IOException {
    final JobConf conf = new JobConf();
    conf.setJobName("Convert text vectors to hadoop seqeunce ");
    conf.setJarByClass(SeqWriter.class);

    // Map-only conversion: mapper output is the job output.
    conf.setMapperClass(SeqMapper.class);
    conf.setNumReduceTasks(0);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(FeatureWeightArrayWritable.class);

    // Text in.
    conf.setInputFormat(TextInputFormat.class);
    final Path source = new Path(INPUT_DIR);
    TextInputFormat.addInputPath(conf, source);

    // Clear leftovers from earlier runs.
    final FileSystem fileSystem = FileSystem.get(conf);
    fileSystem.delete(new Path(HashPagesDriver.IDS_FILE2), true);
    final Path target = new Path(OUTPUT_DIR);
    fileSystem.delete(target, true);

    // Sequence file out.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, target);

    JobSubmitter.run(conf, "PREPROCESS", -1);
}
From source file:edu.ucsb.cs.sort.length.LengthSortMain.java
License:Apache License
/** * Sets the job configurations including the mapper and reducer classes to * do the sorting based on vector lengths. *//*from www. java2 s. c o m*/ public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(LengthSortMain.class.getSimpleName()); job.setJarByClass(LengthSortMain.class); job.setMapperClass(LengthSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(LengthRangePartitioner.class); job.setReducerClass(LengthSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By Vector Lenghts", -1); }
From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java
License:Apache License
/** * Main method sets the job configurations including the mapper and reducer * classes to do the sorting./* w w w. java 2 s. c om*/ */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); // ToolRunner.printGenericCommandUsage(System.out); job.setJobName(MaxwSortMain.class.getSimpleName()); job.setJarByClass(MaxwSortMain.class); job.setMapperClass(MaxwSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(MaxwRangePartitioner.class); job.setReducerClass(MaxwSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By infinity-Norm", -1); }
From source file:edu.ucsb.cs.sort.norm.NormSortMain.java
License:Apache License
/** * Main method sets the job configurations including the mapper and reducer * classes to do the sorting. Some of the produced partitions might be * merged later to reflect the number of partitions chosen by the user. */// www . j a va 2 s .co m public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName("NormSort"); job.setJarByClass(NormSortMain.class); job.setMapperClass(NormSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(NormRangePartitioner.class); job.setReducerClass(NormSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By p-norm", -1); }