List of usage examples for org.apache.hadoop.mapred JobConf setMapperClass
public void setMapperClass(Class<? extends Mapper> theClass)
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
/** * @param job : passed by reference to set its mapper class. *//*from www . j a v a 2s.co m*/ public static void setMapperAndRunner(JobConf job) { int numSplits = job.getInt(Config.NUMBER_SPLITS_PROPERTY, Config.NUMBER_SPLITS_VALUE); int PSSChoice = job.getInt(Config.BLOCK_CHOICE_PROPERTY, Config.BLOCK_CHOICE_VALUE);//1,2 String name = "PSS"; if (numSplits > 1) { //check can I set # splits for runner here? job.setMapRunnerClass(MultipleS_Runner.class); if (job.getBoolean(Config.MULTI_THREADS_PROPERTY, Config.MULTI_THREADS_VALUE)) { // threads testing job.setMapperClass(PSS1_Threaded_Mapper.class);// naming } else if (PSSChoice == 1) { name += "1"; job.setMapperClass(PSS1_Mapper.class); } else if (PSSChoice == 2) { name += "2"; job.setMapperClass(PSS2_Mapper.class);// MultipleS_Block1_Mapper } else ;//For future implementations } else { job.setMapRunnerClass(SingleS_Runner.class); if (job.getBoolean(Config.MULTI_THREADS_PROPERTY, Config.MULTI_THREADS_VALUE)) // threads throw new RuntimeException( "ERROR: Single S with multithreads! Set hybrid.threads.property to false."); if (PSSChoice == 1) { job.setMapperClass(PSS_Mapper.class); if (job.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE)) { name += "/Bayardo_Dynamic_filter"; job.setMapperClass(PSS_Bayardo_Mapper.class);//PSS+Bayardo WWW'07 } } else if (PSSChoice == 2) { name += "2/SingleS"; job.setMapperClass(PSS2_SingleS_Mapper.class); } else job.setMapperClass(PSS3_SingleS_Mapper.class); //what is this? } job.setJobName(name); }
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void IDMappingJob(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJarByClass(HybridDriver.class); job.setJobName("Converting binary similarity scores to text"); job.setMapperClass(IDMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0);/*from w w w . j a v a 2 s.c o m*/ job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Path inputPath = new Path(OUTPUT_DIR); job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.setInputPaths(job, inputPath); Path outputPath = new Path("SimilarityScores"); job.setOutputFormat(TextOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); //remove not sure JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); }
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { JobConf job = new JobConf(); job.setJarByClass(MinHashLshDriver.class); job.setJobName(MinHashLshDriver.class.getSimpleName()); GenericOptionsParser gop = new GenericOptionsParser(job, args); args = gop.getRemainingArgs();/* w ww. ja v a 2 s . co m*/ job.setMapperClass(LshMapper.class); job.setMapOutputKeyClass(IntArrayWritable.class); // signatures job.setMapOutputValueClass(LongWritable.class); // doc IDs job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE)); job.setReducerClass(LshReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); String inputDir = args[0]; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input directory not set."); } FileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path("lsh-jaccard-buckets"); FileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024, job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE), job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); writeLsh(job, outputPath.getFileSystem(job), lshTable); JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); }
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
/**
 * Configures and submits the projections job. A seed sequence file holding one
 * {@code (i, i)} record per reducer is written under {@code INPUT_DIR}; the job then
 * runs {@code IdentityMapper} in a single map task and {@code ProjectionReducer} in
 * {@code nReducers} reduce tasks, producing {@code IntWritable}/{@code RandomVector}
 * output under {@code OUTPUT_DIR}. Existing input and output directories are deleted
 * first.
 *
 * @param job configuration to populate and submit; modified in place.
 * @throws IOException if a file-system call, writing the seed file or the job
 *                     submission fails.
 */
public static void main(JobConf job) throws IOException {
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    // Marked "D" and "K" respectively in the original declaration.
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    int nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);

    int nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY,
            ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(inputPath)) {
        fs.delete(inputPath, true);
    }

    // Write one (i, i) record per reducer. The writer is closed in a finally block
    // so the underlying stream is released even if an append fails.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    try {
        for (int i = 0; i < nReducers; i++) {
            writer.append(new IntWritable(i), new IntWritable(i));
        }
    } finally {
        writer.close();
    }

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java
License:Apache License
public static void main(String[] args) throws Exception { JobConf job = new JobConf(SignaturesGenerator.class); new GenericOptionsParser(job, args); job.setJobName(SignaturesGenerator.class.getSimpleName()); int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE); setParameters();/*from w ww . j av a 2s .co m*/ FileSystem fs = FileSystem.get(job); prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR)); Path outputPath = new Path(OUTPUT_DIR); if (fs.exists(outputPath)) fs.delete(outputPath); FileInputFormat.setInputPaths(job, INPUT_DIR); // Path(INPUT_DIR)); FileOutputFormat.setOutputPath(job, outputPath); // FileOutputFormat.setCompressOutput(job, false); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.set("mapred.child.java.opts", "-Xmx2048m"); job.setInt("mapred.map.max.attempts", 10); job.setInt("mapred.reduce.max.attempts", 10); job.setInt("mapred.task.timeout", 6000000); job.setMapperClass(SigMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BitSignature.class); job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(BitSignature.class); JobSubmitter.run(job, "LSH", -1); }
From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java
License:Apache License
/**
 * Applies the mapper, reducer and key/value class settings of the core cosine
 * partitioning job to the given configuration.
 *
 * @param job     configuration to update in place.
 * @param mapper  mapper class to register on the job.
 * @param reducer reducer class to register on the job.
 * @return the same {@code job} instance that was passed in.
 */
public static JobConf setMapReduce(JobConf job, Class mapper, Class reducer) {
    // The reduce task count comes from the configured number of partitions.
    final int partitionCount = job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE);

    // Map side.
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    // Reduce side.
    job.setNumReduceTasks(partitionCount);
    job.setReducerClass(reducer);
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);

    return job;
}
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { JobConf job = new JobConf(); job.setJarByClass(LshPartitionMain.class); job.setJobName(LshPartitionMain.class.getSimpleName()); GenericOptionsParser gop = new GenericOptionsParser(job, args); args = gop.getRemainingArgs();/*from ww w .j av a 2 s .c o m*/ job.setMapperClass(LshMapper.class); job.setMapOutputKeyClass(IntArrayWritable.class); // signatures job.setMapOutputValueClass(LongWritable.class); // doc IDs job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE)); job.setReducerClass(LshReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); String inputDir = args[0]; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input directory not set."); } FileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path("lsh-jaccard-buckets"); FileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024, job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE), job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); writeLsh(job, outputPath.getFileSystem(job), lshTable); run(job); }
From source file:edu.ucsb.cs.preprocessing.sequence.SeqWriter.java
License:Apache License
/**
 * Submits a map-only MapReduce job that converts a directory of numeric-valued text
 * records into Hadoop sequence format. The text input is assumed to have the form
 * [id feature weight ..].
 *
 * <p>Before submission the path {@code HashPagesDriver.IDS_FILE2} and any existing
 * {@code OUTPUT_DIR} are deleted recursively.
 *
 * @throws IOException if a file-system call or the job submission fails.
 */
public static void writeSequence() throws IOException {
    JobConf conf = new JobConf();
    conf.setJobName("Convert text vectors to hadoop seqeunce ");
    conf.setJarByClass(SeqWriter.class);

    // Map-only: the mapper output is the job output, so both use the same classes.
    conf.setMapperClass(SeqMapper.class);
    conf.setNumReduceTasks(0);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(FeatureWeightArrayWritable.class);

    // Text in.
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(INPUT_DIR));

    // Clear leftovers from earlier runs.
    Path idsFile = new Path(HashPagesDriver.IDS_FILE2);
    FileSystem.get(conf).delete(idsFile, true);
    Path sequenceOut = new Path(OUTPUT_DIR);
    FileSystem.get(conf).delete(sequenceOut, true);

    // Sequence files out.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, sequenceOut);

    JobSubmitter.run(conf, "PREPROCESS", -1);
}
From source file:edu.ucsb.cs.sort.length.LengthSortMain.java
License:Apache License
/** * Sets the job configurations including the mapper and reducer classes to * do the sorting based on vector lengths. *///from w ww .ja va 2s . c om public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(LengthSortMain.class.getSimpleName()); job.setJarByClass(LengthSortMain.class); job.setMapperClass(LengthSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(LengthRangePartitioner.class); job.setReducerClass(LengthSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By Vector Lenghts", -1); }
From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java
License:Apache License
/** * Main method sets the job configurations including the mapper and reducer * classes to do the sorting./* w w w. j a v a 2 s.co m*/ */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); // ToolRunner.printGenericCommandUsage(System.out); job.setJobName(MaxwSortMain.class.getSimpleName()); job.setJarByClass(MaxwSortMain.class); job.setMapperClass(MaxwSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(MaxwRangePartitioner.class); job.setReducerClass(MaxwSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By infinity-Norm", -1); }