List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int; if no such property exists, defaultValue is returned.
From source file:edu.ucsb.cs.partitioning.jaccard.JaccardCoarsePartitionMain.java
License:Apache License
public static void main(String[] args) throws IOException { runSort(args, "lengthsort"); JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(JaccardCoarsePartitionMain.class.getSimpleName()); job.setJarByClass(JaccardCoarsePartitionMain.class); ///* ww w . j a va2s. co m*/ // set input & output & threshold & numPartitions // String inputDir = PartDriver.INPUT_DIR; //String inputDir = SortDriver.OUTPUT_DIR; FileSystem.get(job).delete(new Path(PartDriver.OUTPUT_DIR), true); float threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE); int nPartitions = job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE); // // run regular java program // System.out.println(JobSubmitter.stars() + "\n Running Sequential Job: jaccard coarse 1D partitioning " + "\n Threshold: " + threshold); FileSystem hdfs = produceStaticParitions(inputDir, PartDriver.OUTPUT_DIR, nPartitions); produceSkipList(true, threshold, nPartitions, hdfs, job); Collector.printJaccardStatistics(job, PartDriver.OUTPUT_DIR); }
From source file:edu.ucsb.cs.partitioning.lsh.LshMapper.java
License:Apache License
@Override public void configure(JobConf job) { l = job.getInt(LshPartitionMain.L_PROPERTY, LshPartitionMain.L_VALUE); try {/*ww w .j a v a 2 s . co m*/ Path[] localFiles = DistributedCache.getLocalCacheFiles(job); // System.out.println("local:" + localFiles[0].getName()); // FileSystem fs = localFiles[0].getFileSystem(job); FileSystem fs = FileSystem.get(job); // Reader reader = new SequenceFile.Reader(fs, localFiles[0], job); Reader reader = new SequenceFile.Reader(fs, new Path("lshfile"), job); reader.next(lsh); reader.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { JobConf job = new JobConf(); job.setJarByClass(LshPartitionMain.class); job.setJobName(LshPartitionMain.class.getSimpleName()); GenericOptionsParser gop = new GenericOptionsParser(job, args); args = gop.getRemainingArgs();/*w w w . j a v a2 s .com*/ job.setMapperClass(LshMapper.class); job.setMapOutputKeyClass(IntArrayWritable.class); // signatures job.setMapOutputValueClass(LongWritable.class); // doc IDs job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE)); job.setReducerClass(LshReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); String inputDir = args[0]; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input directory not set."); } FileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path("lsh-jaccard-buckets"); FileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024, job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE), job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); writeLsh(job, outputPath.getFileSystem(job), lshTable); run(job); }
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void run(JobConf job) throws IOException { String ret = stars() + "\n Running job: " + job.getJobName() + "\n Input Path: {"; Path inputs[] = FileInputFormat.getInputPaths(job); for (int ctr = 0; ctr < inputs.length; ctr++) { if (ctr > 0) { ret += "\n "; }//from w ww.j av a 2 s . com ret += inputs[ctr].toString(); } ret += "}\n"; ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n k: " + job.getInt(K_PROPERTY, K_VALUE) + "\n l: " + job.getInt(L_PROPERTY, L_VALUE); System.out.println(ret); JobClient.runJob(job); }
From source file:edu.ucsb.cs.sort.length.LengthSortMain.java
License:Apache License
/** * Sets the job configurations including the mapper and reducer classes to * do the sorting based on vector lengths. *///from ww w .ja va2 s .c o m public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(LengthSortMain.class.getSimpleName()); job.setJarByClass(LengthSortMain.class); job.setMapperClass(LengthSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(LengthRangePartitioner.class); job.setReducerClass(LengthSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By Vector Lenghts", -1); }
From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java
License:Apache License
/** * Main method sets the job configurations including the mapper and reducer * classes to do the sorting./*from w ww.java2 s . c o m*/ */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); // ToolRunner.printGenericCommandUsage(System.out); job.setJobName(MaxwSortMain.class.getSimpleName()); job.setJarByClass(MaxwSortMain.class); job.setMapperClass(MaxwSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(MaxwRangePartitioner.class); job.setReducerClass(MaxwSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By infinity-Norm", -1); }
From source file:edu.ucsb.cs.sort.norm.NormSortMain.java
License:Apache License
/** * Main method sets the job configurations including the mapper and reducer * classes to do the sorting. Some of the produced partitions might be * merged later to reflect the number of partitions chosen by the user. *///w ww . j a v a 2 s. c om public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName("NormSort"); job.setJarByClass(NormSortMain.class); job.setMapperClass(NormSortMapper.class); job.setMapOutputKeyClass(FloatWritable.class); job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class); job.setPartitionerClass(NormRangePartitioner.class); job.setReducerClass(NormSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(FeatureWeightArrayWritable.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(SortDriver.OUTPUT_DIR); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By p-norm", -1); }
From source file:edu.ucsb.cs.sort.signature.SigSortMain.java
License:Apache License
/** * Sets the job configurations including the mapper and reducer classes to * do the sorting based signatures./* ww w . j a v a 2 s. c om*/ */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJobName(SigSortMain.class.getSimpleName()); job.setJarByClass(SigSortMain.class); job.setMapperClass(SigSortMapper.class); job.setMapOutputKeyClass(BitSignature.class); job.setMapOutputValueClass(LongWritable.class); job.setPartitionerClass(SigRangePartitioner.class); job.setReducerClass(SigSortReducer.class); job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(BitSignature.class); // // set input & output // String inputDir = SortDriver.INPUT_DIR; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input path not set"); } job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path(OUTPUT_PATH); FileSystem.get(job).delete(outputPath, true); job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // // run // JobSubmitter.run(job, "Sort By Signature Bytes", -1); }
From source file:edu.umd.cloud9.pagerank.RangePartitioner.java
License:Apache License
/**
 * Stores the value of the {@code "NodeCount"} job property in
 * {@code mNodeCnt}, using 0 when the property is not set.
 *
 * @param job job configuration to read {@code "NodeCount"} from
 */
public void configure(JobConf job) {
    final int nodeCount = job.getInt("NodeCount", 0);
    mNodeCnt = nodeCount;
}
From source file:edu.umd.cloud9.webgraph.BuildReverseWebGraph.java
License:Apache License
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), BuildReverseWebGraph.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); String outputPath = conf.get("Cloud9.OutputPath"); conf.setJobName("ReverseWebGraph"); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInt("mapred.task.timeout", 60000000); conf.set("mapreduce.map.memory.mb", "2048"); conf.set("mapreduce.map.java.opts", "-Xmx2048m"); conf.set("mapreduce.reduce.memory.mb", "2048"); conf.set("mapreduce.reduce.java.opts", "-Xmx2048m"); conf.set("mapreduce.task.timeout", "60000000"); conf.setNumMapTasks(numMappers);//from w ww .ja v a 2s .c o m conf.setNumReduceTasks(numReducers); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(ArrayListWritable.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(ArrayListWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); SequenceFileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); LOG.info("BuildReverseWebGraph"); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); if (!fs.exists(new Path(outputPath))) { JobClient.runJob(conf); } else { LOG.info(outputPath + " already exists! Skipping this step..."); } return 0; }