List of usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
From source file:edu.uci.ics.pregelix.example.utils.DuplicateGraph.java
License:Apache License
/**
 * Configures and runs a map-only job (zero reduce tasks) that applies
 * {@code MapRecordOnly} to text input and writes text output.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.setInputPaths};
 *             {@code args[1]} is the output path
 * @throws IOException if thrown while configuring or running the job
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(DuplicateGraph.class);
    job.setJobName(DuplicateGraph.class.getSimpleName());

    job.setMapperClass(MapRecordOnly.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Input format is configured exactly once; a repeated call would be a no-op.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Map-only: no reduce phase.
    job.setNumReduceTasks(0);
    JobClient.runJob(job);
}
From source file:edu.uci.ics.pregelix.example.utils.FilterCount.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(GraphPreProcessor.class); job.setJobName(FilterCount.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); job.setInputFormat(TextInputFormat.class); for (int i = 0; i < args.length - 1; i++) { FileInputFormat.addInputPath(job, new Path(args[i])); }//from w w w.j a va 2 s. c o m FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1])); job.setNumReduceTasks(0); JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.FindLargest.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(GraphPreProcessor.class); job.setJobName(GraphPreProcessor.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setReducerClass(ReduceRecordOnly.class); job.setCombinerClass(ReduceRecordOnly.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setInputFormat(TextInputFormat.class); for (int i = 0; i < args.length - 2; i++) { FileInputFormat.addInputPath(job, new Path(args[i])); }// w ww . ja v a 2 s.c o m FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2])); job.setNumReduceTasks(Integer.parseInt(args[args.length - 1])); JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.GraphPreProcessor.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(GraphPreProcessor.class); job.setJobName(GraphPreProcessor.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(0);//w ww .j av a 2s. co m JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.VertexAggregator.java
License:Apache License
/**
 * Sets up and runs a map/combine/reduce job over text input. Map output is
 * keyed by {@code NullWritable} with {@code LongWritable} values.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.setInputPaths};
 *             {@code args[1]} is the output path; {@code args[2]} is the
 *             number of reduce tasks (parsed as an int)
 * @throws IOException if thrown while configuring or running the job
 */
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(VertexAggregator.class);
    conf.setJobName(VertexAggregator.class.getSimpleName());

    // Processing classes.
    conf.setMapperClass(MapRecordOnly.class);
    conf.setCombinerClass(CombineRecordOnly.class);
    conf.setReducerClass(ReduceRecordOnly.class);

    // Intermediate (map output) types.
    conf.setMapOutputKeyClass(NullWritable.class);
    conf.setMapOutputValueClass(LongWritable.class);

    conf.setInputFormat(TextInputFormat.class);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, output);

    int reducers = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(reducers);

    JobClient.runJob(conf);
}
From source file:edu.uci.ics.pregelix.example.utils.VertexSorter.java
License:Apache License
/**
 * Sets up and runs a map/reduce job over text input. Map output is keyed by
 * {@code LongWritable} with {@code Text} values; no combiner is configured.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.setInputPaths};
 *             {@code args[1]} is the output path; {@code args[2]} is the
 *             number of reduce tasks (parsed as an int)
 * @throws IOException if thrown while configuring or running the job
 */
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(VertexSorter.class);
    conf.setJobName(VertexSorter.class.getSimpleName());

    // Processing classes.
    conf.setMapperClass(MapRecordOnly.class);
    conf.setReducerClass(ReduceRecordOnly.class);

    // Intermediate (map output) types.
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setInputFormat(TextInputFormat.class);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path output = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, output);

    int reducers = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(reducers);

    JobClient.runJob(conf);
}
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { // job.set("mapred.job.tracker", "local"); // job.set("fs.default.name", "file:///"); JobConf job = new JobConf(); job.setJarByClass(HybridDriver.class); new GenericOptionsParser(job, args); setMapperAndRunner(job);/*from w ww. j av a 2 s . c om*/ job.setMapOutputKeyClass(DocDocWritable.class); job.setMapOutputValueClass(FloatWritable.class); job.setNumReduceTasks(0); job.setOutputKeyClass(DocDocWritable.class); job.setOutputValueClass(FloatWritable.class); Path inputPath = new Path(INPUT_DIR); CustomSequenceFileInputFormat.addInputPath(job, inputPath); Path outputPath = new Path(OUTPUT_DIR); job.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); job.setBoolean("fs.hdfs.impl.disable.cache", true); //xun not sure if needed if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) { job.setInputFormat(CustomSequenceFileInputFormat.class); Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024; job.setLong("mapred.min.split.size", splitMB); job.setLong("mapred.max.split.size", splitMB); job.setLong("dfs.block.size", splitMB); } else { // Comment the following of splitter for www experiments it assumes no splitting // of partitions for load balancing, should be fixed. 
Splitter.configure(job, inputPath);// remove comment unless for www job.setInputFormat(NonSplitableSequenceInputFormat.class); //remove comment } //SIGIR'14 two-stage balancing //not yet fully incorporated if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) { TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE), new Path(PartDriver.OUTPUT_DIR), job); } JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE)) IDMappingJob(args); }
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void IDMappingJob(String[] args) throws IOException { JobConf job = new JobConf(); new GenericOptionsParser(job, args); job.setJarByClass(HybridDriver.class); job.setJobName("Converting binary similarity scores to text"); job.setMapperClass(IDMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0);// w w w .j a va 2 s. c o m job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Path inputPath = new Path(OUTPUT_DIR); job.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.setInputPaths(job, inputPath); Path outputPath = new Path("SimilarityScores"); job.setOutputFormat(TextOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); //remove not sure JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); }
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException { JobConf job = new JobConf(); job.setJarByClass(MinHashLshDriver.class); job.setJobName(MinHashLshDriver.class.getSimpleName()); GenericOptionsParser gop = new GenericOptionsParser(job, args); args = gop.getRemainingArgs();/* ww w . j a v a2 s . co m*/ job.setMapperClass(LshMapper.class); job.setMapOutputKeyClass(IntArrayWritable.class); // signatures job.setMapOutputValueClass(LongWritable.class); // doc IDs job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE)); job.setReducerClass(LshReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); String inputDir = args[0]; if (inputDir == null) { throw new UnsupportedOperationException("ERROR: input directory not set."); } FileInputFormat.addInputPath(job, new Path(inputDir)); Path outputPath = new Path("lsh-jaccard-buckets"); FileOutputFormat.setOutputPath(job, outputPath); FileSystem.get(job).delete(outputPath, true); LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024, job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE), job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); writeLsh(job, outputPath.getFileSystem(job), lshTable); JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)); }
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static void main(JobConf job) throws IOException { int nBits/*D*/, nFeatures/*K*/, nReducers; job.setJobName(ProjectionsGenerator.class.getSimpleName()); FileSystem fs = FileSystem.get(job); nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE); nFeatures = readCollectionFeatureCount(fs, job); setParameters(nBits, nFeatures);//from www . j av a2s .c o m nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE); Path inputPath = new Path(INPUT_DIR); Path outputPath = new Path(OUTPUT_DIR); if (fs.exists(outputPath)) fs.delete(outputPath, true); if (fs.exists(inputPath)) fs.delete(inputPath, true); SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"), IntWritable.class, IntWritable.class); for (int i = 0; i < nReducers; i++) writer.append(new IntWritable(i), new IntWritable(i)); writer.close(); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); FileOutputFormat.setCompressOutput(job, false); job.set("mapred.child.java.opts", "-Xmx2048m"); job.setInt("mapred.map.max.attempts", 10); job.setInt("mapred.reduce.max.attempts", 10); job.setNumMapTasks(1); job.setNumReduceTasks(nReducers); job.setMapperClass(IdentityMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(ProjectionReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(RandomVector.class); JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)); }