List of usage examples for org.apache.hadoop.mapred.JobConf#setMapOutputKeyClass
public void setMapOutputKeyClass(Class<?> theClass)
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Configures and runs the "Join record with station name" job.
 *
 * <p>Expects exactly three arguments: the NCDC input path, the station input
 * path and the output path. Each input gets its own mapper via
 * {@code MultipleInputs}; the job uses {@code KeyPartitioner} as partitioner and
 * {@code TextPair.FirstComparator} as the value-grouping comparator.
 *
 * @param args {@code <ncdc input> <station input> <output>}
 * @return -1 when the argument count is wrong, 0 once the job has run
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 3;
    if (wrongArgCount) {
        JobBuilder.printUsage(this, "<ncdc input> <station input> <output>");
        return -1;
    }

    JobConf job = new JobConf(getConf(), getClass());
    job.setJobName("Join record with station name");

    // One mapper per input source; both inputs are plain text.
    MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, JoinRecordMapper.class);
    MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, JoinStationMapper.class);
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    // Partitioning and grouping are configured together with the composite map output key.
    job.setPartitionerClass(KeyPartitioner.class);
    job.setOutputValueGroupingComparator(TextPair.FirstComparator.class);
    job.setMapOutputKeyClass(TextPair.class);

    job.setReducerClass(JoinReducer.class);
    job.setOutputKeyClass(Text.class);

    JobClient.runJob(job);
    return 0;
}
From source file:de.l3s.streamcorpus.mapreduce.TerrierIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing. * @param args/*from ww w .ja va2 s . c om*/ * @throws Exception */ public int run(String[] args) throws Exception { long time = System.currentTimeMillis(); // For the moment: Hard-code the terrier home to quick test System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer"); boolean docPartitioned = false; int numberOfReducers = Integer .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26")); final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing"); if (args.length == 2 && args[0].equals("-p")) { logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices."); numberOfReducers = Integer.parseInt(args[1]); docPartitioned = true; } else if (args.length == 1 && args[0].equals("--merge")) { if (numberOfReducers > 1) mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers); else logger.error("No point merging 1 reduce task output"); return 0; } else if (args.length == 0) { logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index."); docPartitioned = false; if (numberOfReducers > MAX_REDUCE) { logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use " + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most"); } } /*else { logger.fatal(usage()); return 0; }*/ if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0], false) instanceof BitCompressionConfiguration)) { logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing" + " - you can recompress the inverted index later using IndexRecompressor"); return 0; } if (jf == null) throw new Exception("Could not get JobFactory from HadoopPlugin"); final JobConf conf = jf.newJob(); conf.setJarByClass(TerrierIndexing.class); conf.setJobName("StreamCorpusIndexer: Terrier Indexing"); if 
(Files.exists(ApplicationSetup.TERRIER_INDEX_PATH) && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) { logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + "," + ApplicationSetup.TERRIER_INDEX_PREFIX); return 0; } // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING; boolean blockIndexing = true; if (blockIndexing) { conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class); conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class); } else { conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class); conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class); } FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH)); conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX); conf.setMapOutputKeyClass(SplitEmittedTerm.class); conf.setMapOutputValueClass(MapEmittedPostingList.class); conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned); if (!conf.get("mapred.job.tracker").equals("local")) { conf.setMapOutputCompressorClass(GzipCodec.class); conf.setCompressMapOutput(true); } else { conf.setCompressMapOutput(false); } conf.setInputFormat(MultiFileCollectionInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class); conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class); conf.setReduceSpeculativeExecution(false); //parse the collection.spec BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC); String line = null; List<Path> paths = new ArrayList<Path>(); while ((line = specBR.readLine()) != null) { if (line.startsWith("#")) continue; paths.add(new Path(line)); } specBR.close(); FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()])); // not sure if this is effective in YARN conf.setNumMapTasks(2000); // increase the heap usage 
conf.set("mapreduce.map.memory.mb", "6100"); conf.set("mapred.job.map.memory.mb", "6100"); conf.set("mapreduce.reduce.memory.mb", "6144"); conf.set("mapred.job.reduce.memory.mb", "6144"); conf.set("mapreduce.map.java.opts", "-Xmx6100m"); conf.set("mapred.map.child.java.opts", "-Xmx6100m"); conf.set("mapreduce.reduce.java.opts", "-Xmx6144m"); conf.set("mapred.reduce.child.opts", "-Xmx6144m"); //conf.setBoolean("mapred.used.genericoptionsparser", true) ; // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first. Set this on to say you don't like it conf.set("mapreduce.job.user.classpath.first", "true"); // increase the yarn memory to 10 GB conf.set("yarn.nodemanager.resource.memory-mb", "12288"); conf.set("yarn.nodemanager.resource.cpu-vcores", "16"); conf.set("yarn.scheduler.minimum-allocation-mb", "4096"); conf.setNumReduceTasks(numberOfReducers); if (numberOfReducers > 1) { if (docPartitioned) conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class); else conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class); } else { //for JUnit tests, we seem to need to restore the original partitioner class conf.setPartitionerClass(HashPartitioner.class); } /*JobID jobId = null; boolean ranOK = true; try{ RunningJob rj = JobClient.runJob(conf); jobId = rj.getID(); HadoopUtility.finishTerrierJob(conf); } catch (Exception e) { logger.error("Problem running job", e); e.printStackTrace(); ranOK = false; } if (jobId != null) { deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId); } */ //if (ranOK) //{ System.out.println("Merging indices"); if (!docPartitioned) { if (numberOfReducers > 1) mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers); } Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH, docPartitioned ? 
numberOfReducers : 1, jf); //} System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds"); jf.close(); return 0; }
From source file:de.l3s.streamcorpus.StreamCorpusIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing. * @param args//from w w w. ja v a2s .c om * @throws Exception */ public int run(String[] args) throws Exception { long time = System.currentTimeMillis(); // For the moment: Hard-code the terrier home to quick test System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer"); boolean docPartitioned = false; int numberOfReducers = Integer .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26")); final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing"); if (args.length == 2 && args[0].equals("-p")) { logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices."); numberOfReducers = Integer.parseInt(args[1]); docPartitioned = true; } else if (args.length == 1 && args[0].equals("--merge")) { if (numberOfReducers > 1) mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers); else logger.error("No point merging 1 reduce task output"); return 0; } else if (args.length == 0) { logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index."); docPartitioned = false; if (numberOfReducers > MAX_REDUCE) { logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use " + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most"); } } /*else { logger.fatal(usage()); return 0; }*/ if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0], false) instanceof BitCompressionConfiguration)) { logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing" + " - you can recompress the inverted index later using IndexRecompressor"); return 0; } if (jf == null) throw new Exception("Could not get JobFactory from HadoopPlugin"); final JobConf conf = jf.newJob(); conf.setJarByClass(StreamCorpusIndexing.class); conf.setJobName("StreamCorpusIndexer: Terrier Indexing"); if 
(Files.exists(ApplicationSetup.TERRIER_INDEX_PATH) && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) { logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + "," + ApplicationSetup.TERRIER_INDEX_PREFIX); return 0; } // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING; boolean blockIndexing = true; if (blockIndexing) { conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class); conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class); } else { conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class); conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class); } FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH)); conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX); conf.setMapOutputKeyClass(SplitEmittedTerm.class); conf.setMapOutputValueClass(MapEmittedPostingList.class); conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned); if (!conf.get("mapred.job.tracker").equals("local")) { conf.setMapOutputCompressorClass(GzipCodec.class); conf.setCompressMapOutput(true); } else { conf.setCompressMapOutput(false); } conf.setInputFormat(MultiFileCollectionInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class); conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class); conf.setReduceSpeculativeExecution(false); //parse the collection.spec BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC); String line = null; List<Path> paths = new ArrayList<Path>(); while ((line = specBR.readLine()) != null) { if (line.startsWith("#")) continue; paths.add(new Path(line)); } specBR.close(); FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()])); // not sure if this is effective in YARN conf.setNumMapTasks(2000); // increase the heap usage 
conf.set("mapreduce.map.memory.mb", "6100"); conf.set("mapred.job.map.memory.mb", "6100"); conf.set("mapreduce.reduce.memory.mb", "6144"); conf.set("mapred.job.reduce.memory.mb", "6144"); conf.set("mapreduce.map.java.opts", "-Xmx6100m"); conf.set("mapred.map.child.java.opts", "-Xmx6100m"); conf.set("mapreduce.reduce.java.opts", "-Xmx6144m"); conf.set("mapred.reduce.child.opts", "-Xmx6144m"); //conf.setBoolean("mapred.used.genericoptionsparser", true) ; // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first. Set this on to say you don't like it conf.set("mapreduce.job.user.classpath.first", "true"); // increase the yarn memory to 10 GB conf.set("yarn.nodemanager.resource.memory-mb", "12288"); conf.set("yarn.nodemanager.resource.cpu-vcores", "16"); conf.set("yarn.scheduler.minimum-allocation-mb", "4096"); conf.setNumReduceTasks(numberOfReducers); if (numberOfReducers > 1) { if (docPartitioned) conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class); else conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class); } else { //for JUnit tests, we seem to need to restore the original partitioner class conf.setPartitionerClass(HashPartitioner.class); } /*JobID jobId = null; boolean ranOK = true; try{ RunningJob rj = JobClient.runJob(conf); jobId = rj.getID(); HadoopUtility.finishTerrierJob(conf); } catch (Exception e) { logger.error("Problem running job", e); e.printStackTrace(); ranOK = false; } if (jobId != null) { deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId); } */ //if (ranOK) //{ System.out.println("Merging indices"); if (!docPartitioned) { if (numberOfReducers > 1) mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers); } Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH, docPartitioned ? 
numberOfReducers : 1, jf); //} System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds"); jf.close(); return 0; }
From source file:de.tudarmstadt.lt.n2n.hadoop.FlipJoBims.java
License:Apache License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(FlipJoBims.class); /* begin necessary for UKP cluster */ conf.setMemoryForMapTask(1000L); // 1 GB /* necessary for UKP cdh3 */ conf.setMemoryForReduceTask(1000L); // 1 GB /* necessary for UKP cdh3 */ FileOutputFormat.setCompressOutput(conf, true); // compress output FileOutputFormat.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.BZip2Codec.class); /* use the bzip2 codec for compression */ conf.setCompressMapOutput(true); // compress mapper output /* end necessary for UKP cluster */ conf.setJobName(FlipJoBims.class.getSimpleName()); args = new GenericOptionsParser(conf, args).getRemainingArgs(); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(FlipJoBims.Map.class); conf.setNumReduceTasks(0);//from w ww . j a v a 2s.com // conf.setReducerClass(IdentityReducer.class); conf.setMapOutputKeyClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob4.java
License:Apache License
@Override public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob4.class); conf.setJobName(GoogleSyntacticsJob4.class.getSimpleName()); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); // delete output path for testing purposes // FileSystem.get(conf).delete(new Path(args[1]), true); String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS); if (extractorConfigurationFiles == null) { extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ','); System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n", SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles); conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles); }/*w w w . j av a 2 s. c om*/ String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(","); for (int i = 0; i < extractorConfigurationFilesArr.length; i++) DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf); conf.setMapperClass(GoogleSyntacticsJob4Mapper.class); conf.setInputFormat(TextInputFormat.class); conf.setMapOutputKeyClass(NullWritable.class); conf.setMapOutputValueClass(Text.class); conf.setNumReduceTasks(0); conf.setCombinerClass(IdentityReducer.class); JobClient.runJob(conf); return 0; }
From source file:dinocode.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception { JobClient client = new JobClient(); JobConf conf = new JobConf(SpeciesDriver.class); conf.setJobName("Page-rank Species Graph Builder"); final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath()); String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt"; String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); //conf.setOutputKeyClass(Text.class); //conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapperd.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class); conf.setReducerClass(SpeciesGraphBuilderReducerd.class); //conf.setCombinerClass(SpeciesGraphBuilderReducer.class); //conf.setInputPath(new Path("graph1")); //conf.setOutputPath(new Path("graph2")); // take the input and output from the command line FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); client.setConf(conf);//from w ww . 
jav a2 s.c o m try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000"; for (int i = 0; i < 500; i++) { client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Iter"); int count = i + 1; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count; conf.setNumReduceTasks(5); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesIterMapper2d.class); conf.setReducerClass(SpeciesIterReducer2d.class); conf.setCombinerClass(SpeciesIterReducer2d.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } inFiles = outFiles; } //Viewer client = new JobClient(); conf = new JobConf(SpeciesDriver.class); conf.setJobName("Species Viewer"); conf.setOutputKeyClass(FloatWritable.class); conf.setOutputValueClass(Text.class); inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000"; outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal"; FileInputFormat.setInputPaths(conf, new Path(inFiles)); FileOutputFormat.setOutputPath(conf, new Path(outFiles)); conf.setMapperClass(SpeciesViewerMapperd.class); conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } }
From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java
public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException { JobConf conf = new JobConf(configuration); conf.setJobName("Single End Alignment"); conf.setJarByClass(Cloudbreak.class); FileInputFormat.addInputPath(conf, new Path(hdfsDataDir)); Path outputDir = new Path(hdfsAlignmentsDir); FileSystem.get(conf).delete(outputDir); FileOutputFormat.setOutputPath(conf, outputDir); addDistributedCacheFile(conf, reference, "novoalign.reference"); addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable"); if (pathToNovoalignLicense != null) { addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license"); }//from ww w . ja va2 s . c o m DistributedCache.createSymlink(conf); conf.set("mapred.task.timeout", "3600000"); conf.set("novoalign.threshold", threshold); conf.set("novoalign.quality.format", qualityFormat); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapperClass(NovoalignSingleEndMapper.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setCompressMapOutput(true); conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.set("mapred.output.compress", "true"); conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec"); JobClient.runJob(conf); }
From source file:edu.uci.ics.hyracks.imru.util.DataBalancer.java
License:Apache License
/**
 * Runs the DataBalancer job.
 *
 * @param args {@code <input> <output> <numReducers> [codec]}; when the optional fourth
 *             argument starts with {@code bzip} or {@code gz}, the matching output
 *             compressor class is set
 * @throws IOException if the job fails
 */
public static void main(String[] args) throws IOException {
    final Class<DataBalancer> driver = DataBalancer.class;
    JobConf conf = new JobConf(driver);
    conf.setJobName(driver.getSimpleName());

    conf.setMapperClass(MapRecordOnly.class);
    conf.setReducerClass(ReduceRecordOnly.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(conf, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    conf.setNumReduceTasks(Integer.parseInt(args[2]));

    // Optional output compression, chosen by the prefix of the fourth argument.
    if (args.length > 3) {
        final String codec = args[3];
        if (codec.startsWith("bzip")) {
            FileOutputFormat.setOutputCompressorClass(conf, BZip2Codec.class);
        } else if (codec.startsWith("gz")) {
            FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
        }
    }

    JobClient.runJob(conf);
}
From source file:edu.uci.ics.pregelix.core.util.DataGenerator.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(DataGenerator.class); FileSystem dfs = FileSystem.get(job); String maxFile = "/maxtemp"; dfs.delete(new Path(maxFile), true); job.setJobName(DataGenerator.class.getSimpleName() + "max ID"); job.setMapperClass(MapMaxId.class); job.setCombinerClass(CombineMaxId.class); job.setReducerClass(ReduceMaxId.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(VLongWritable.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(maxFile)); job.setNumReduceTasks(1);//from ww w.java 2s. com JobClient.runJob(job); job = new JobConf(DataGenerator.class); job.set("hyracks.maxid.file", maxFile); job.setInt("hyracks.x", Integer.parseInt(args[2])); dfs.delete(new Path(args[1]), true); job.setJobName(DataGenerator.class.getSimpleName()); job.setMapperClass(MapRecordGen.class); job.setReducerClass(ReduceRecordGen.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setNumReduceTasks(Integer.parseInt(args[3])); if (args.length > 4) { if (args[4].startsWith("bzip")) FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class); if (args[4].startsWith("gz")) FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); } JobClient.runJob(job); }
From source file:edu.uci.ics.pregelix.example.utils.CommonSource.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf job = new JobConf(GraphPreProcessor.class); job.setJobName(GraphPreProcessor.class.getSimpleName()); job.setMapperClass(MapRecordOnly.class); job.setReducerClass(ReduceRecordOnly.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setInputFormat(TextInputFormat.class); for (int i = 0; i < args.length - 2; i++) { FileInputFormat.addInputPath(job, new Path(args[i])); }// w w w. jav a 2 s . c o m FileOutputFormat.setOutputPath(job, new Path(args[args.length - 2])); job.setNumReduceTasks(Integer.parseInt(args[args.length - 1])); JobClient.runJob(job); }