List of usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
From source file:edu.isi.mavuno.app.mine.HarvestSentences.java
License:Apache License
/**
 * Configures and runs the map-only HarvestSentences job.
 *
 * <p>Reads the pattern path, corpus path, corpus input-format class name and output
 * path from the tool configuration, logs them, and submits a job with zero reducers.
 * The pattern path is only logged in this method; it is not otherwise passed to the
 * job here.
 *
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws ClassNotFoundException if the configured corpus class cannot be loaded
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws IOException if job setup or submission fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String patternPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.PatternPath", conf);
    String corpusPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.CorpusPath", conf);
    String corpusClass = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.CorpusClass", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.HarvestSentences.OutputPath", conf);

    sLogger.info("Tool name: HarvestSentences");
    sLogger.info(" - Pattern file: " + patternPath);
    sLogger.info(" - Corpus path: " + corpusPath);
    sLogger.info(" - Corpus class: " + corpusClass);
    sLogger.info(" - Output path: " + outputPath);

    Job job = new Job(conf);
    job.setJobName("HarvestSentences");

    MavunoUtils.recursivelyAddInputPaths(job, corpusPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // The input format is chosen at runtime from the configured class name.
    job.setInputFormatClass((Class<? extends InputFormat>) Class.forName(corpusClass));
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);

    // Map-only job: no reduce phase.
    job.setNumReduceTasks(0);

    // waitForCompletion reports success as a boolean; propagate failure as a
    // non-zero status instead of unconditionally returning 0.
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:edu.isi.mavuno.app.nlp.ProcessStanfordNLP.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); // required parameters String corpusPath = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.CorpusClass", conf); String outputPath = MavunoUtils.getRequiredParam("Mavuno.ProcessStanfordNLP.OutputPath", conf); // optional parameters String suTime = MavunoUtils.getOptionalParam("Mavuno.ProcessStanfordNLP.UseSUTime", conf); String textOutput = MavunoUtils.getOptionalParam("Mavuno.ProcessStanfordNLP.TextOutputFormat", conf); sLogger.info("Tool name: ProcessStanfordNLP"); sLogger.info(" - Input path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Output path: " + outputPath); if (suTime != null && Boolean.parseBoolean(suTime)) { sLogger.info("- SUTime enabled"); }/*from w w w. jav a 2s .c o m*/ boolean textOutputFormat = false; if (textOutput != null && Boolean.parseBoolean(textOutput)) { sLogger.info("- Text output format enabled"); textOutputFormat = true; } Job job = new Job(conf); job.setJobName("ProcessStanfordNLP"); MavunoUtils.recursivelyAddInputPaths(job, corpusPath); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass((Class<? 
extends InputFormat>) Class.forName(corpusClass)); // output format -- either plain text or sequencefile (default) if (textOutputFormat) { job.setOutputFormatClass(TextOutputFormat.class); } else { job.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); } job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(StanfordParsedDocument.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(StanfordParsedDocument.class); job.setMapperClass(MyMapper.class); job.setJarByClass(ProcessStanfordNLP.class); // no reducers needed job.setNumReduceTasks(0); // run job job.waitForCompletion(true); // print job statistics Counters counters = job.getCounters(); sLogger.info(" - Total documents: " + counters.findCounter(MyCounters.TOTAL_DOCUMENTS).getValue()); sLogger.info(" - Total sentences: " + counters.findCounter(MyCounters.TOTAL_SENTENCES).getValue()); sLogger.info(" - Total tokens: " + counters.findCounter(MyCounters.TOTAL_TOKENS).getValue()); return 0; }
From source file:edu.isi.mavuno.app.nlp.TratzParse.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String corpusPath = MavunoUtils.getRequiredParam("Mavuno.Parse.CorpusPath", conf); String corpusClass = MavunoUtils.getRequiredParam("Mavuno.Parse.CorpusClass", conf); String outputPath = MavunoUtils.getRequiredParam("Mavuno.Parse.OutputPath", conf); // optional parameter that allows the parsed documents to be output in text format String textOutput = MavunoUtils.getOptionalParam("Mavuno.Parse.TextOutputFormat", conf); boolean textOutputFormat = false; if (textOutput != null && Boolean.parseBoolean(textOutput)) { textOutputFormat = true;//from w w w . j a v a 2s.c o m } sLogger.info("Tool name: TratzParse"); sLogger.info(" - Corpus path: " + corpusPath); sLogger.info(" - Corpus class: " + corpusClass); sLogger.info(" - Output path: " + outputPath); Job job = new Job(conf); job.setJobName("TratzParse"); MavunoUtils.recursivelyAddInputPaths(job, corpusPath); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass((Class<? 
extends InputFormat>) Class.forName(corpusClass)); // output format -- either plain text or sequencefile (default) if (textOutputFormat) { job.setOutputFormatClass(TextOutputFormat.class); } else { job.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setCompressOutput(job, true); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); } job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(TratzParsedDocument.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(TratzParsedDocument.class); job.setMapperClass(MyMapper.class); job.setJarByClass(TratzParse.class); // no reducers needed job.setNumReduceTasks(0); // run job job.waitForCompletion(true); // print job statistics Counters counters = job.getCounters(); sLogger.info(" - Total documents: " + counters.findCounter(StatCounters.TOTAL_DOCUMENTS).getValue()); sLogger.info(" - Total sentences: " + counters.findCounter(StatCounters.TOTAL_SENTENCES).getValue()); sLogger.info(" - Total tokens: " + counters.findCounter(StatCounters.TOTAL_TOKENS).getValue()); sLogger.info(" - Total dropped sentences: " + counters.findCounter(StatCounters.TOTAL_DROPPED_SENTENCES).getValue()); sLogger.info( " - Total tokenization time (ms): " + counters.findCounter(StatCounters.TOKENIZE_TIME).getValue()); sLogger.info( " - Total POS tagging time (ms): " + counters.findCounter(StatCounters.POSTAG_TIME).getValue()); sLogger.info(" - Total chunking time (ms): " + counters.findCounter(StatCounters.CHUNK_TIME).getValue()); sLogger.info(" - Total named entity tagging time (ms): " + counters.findCounter(StatCounters.NETAG_TIME).getValue()); sLogger.info(" - Total parse time (ms): " + counters.findCounter(StatCounters.PARSE_TIME).getValue()); return 0; }
From source file:edu.isi.mavuno.app.util.SequenceFileToText.java
License:Apache License
public int run() throws ClassNotFoundException, InterruptedException, IOException { Configuration conf = getConf(); String inputPath = MavunoUtils.getRequiredParam("Mavuno.SequenceFileToText.InputPath", conf); String outputPath = MavunoUtils.getRequiredParam("Mavuno.SequenceFileToText.OutputPath", conf); sLogger.info("Tool name: SequenceFileToText"); sLogger.info(" - Input path: " + inputPath); sLogger.info(" - Output path: " + outputPath); Job job = new Job(conf); job.setJobName("SequenceFileToText"); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(MyMapper.class); job.setNumReduceTasks(0); job.waitForCompletion(true);//from w ww . j av a 2 s .c o m return 0; }
From source file:edu.isi.mavuno.extract.Split.java
License:Apache License
/**
 * Configures and runs the Split job with a single reducer.
 *
 * <p>Reads the input path, output path and split key from the tool configuration.
 * The split key is only logged in this method. Map output is keyed by
 * {@code ContextPatternWritable} with its own sort comparator and partitioner.
 *
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws ClassNotFoundException declared by the job-completion call
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws IOException if job setup or submission fails
 */
public int run() throws ClassNotFoundException, InterruptedException, IOException {
    Configuration conf = getConf();

    String inputPath = MavunoUtils.getRequiredParam("Mavuno.Split.InputPath", conf);
    String outputPath = MavunoUtils.getRequiredParam("Mavuno.Split.OutputPath", conf);
    String splitKey = MavunoUtils.getRequiredParam("Mavuno.Split.SplitKey", conf);

    sLogger.info("Tool name: Split");
    sLogger.info(" - Input path: " + inputPath);
    sLogger.info(" - Output path: " + outputPath);
    sLogger.info(" - Split key: " + splitKey);

    Job job = new Job(conf);
    job.setJobName("Split");

    MavunoUtils.recursivelyAddInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(ContextPatternWritable.class);
    job.setSortComparatorClass(ContextPatternWritable.Comparator.class);
    job.setPartitionerClass(ContextPatternWritable.FullPartitioner.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputKeyClass(ContextPatternWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(1);

    // waitForCompletion reports success as a boolean; propagate failure as a
    // non-zero status instead of unconditionally returning 0.
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:edu.iu.benchmark.JobLauncher.java
License:Apache License
/**
 * Builds (but does not submit) the map-only benchmark job.
 *
 * @param cmd value stored under {@code Constants.BENCHMARK_CMD}
 * @param bytesPerPartition value stored under {@code Constants.BYTES_PER_PARTITION}
 * @param numPartitions value stored under {@code Constants.NUM_PARTITIONS}
 * @param numMappers number of map tasks; also stored under {@code Constants.NUM_MAPPERS}
 * @param numIterations value stored under {@code Constants.NUM_ITERATIONS}
 * @param inputDirPath job input path
 * @param outputDirPath job output path
 * @return the configured job
 */
private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, Path inputDirPath, Path outputDirPath) throws IOException, URISyntaxException {
    final Job benchmarkJob = Job.getInstance(getConf(), "benchmark_job");

    // Input/output wiring.
    FileInputFormat.setInputPaths(benchmarkJob, inputDirPath);
    FileOutputFormat.setOutputPath(benchmarkJob, outputDirPath);
    benchmarkJob.setInputFormatClass(SingleFileInputFormat.class);

    // Code to run.
    benchmarkJob.setJarByClass(JobLauncher.class);
    benchmarkJob.setMapperClass(BenchmarkMapper.class);

    // Framework selection and task counts; mappers only, no reduce phase.
    final JobConf settings = (JobConf) benchmarkJob.getConfiguration();
    settings.set("mapreduce.framework.name", "map-collective");
    settings.setNumMapTasks(numMappers);
    benchmarkJob.setNumReduceTasks(0);

    // Benchmark parameters handed to the tasks through the job configuration.
    settings.set(Constants.BENCHMARK_CMD, cmd);
    settings.setInt(Constants.BYTES_PER_PARTITION, bytesPerPartition);
    settings.setInt(Constants.NUM_PARTITIONS, numPartitions);
    settings.setInt(Constants.NUM_MAPPERS, numMappers);
    settings.setInt(Constants.NUM_ITERATIONS, numIterations);

    return benchmarkJob;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks, int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath, Configuration configuration, int jobID) throws IOException, URISyntaxException { configuration.setInt(Constants.R, r); configuration.setDouble(Constants.LAMBDA, lambda); configuration.setInt(Constants.NUM_ITERATIONS, numIterations); configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker); System.out.println("Model Dir Path: " + modelDir.toString()); configuration.set(Constants.MODEL_DIR, modelDir.toString()); configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices); configuration.set(Constants.TEST_FILE_PATH, testFilePath); Job job = Job.getInstance(configuration, "ccd_job_" + jobID); JobConf jobConf = (JobConf) job.getConfiguration(); jobConf.set("mapreduce.framework.name", "map-collective"); jobConf.setNumMapTasks(numMapTasks); jobConf.setInt("mapreduce.job.max.split.locations", 10000); FileInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setInputFormatClass(MultiFileInputFormat.class); job.setJarByClass(CCDLauncher.class); job.setMapperClass(CCDMPCollectiveMapper.class); job.setNumReduceTasks(0); return job;//from ww w. j av a2s. com }
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private Job configureCOVJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir, Configuration configuration) throws IOException, URISyntaxException { // configuration.set(Constants.TEST_FILE_PATH, testDirPath); // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath); configuration.setInt(Constants.NUM_MAPPERS, numMapTasks); configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker); // configuration.setInt(Constants.BATCH_SIZE, batchSize); Job job = Job.getInstance(configuration, "cov_job"); JobConf jobConf = (JobConf) job.getConfiguration(); jobConf.set("mapreduce.framework.name", "map-collective"); jobConf.setInt("mapreduce.job.max.split.locations", 10000); jobConf.setInt("mapreduce.map.collective.memory.mb", mem); int xmx = (int) Math.ceil((mem - 2000) * 0.5); int xmn = (int) Math.ceil(0.25 * xmx); jobConf.set("mapreduce.map.collective.java.opts", "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m"); jobConf.setNumMapTasks(numMapTasks); FileInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setInputFormatClass(MultiFileInputFormat.class); job.setJarByClass(COVDaalLauncher.class); job.setMapperClass(COVDaalCollectiveMapper.class); job.setNumReduceTasks(0); System.out.println("Launcher launched"); return job;/*from ww w .j av a 2 s .com*/ }
From source file:edu.iu.daal_linreg.LinRegDaalLauncher.java
License:Apache License
private Job configureLinRegJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem, int batchSize, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir, Configuration configuration) throws IOException, URISyntaxException { configuration.set(Constants.TEST_FILE_PATH, testDirPath); configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath); configuration.setInt(Constants.NUM_MAPPERS, numMapTasks); configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker); configuration.setInt(Constants.BATCH_SIZE, batchSize); Job job = Job.getInstance(configuration, "linreg_job"); JobConf jobConf = (JobConf) job.getConfiguration(); jobConf.set("mapreduce.framework.name", "map-collective"); jobConf.setInt("mapreduce.job.max.split.locations", 10000); // mapreduce.map.collective.memory.mb // 125000/* w ww. jav a 2s. c o m*/ jobConf.setInt("mapreduce.map.collective.memory.mb", mem); int xmx = (int) Math.ceil((mem - 2000) * 0.5); int xmn = (int) Math.ceil(0.25 * xmx); jobConf.set("mapreduce.map.collective.java.opts", "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m"); jobConf.setNumMapTasks(numMapTasks); FileInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setInputFormatClass(MultiFileInputFormat.class); job.setJarByClass(LinRegDaalLauncher.class); job.setMapperClass(LinRegDaalCollectiveMapper.class); job.setNumReduceTasks(0); System.out.println("Launcher launched"); return job; }
From source file:edu.iu.daal_mom.MOMDaalLauncher.java
License:Apache License
private Job configureMOMJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir, Configuration configuration) throws IOException, URISyntaxException { // configuration.set(Constants.TEST_FILE_PATH, testDirPath); // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath); configuration.setInt(Constants.NUM_MAPPERS, numMapTasks); configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker); // configuration.setInt(Constants.BATCH_SIZE, batchSize); Job job = Job.getInstance(configuration, "mom_job"); JobConf jobConf = (JobConf) job.getConfiguration(); jobConf.set("mapreduce.framework.name", "map-collective"); jobConf.setInt("mapreduce.job.max.split.locations", 10000); // mapreduce.map.collective.memory.mb // 125000// w w w . j a v a 2 s. c o m jobConf.setInt("mapreduce.map.collective.memory.mb", mem); int xmx = (int) Math.ceil((mem - 2000) * 0.5); int xmn = (int) Math.ceil(0.25 * xmx); jobConf.set("mapreduce.map.collective.java.opts", "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m"); jobConf.setNumMapTasks(numMapTasks); FileInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setInputFormatClass(MultiFileInputFormat.class); job.setJarByClass(MOMDaalLauncher.class); job.setMapperClass(MOMDaalCollectiveMapper.class); job.setNumReduceTasks(0); System.out.println("Launcher launched"); return job; }