List of usage examples for org.apache.hadoop.mapred JobConf setMapperClass
public void setMapperClass(Class<? extends Mapper> theClass)
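setMapperClass registers the Mapper implementation (old org.apache.hadoop.mapred API) that a job will run; it is normally called while configuring a JobConf, before the job is submitted with JobClient.runJob. Before the per-project examples, here is a minimal, self-contained sketch of that pattern. It is only a sketch under stated assumptions: it uses the stock IdentityMapper/IdentityReducer and takes its input and output paths from the command line, none of which the examples below are obliged to do.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetMapperClassExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetMapperClassExample.class);
        conf.setJobName("setmapperclass-example");

        // TextInputFormat produces LongWritable offsets and Text lines,
        // which the identity mapper/reducer pass through unchanged.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // The call this page documents: register the Mapper implementation for the job.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The examples that follow apply the same pattern with project-specific mapper, combiner, and reducer classes.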
From source file:edu.ldzm.average.AverageResponseTime.java
License:Apache License
/**
 * The main driver for the average response time map/reduce program.
 * Invoke this method to submit the map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // both the output keys and values are text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (param != 2) {
        System.out.println("-l -i?");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}
From source file:edu.ncku.ikdd.ArtistAnalysis.java
public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(ArtistAnalysis.class);
    conf.setJobName("artistanalysis");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}
From source file:edu.ncku.ikdd.DataMining.java
public static void main(String[] argv) throws Exception {
    int candidateLength = 1;
    FileSystem dfs = FileSystem.get(new Configuration());
    do {
        JobConf countConf = new JobConf(DataMining.class);
        countConf.setOutputKeyClass(Text.class);
        countConf.setOutputValueClass(IntWritable.class);
        countConf.setMapperClass(CountMap.class);
        countConf.setCombinerClass(CountCombine.class);
        countConf.setReducerClass(CountReduce.class);
        countConf.setInputFormat(TextInputFormat.class);
        countConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(countConf, new Path(argv[0]));
        FileOutputFormat.setOutputPath(countConf, new Path(count_path + String.valueOf(candidateLength)));
        countConf.setInt("minSupport", Integer.valueOf(argv[2]));
        countConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(countConf);

        ++candidateLength;

        JobConf candidateConf = new JobConf(DataMining.class);
        candidateConf.setOutputKeyClass(Text.class);
        candidateConf.setOutputValueClass(Text.class);
        candidateConf.setMapperClass(CandidateMap.class);
        candidateConf.setReducerClass(CandidateReduce.class);
        candidateConf.setInputFormat(TextInputFormat.class);
        candidateConf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(candidateConf,
                new Path(count_path + String.valueOf(candidateLength - 1) + "/part-00000"));
        FileOutputFormat.setOutputPath(candidateConf,
                new Path(candidate_path + String.valueOf(candidateLength)));
        candidateConf.setInt("candidateLength", candidateLength);
        JobClient.runJob(candidateConf);
    } while (dfs.getFileStatus(new Path(candidate_path + String.valueOf(candidateLength) + "/part-00000"))
            .getLen() > 0);

    BufferedReader br;
    BufferedWriter bw = new BufferedWriter(
            new OutputStreamWriter(dfs.create(new Path(argv[1] + "/part-00000"))));
    String line;
    for (int i = 1; i < candidateLength; ++i) {
        br = new BufferedReader(
                new InputStreamReader(dfs.open(new Path(count_path + String.valueOf(i) + "/part-00000"))));
        while ((line = br.readLine()) != null) {
            bw.write(line + "\n");
        }
        br.close();
    }
    bw.close();
}
From source file:edu.ncku.ikdd.TempRecord.java
public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TempRecord.class);
    conf.setJobName("temprecord");

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}
From source file:edu.ncku.ikdd.TitleParser.java
public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(TitleParser.class);
    conf.setJobName("titleparser");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}
From source file:edu.ncku.ikdd.WordCount.java
public static void main(String[] argv) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(argv[0]));
    FileOutputFormat.setOutputPath(conf, new Path(argv[1]));

    JobClient.runJob(conf);
}
From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java
public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);
    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);

    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir);
    FileOutputFormat.setOutputPath(conf, outputDir);

    addDistributedCacheFile(conf, reference, "novoalign.reference");
    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }
    DistributedCache.createSymlink(conf);

    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);

    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");

    JobClient.runJob(conf);
}
From source file:edu.stolaf.cs.wmrserver.streaming.StreamJob.java
License:Apache License
public static void setStreamMapper(JobConf conf, String mapCommand) {
    conf.setMapperClass(PipeMapper.class);
    conf.setMapRunnerClass(PipeMapRunner.class);
    try {
        conf.set("stream.map.streamprocessor", URLEncoder.encode(mapCommand, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // This should never happen: UTF-8 support is mandatory for every JVM,
        // and String itself is built on it.
        throw new RuntimeException("Java doesn't support UTF-8.", ex);
    }
}
From source file:edu.ub.ahstfg.indexer.Indexer.java
License:Open Source License
@Override public int run(String[] arg0) throws Exception { LOG.info("Creating Hadoop job for Indexer."); JobConf job = new JobConf(getConf()); job.setJarByClass(Indexer.class); LOG.info("Setting input path to '" + INPUT_PATH + "'"); FileInputFormat.setInputPaths(job, new Path(INPUT_PATH)); // Set filters if it's necessary. LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'"); // Change URI to Path if it's necessary. FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job); if (fs.exists(new Path(OUTPUT_PATH))) { fs.delete(new Path(OUTPUT_PATH), true); }/*from ww w .j a v a 2 s . c om*/ LOG.info("Setting output path to '" + OUTPUT_PATH + "'"); FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH)); FileOutputFormat.setCompressOutput(job, false); LOG.info("Setting input format."); job.setInputFormat(ArcInputFormat.class); LOG.info("Setting output format."); job.setOutputFormat(IndexOutputFormat.class); LOG.info("Setting output data types."); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IndexRecord.class); LOG.info("Setting mapper and reducer."); job.setMapperClass(IndexerMapper.class); job.setMapOutputValueClass(ParsedDocument.class); job.setReducerClass(IndexerReducer.class); if (JobClient.runJob(job).isSuccessful()) { return 0; } else { return 1; } }
From source file:edu.ub.ahstfg.indexer.wordcount.WordCount.java
License:Open Source License
@Override public int run(String[] args) throws Exception { LOG.info("Creating Hadoop job for ARC input files word count."); JobConf job = new JobConf(getConf()); job.setJarByClass(WordCount.class); LOG.info("Setting input path to '" + inputPath + "'"); FileInputFormat.setInputPaths(job, new Path(inputPath)); // Set filters if it's necessary. LOG.info("Clearing the output path at '" + outputPath + "'"); // Change URI to Path if it's necessary. FileSystem fs = FileSystem.get(new URI(outputPath), job); if (fs.exists(new Path(outputPath))) { fs.delete(new Path(outputPath), true); }/*from w w w. ja v a 2 s.c o m*/ LOG.info("Setting output path to '" + outputPath + "'"); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileOutputFormat.setCompressOutput(job, false); LOG.info("Setting input format."); // job.setInputFormat(TextInputFormat.class); job.setInputFormat(ArcInputFormat.class); LOG.info("Setting output format."); job.setOutputFormat(TextOutputFormat.class); LOG.info("Setting output data types."); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); LOG.info("Setting mapper and reducer."); // job.setMapperClass(WordCountTextInputMapper.class); job.setMapperClass(WordCountArcInputMapper.class); job.setReducerClass(LongSumReducer.class); if (JobClient.runJob(job).isSuccessful()) { return 0; } else { return 1; } }