List of usage examples for org.apache.hadoop.mapred.JobConf#setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
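setReducerClass registers the Reducer implementation that the old (org.apache.hadoop.mapred) API runs in the reduce phase of a job. Before the per-project examples below, here is a minimal, self-contained word-count sketch showing the call in context; the WordCountJob, WordCountMapper, and WordCountReducer names are hypothetical stand-ins, not taken from any of the source files listed.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountJob {

    // Hypothetical mapper: emits (word, 1) for every token in the line.
    public static class WordCountMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Hypothetical reducer: sums the counts emitted for each word.
    public static class WordCountReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountJob.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(WordCountMapper.class);
        // setReducerClass wires the reduce phase to our Reducer implementation.
        conf.setReducerClass(WordCountReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

Because summing counts is commutative and associative, the same reducer class could also be registered as a combiner via conf.setCombinerClass(WordCountReducer.class), a pattern several of the examples below use.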
From source file: com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java
License: Apache License

/**
 * Extracts CF for each found anchor.
 *
 * @param inputPath
 * @param mapPath
 * @param outputPath
 * @throws IOException
 */
private void task3(String inputPath, String mapPath, String outputPath) throws IOException {
    LOG.info("Extracting anchor text (phase 3)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - mapping: " + mapPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    conf.setJobName(
            String.format("ExtractWikipediaAnchorText:phase3[input: %s, output: %s]", inputPath, outputPath));

    conf.setNumReduceTasks(1);

    String location = "map.dat";

    try {
        DistributedCache.addCacheFile(new URI(mapPath + "/part-00000/data" + "#" + location), conf);
        //DistributedCache.addCacheFile(new URI(mapPath + "/singleentitymap.data" + "#" + location), conf);
        DistributedCache.createSymlink(conf);
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(MapFileOutputFormat.class);
    // conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper3.class);
    conf.setCombinerClass(MyReducer3.class);
    conf.setReducerClass(MyReducer3.class);

    JobClient.runJob(conf);
}
From source file: com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java
License: Apache License

/**
 * Maps from (targetID, (anchor, count)) to (anchor, (targetID, count)).
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task4(String inputPath, String outputPath) throws IOException {
    LOG.info("Extracting anchor text (phase 4)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    conf.setJobName(
            String.format("ExtractWikipediaAnchorText:phase4[input: %s, output: %s]", inputPath, outputPath));

    conf.setNumReduceTasks(1);

    //FileInputFormat.addInputPath(conf, new Path(inputPath + "/part-00000/data"));
    FileInputFormat.addInputPath(conf, new Path(inputPath + "/part-*/data"));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(MapFileOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(HMapSIW.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(HMapSIW.class);

    conf.setMapperClass(MyMapper4.class);
    conf.setReducerClass(MyReducer4.class);

    JobClient.runJob(conf);
}
From source file: com.yolodata.tbana.hadoop.mapred.shuttl.TestMapper.java
License: Open Source License

public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(TestConfigurations.getConfigurationWithShuttlSearch());
    jobConf.setJarByClass(ShuttlTestJob.class);
    jobConf.setNumReduceTasks(1);

    jobConf.setMapperClass(TestMapper.class);
    jobConf.setReducerClass(TestReducer.class);

    jobConf.setInputFormat(ShuttlCSVInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    ShuttlCSVInputFormat.addInputPath(jobConf, new Path(args[0]));
    TextOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    JobClient.runJob(jobConf);
    return 0;
}
From source file: com.yolodata.tbana.hadoop.mapred.splunk.inputformat.TestMapper.java
License: Open Source License

public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf());
    jobConf.set(SplunkInputFormat.INPUTFORMAT_MODE, args[0]);
    jobConf.setJarByClass(SplunkTestRunner.class);
    jobConf.setNumReduceTasks(1);

    jobConf.setMapperClass(TestMapper.class);
    jobConf.setReducerClass(TestReducer.class);

    jobConf.setInputFormat(SplunkInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    TextOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    JobClient.runJob(jobConf);
    return 0;
}
From source file: com.zfylin.demo.bigdata.hadoop.mr.WordCount2.java
License: Apache License

public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hdfs");

    // Input: an existing directory on HDFS
    String input = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student";
    /**
     * Output directory on HDFS; it must not already exist when the job runs.
     */
    String output = "hdfs://hadoop-master:8020/data/hive/warehouse/channel_test.db/tbl_student/output/";

    JobConf conf = new JobConf(WordCount2.class);
    /**
     * ERROR: Exception message: /bin/bash: line 0: fg: no job control
     */
    conf.set("mapreduce.app-submission.cross-platform", "true");
    conf.setJobName("WordCount");
    // conf.addResource("classpath:/hadoop/core-site.xml");
    // conf.addResource("classpath:/hadoop/hdfs-site.xml");
    // conf.addResource("classpath:/hadoop/mapred-site.xml");

    // Output key type
    conf.setOutputKeyClass(Text.class);
    // Output value type (an int count)
    conf.setOutputValueClass(IntWritable.class);

    // Mapper class
    conf.setMapperClass(WordCountMapper.class);
    /**
     * Combiner: an optional, reducer-like step that pre-aggregates each
     * map task's local output before it is shuffled to the reducers,
     * reducing the amount of data transferred over the network. Reusing
     * the reducer as the combiner is safe here because the word-count
     * reduce function is commutative and associative.
     */
    conf.setCombinerClass(WordCountReducer.class);
    // Reducer class
    conf.setReducerClass(WordCountReducer.class);

    /**
     * TextInputFormat reads the input line by line: the key is the byte
     * offset of the line (LongWritable), the value is the line itself (Text).
     */
    conf.setInputFormat(TextInputFormat.class);
    /**
     * TextOutputFormat writes each key/value pair as a line of text,
     * calling toString() on the key and the value.
     */
    conf.setOutputFormat(TextOutputFormat.class);

    // Input path
    FileInputFormat.setInputPaths(conf, new Path(input));
    // Output path
    FileOutputFormat.setOutputPath(conf, new Path(output));

    // Run the MapReduce job
    JobClient.runJob(conf);
    System.exit(0);
}
From source file: com.zjy.mongo.util.MongoTool.java
License: Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here.
     * They override any XML level values.
     * Note that -D<space> is important - no space will
     * not work as it gets picked up by Java itself.
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output.
     * Only works with foreground jobs.
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);

    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}
From source file: combiner.CombinerDriver.java

public static void main(String[] args) {
    JobClient client = new JobClient();
    // Configurations for Job set in this variable
    JobConf conf = new JobConf(combiner.CombinerDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(combiner.CombinerMapper.class);
    conf.setCombinerClass(combiner.CombinerReducer.class);
    conf.setReducerClass(combiner.CombinerReducer.class);

    // Formats of the Data Type of Input and output
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    client.setConf(conf);

    try {
        // Running the job with Configurations set in the conf.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: Corrector.Correction.java
License: Apache License

public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Correction [0/7]");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(Correction.class);
    conf.setJobName("Correction " + inputPath + " " + Config.K);

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    //conf.setBoolean("mapred.output.compress", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CorrectionMapper.class);
    conf.setReducerClass(CorrectionReducer.class);

    //delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file: Corrector.FindError.java
License: Apache License

public RunningJob run(String inputPath, String outputPath, int idx, String hkmerlist) throws Exception {
    sLogger.info("Tool name: FindError");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(FindError.class);
    conf.setJobName("FindError " + inputPath + " " + Config.K);
    conf.setLong("IDX", idx);

    //\\
    DistributedCache.addCacheFile(new URI(hkmerlist), conf);
    //\\

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FindErrorMapper.class);
    conf.setReducerClass(FindErrorReducer.class);

    //delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file: Corrector.IdentifyTrustedReads.java
License: Apache License

public RunningJob run(String inputPath, String outputPath, long kmer_threshold) throws Exception {
    sLogger.info("Tool name: IdentifyTrustedReads");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    JobConf conf = new JobConf(IdentifyTrustedReads.class);
    conf.setJobName("IdentifyTrustedReads " + inputPath + " " + Config.K);
    conf.setLong("KmerThreshold", kmer_threshold);
    // conf.setLong("AllKmer", allkmer);

    Config.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(IdentifyTrustedReads.IdentifyTrustedReadsMapper.class);
    conf.setReducerClass(IdentifyTrustedReads.IdentifyTrustedReadsReducer.class);

    //delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}