List of usage examples for org.apache.hadoop.mapreduce.Job#setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:com.accumulobook.advanced.mapreduce.MapReduceFilesExample.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(this.getConf()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(WordCount.WordCountMapper.class); job.setCombinerClass(WordCount.WordCountCombiner.class); job.setReducerClass(WordCount.WordCountReducer.class); // clone the articles table ZooKeeperInstance inst = new ZooKeeperInstance(args[0], args[1]); Connector conn = inst.getConnector(args[2], new PasswordToken(args[3])); conn.tableOperations().clone(WikipediaConstants.ARTICLES_TABLE, WikipediaConstants.ARTICLES_TABLE_CLONE, true, Collections.EMPTY_MAP, Collections.EMPTY_SET); // take cloned table offline, waiting until the operation is complete boolean wait = true; conn.tableOperations().offline(WikipediaConstants.ARTICLES_TABLE_CLONE, wait); ClientConfiguration zkiConfig = new ClientConfiguration().withInstance(args[0]).withZkHosts(args[1]); // input/*from w w w . j av a 2 s .c o m*/ job.setInputFormatClass(AccumuloInputFormat.class); AccumuloInputFormat.setInputTableName(job, WikipediaConstants.ARTICLES_TABLE_CLONE); List<Pair<Text, Text>> columns = new ArrayList<>(); columns.add(new Pair(WikipediaConstants.CONTENTS_FAMILY_TEXT, new Text(""))); AccumuloInputFormat.fetchColumns(job, columns); AccumuloInputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloInputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); // configure to use underlying RFiles AccumuloInputFormat.setOfflineTableScan(job, true); // output job.setOutputFormatClass(AccumuloOutputFormat.class); BatchWriterConfig bwConfig = new BatchWriterConfig(); AccumuloOutputFormat.setBatchWriterOptions(job, bwConfig); AccumuloOutputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloOutputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); AccumuloOutputFormat.setDefaultTableName(job, WikipediaConstants.WORD_COUNT_TABLE); AccumuloOutputFormat.setCreateTables(job, true); 
job.setJarByClass(WordCount.class); job.waitForCompletion(true); //job.submit(); return 0; }
From source file:com.accumulobook.advanced.mapreduce.WordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(new Configuration()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(WordCountMapper.class); job.setCombinerClass(WordCountCombiner.class); job.setReducerClass(WordCountReducer.class); // input/* www . j a v a 2s.c om*/ job.setInputFormatClass(AccumuloInputFormat.class); ClientConfiguration zkiConfig = new ClientConfiguration().withInstance(args[0]).withZkHosts(args[1]); AccumuloInputFormat.setInputTableName(job, WikipediaConstants.ARTICLES_TABLE); List<Pair<Text, Text>> columns = new ArrayList<>(); columns.add(new Pair(WikipediaConstants.CONTENTS_FAMILY_TEXT, new Text(""))); AccumuloInputFormat.fetchColumns(job, columns); AccumuloInputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloInputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); // output job.setOutputFormatClass(AccumuloOutputFormat.class); BatchWriterConfig config = new BatchWriterConfig(); AccumuloOutputFormat.setBatchWriterOptions(job, config); AccumuloOutputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloOutputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); AccumuloOutputFormat.setDefaultTableName(job, WikipediaConstants.WORD_COUNT_TABLE); AccumuloOutputFormat.setCreateTables(job, true); job.setJarByClass(WordCount.class); job.submit(); return 0; }
From source file:com.alectenharmsel.hadoop.qa.LineCount.java
License:Apache License
/**
 * Command-line entry point for the LineCount job.
 *
 * <p>Generic Hadoop options are parsed first; exactly two arguments must remain:
 * the input path and the output path. The process exits with -1 on bad usage,
 * 0 when the job succeeds and 1 when it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    final GenericOptionsParser optionsParser = new GenericOptionsParser(new Configuration(), args);
    final Configuration jobConf = optionsParser.getConfiguration();
    final String[] paths = optionsParser.getRemainingArgs();

    if (paths.length != 2) {
        System.err.println("Usage: LineCount <input> <output>");
        System.exit(-1);
    }

    final Job job = Job.getInstance(jobConf, "LineCount");
    job.setJarByClass(LineCount.class);

    // The reducer doubles as the combiner.
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    final Path inputPath = new Path(paths[0]);
    final Path outputPath = new Path(paths[1]);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:com.alectenharmsel.research.hadoop.LcCounters.java
License:Apache License
public static void main(String[] args) throws Exception { GenericOptionsParser parse = new GenericOptionsParser(new Configuration(), args); Configuration conf = parse.getConfiguration(); String[] remainingArgs = parse.getRemainingArgs(); if (remainingArgs.length != 2) { System.err.println("Usage: LineCount <input> <output>"); System.exit(-1);//from w w w .j av a 2 s.co m } Job job = Job.getInstance(conf, "LineCount"); job.setJarByClass(LineCount.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(remainingArgs[0])); FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1])); boolean success = job.waitForCompletion(true); //Get the counter here and print it Counters counters = job.getCounters(); long total = counters.findCounter(LcCounters.NUM_LINES).getValue(); System.out.println(Long.toString(total)); int res = success ? 0 : 1; System.exit(res); }
From source file:com.aliyun.emr.example.WordCount.java
License:Apache License
/**
 * Command-line entry point for the word-count job.
 *
 * <p>Expected arguments after generic Hadoop option parsing:
 * {@code <in> <out> [-skip skipPatternFile]}. Input, output and skip-file
 * locations are passed through {@code EMapReduceOSSUtil.buildOSSCompleteUri}
 * before use. The process exits with 2 on bad usage, 0 when the job succeeds
 * and 1 when it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    final String usage = "Usage: wordcount <in> <out> [-skip skipPatternFile]";

    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if (!(remainingArgs.length == 2 || remainingArgs.length == 4)) {
        System.err.println(usage);
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        if ("-skip".equals(remainingArgs[i])) {
            // "-skip" as the last token has no pattern file to consume; reject it
            // instead of running off the end of the array.
            if (i + 1 >= remainingArgs.length) {
                System.err.println(usage);
                System.exit(2);
            }
            job.addCacheFile(new Path(EMapReduceOSSUtil.buildOSSCompleteUri(remainingArgs[++i], conf)).toUri());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            otherArgs.add(remainingArgs[i]);
        }
    }

    // The length check above does not guarantee two positional arguments
    // (e.g. "-skip file" alone has length 2), so verify before indexing.
    if (otherArgs.size() < 2) {
        System.err.println(usage);
        System.exit(2);
    }

    FileInputFormat.addInputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(0), conf)));
    FileOutputFormat.setOutputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(1), conf)));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.aliyun.openservices.tablestore.hadoop.RowCounter.java
License:Apache License
/**
 * Command-line entry point for the row-count job, which reads its input through
 * {@code TableStoreInputFormat} and writes to a file output path.
 *
 * <p>Prints usage and exits with 1 when {@code parseArgs} rejects the arguments
 * or when any of endpoint, access key id, access key secret, table or output
 * path is still unset afterwards. Otherwise exits with 0 when the job succeeds
 * and 1 when it fails.
 *
 * @param args command-line arguments handed to {@code parseArgs}
 * @throws Exception if job configuration or execution fails
 */
public static void main(String[] args) throws Exception {
    // parseArgs runs first; the null checks are only evaluated when it accepts the arguments.
    if (!parseArgs(args)
            || endpoint == null
            || accessKeyId == null
            || accessKeySecret == null
            || table == null
            || outputPath == null) {
        printUsage();
        System.exit(1);
    }

    final Configuration hadoopConf = new Configuration();
    final Job rowCountJob = Job.getInstance(hadoopConf, "row count");
    rowCountJob.setJarByClass(RowCounter.class);

    rowCountJob.setMapperClass(RowCounterMapper.class);
    rowCountJob.setCombinerClass(IntSumReducer.class);
    rowCountJob.setReducerClass(IntSumReducer.class);

    rowCountJob.setOutputKeyClass(Text.class);
    rowCountJob.setOutputValueClass(LongWritable.class);

    // input side
    rowCountJob.setInputFormatClass(TableStoreInputFormat.class);
    TableStore.setCredential(rowCountJob, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(rowCountJob, endpoint, instance);
    TableStoreInputFormat.addCriteria(rowCountJob, fetchCriteria());

    // output side
    FileOutputFormat.setOutputPath(rowCountJob, new Path(outputPath));

    final int exitCode = rowCountJob.waitForCompletion(true) ? 0 : 1;
    System.exit(exitCode);
}
From source file:com.antbrains.crf.hadoop.FeatureCounter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: wordcount <in> <out> <templatefile>"); System.exit(2);//from www .jav a 2 s . c o m } String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]); conf.set("templates", strArr2Str(templates)); Job job = new Job(conf, FeatureCounter.class.getSimpleName()); job.setJarByClass(FeatureCounter.class); job.setMapperClass(CounterMapper.class); job.setCombinerClass(SumReducer.class); job.setReducerClass(SumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.antbrains.crf.hadoop.FeatureStat.java
License:Apache License
/**
 * Command-line entry point for the FeatureStat job.
 *
 * <p>Expects two arguments after generic Hadoop option parsing: input path and
 * output path. The process exits with 2 on bad usage, 0 when the job succeeds
 * and 1 when it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if option parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        // Name this program in the usage text (it previously said "wordcount")
        // and drop the stray trailing space.
        System.err.println("Usage: FeatureStat <in> <out>");
        System.exit(2);
    }

    // NOTE(review): newer Hadoop releases deprecate this constructor in favour of
    // Job.getInstance(conf, name); kept because the targeted Hadoop version is not visible here.
    Job job = new Job(conf, FeatureStat.class.getSimpleName());
    job.setJarByClass(FeatureStat.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.antbrains.crf.hadoop.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// w ww. ja v a 2 s. c o m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
@SuppressWarnings("rawtypes") private void configureShuffle(Job job, VariableTable variables) { Class<? extends Reducer> reducer = getReducerClassOrNull(); if (reducer != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Reducer: {0}", reducer.getName())); //$NON-NLS-1$ }/*from w w w. j a v a 2 s .c o m*/ job.setReducerClass(reducer); } else { if (LOG.isDebugEnabled()) { LOG.debug("Reducer: N/A"); //$NON-NLS-1$ } job.setNumReduceTasks(0); return; } Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class); Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$ outputKeyClass.getName(), outputValueClass.getName())); } job.setMapOutputKeyClass(outputKeyClass); job.setMapOutputValueClass(outputValueClass); Class<? extends Reducer> combiner = getCombinerClassOrNull(); if (combiner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Combiner: {0}", combiner.getName())); //$NON-NLS-1$ } job.setCombinerClass(combiner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Combiner: N/A"); //$NON-NLS-1$ } } Class<? extends Partitioner> partitioner = getPartitionerClassOrNull(); if (partitioner != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Partitioner: {0}", partitioner.getName())); //$NON-NLS-1$ } job.setPartitionerClass(partitioner); } else { if (LOG.isDebugEnabled()) { LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$ } } Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull(); if (groupingComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName())); //$NON-NLS-1$ } job.setGroupingComparatorClass(groupingComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$ } } Class<? 
extends RawComparator> sortComparator = getSortComparatorClassOrNull(); if (sortComparator != null) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("SortComparator: {0}", sortComparator.getName())); //$NON-NLS-1$ } job.setSortComparatorClass(sortComparator); } else { if (LOG.isDebugEnabled()) { LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$ } } }