List of usage examples for org.apache.hadoop.mapreduce.Job#setGroupingComparatorClass
public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java
License:Apache License
private int runQueryCopy() throws Exception { log.info("Setting up Copy Tool with a query-based ruleset..."); setup();//w w w . j ava 2 s . c o m if (!useCopyFileOutput) { createChildInstance(conf); } // Set up the configuration final AccumuloRdfConfiguration aconf = new AccumuloRdfConfiguration(conf); aconf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock); aconf.setTablePrefix(tablePrefix); aconf.setFlush(false); ConfigUtils.setIndexers(aconf); // Since we're copying at the statement-level, ignore any given list of tables and determine // which tables we might need to create based on which indexers are desired. final TablePrefixLayoutStrategy prefixStrategy = new TablePrefixLayoutStrategy(tablePrefix); tables.clear(); // Always include core tables tables.add(prefixStrategy.getSpo()); tables.add(prefixStrategy.getOsp()); tables.add(prefixStrategy.getPo()); // Copy namespaces if they exist tables.add(prefixStrategy.getNs()); // Add tables associated with any configured indexers /* TODO: SEE RYA-160 if (aconf.getBoolean(ConfigUtils.USE_FREETEXT, false)) { tables.add(ConfigUtils.getFreeTextDocTablename(conf)); tables.add(ConfigUtils.getFreeTextTermTablename(conf)); } if (aconf.getBoolean(ConfigUtils.USE_GEO, false)) { tables.add(ConfigUtils.getGeoTablename(conf)); } if (aconf.getBoolean(ConfigUtils.USE_TEMPORAL, false)) { tables.add(ConfigUtils.getTemporalTableName(conf)); } if (aconf.getBoolean(ConfigUtils.USE_ENTITY, false)) { tables.add(ConfigUtils.getEntityTableName(conf)); } */ // Ignore anything else, e.g. 
statistics -- must be recalculated for the child if desired // Extract the ruleset, and copy the namespace table directly final AccumuloQueryRuleset ruleset = new AccumuloQueryRuleset(aconf); ruleset.addTable(prefixStrategy.getNs()); for (final String line : ruleset.toString().split("\n")) { log.info(line); } // Create a Job and configure its input and output final Job job = Job.getInstance(aconf); job.setJarByClass(this.getClass()); setupMultiTableInputFormat(job, ruleset); setupAccumuloOutput(job, ""); if (useCopyFileOutput) { // Configure job for file output job.setJobName("Ruleset-based export to file: " + tablePrefix + " -> " + localBaseOutputDir); // Map (row) to (table+key, key+value) job.setMapperClass(RowRuleMapper.class); job.setMapOutputKeyClass(GroupedRow.class); job.setMapOutputValueClass(GroupedRow.class); // Group according to table and and sort according to key job.setGroupingComparatorClass(GroupedRow.GroupComparator.class); job.setSortComparatorClass(GroupedRow.SortComparator.class); // Reduce ([table+row], rows): output each row to the file for that table, in sorted order job.setReducerClass(MultipleFileReducer.class); job.setOutputKeyClass(Key.class); job.setOutputValueClass(Value.class); } else { // Configure job for table output job.setJobName("Ruleset-based copy: " + tablePrefix + " -> " + childTablePrefix); // Map (row): convert to statement, insert to child (for namespace table, output row directly) job.setMapperClass(AccumuloRyaRuleMapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Mutation.class); job.setNumReduceTasks(0); // Create the child tables, so mappers don't try to do this in parallel for (final String parentTable : tables) { final String childTable = parentTable.replaceFirst(tablePrefix, childTablePrefix); createTableIfNeeded(childTable); } } // Run the job and copy files to local filesystem if needed final Date beginTime = new Date(); log.info("Job started: " + beginTime); final boolean success = 
job.waitForCompletion(true); if (success) { if (useCopyFileOutput) { log.info("Moving data from HDFS to the local file system"); final Path baseOutputPath = new Path(baseOutputDir); for (final FileStatus status : FileSystem.get(conf).listStatus(baseOutputPath)) { if (status.isDirectory()) { final String tableName = status.getPath().getName(); final Path hdfsPath = getPath(baseOutputDir, tableName); final Path localPath = getPath(localBaseOutputDir, tableName); log.info("HDFS directory: " + hdfsPath.toString()); log.info("Local directory: " + localPath.toString()); copyHdfsToLocal(hdfsPath, localPath); } } } final Date endTime = new Date(); log.info("Job finished: " + endTime); log.info("The job took " + (endTime.getTime() - beginTime.getTime()) / 1000 + " seconds."); return 0; } else { log.error("Job failed!!!"); return 1; } }
From source file:org.apache.tez.mapreduce.examples.SecondarySort.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: secondarysort <in> <out>"); ToolRunner.printGenericCommandUsage(System.out); return 2; }/*from w ww . ja v a 2 s .com*/ Job job = new Job(conf, "secondary sort"); job.setJarByClass(SecondarySort.class); job.setMapperClass(MapClass.class); job.setReducerClass(Reduce.class); // group and partition by the first int in the pair job.setPartitionerClass(FirstPartitioner.class); job.setGroupingComparatorClass(FirstGroupingComparator.class); // the map output is IntPair, IntWritable job.setMapOutputKeyClass(IntPair.class); job.setMapOutputValueClass(IntWritable.class); // the reduce output is Text, IntWritable job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:org.avenir.association.AssociationRuleMiner.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Association rule mining from frequent item sets"; job.setJobName(jobName);//from ww w .j a va 2s. c om job.setJarByClass(AssociationRuleMiner.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "avenir"); job.setMapperClass(AssociationRuleMiner.RuleMinerMapper.class); job.setReducerClass(AssociationRuleMiner.RuleMinerReducer.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); int numReducer = job.getConfiguration().getInt("arm.num.reducer", -1); numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer; job.setNumReduceTasks(numReducer); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.knn.FeatureCondProbJoiner.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Training vector feature cond probability joiner MR"; job.setJobName(jobName);// w w w . j a v a 2 s . c o m job.setJarByClass(FeatureCondProbJoiner.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(FeatureCondProbJoiner.JoinerMapper.class); job.setReducerClass(FeatureCondProbJoiner.JoinerReducer.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class); Utility.setConfiguration(job.getConfiguration()); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.knn.NearestNeighbor.java
License:Apache License
/**
 * Sets up and runs the K-nearest-neighbor MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job knnJob = new Job(getConf());
    knnJob.setJobName("K nerest neighbor(KNN) MR");
    knnJob.setJarByClass(NearestNeighbor.class);

    FileInputFormat.addInputPath(knnJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(knnJob, new Path(args[1]));

    // Map and reduce stages
    knnJob.setMapperClass(NearestNeighbor.TopMatchesMapper.class);
    knnJob.setReducerClass(NearestNeighbor.TopMatchesReducer.class);

    // Intermediate and final key/value types
    knnJob.setMapOutputKeyClass(Tuple.class);
    knnJob.setMapOutputValueClass(Tuple.class);
    knnJob.setOutputKeyClass(NullWritable.class);
    knnJob.setOutputValueClass(Text.class);

    // Secondary sort: grouping comparator and partitioner for the tuple keys
    knnJob.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    knnJob.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // NOTE(review): presumably loads project settings into the job configuration -- confirm
    Utility.setConfiguration(knnJob.getConfiguration());

    // Reducer count comes from "num.reducer", defaulting to one.
    int reducerCount = knnJob.getConfiguration().getInt("num.reducer", 1);
    knnJob.setNumReduceTasks(reducerCount);

    return knnJob.waitForCompletion(true) ? 0 : 1;
}
From source file:org.avenir.reinforce.RandomFirstGreedyBandit.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Random first greedy bandit problem"; job.setJobName(jobName);//w w w . j av a2 s .c o m job.setJarByClass(RandomFirstGreedyBandit.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); Utility.setConfiguration(job.getConfiguration(), "avenir"); job.setMapperClass(RandomFirstGreedyBandit.BanditMapper.class); job.setReducerClass(RandomFirstGreedyBandit.BanditReducer.class); job.setMapOutputKeyClass(Tuple.class); job.setMapOutputValueClass(Text.class); job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class); job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.avenir.sequence.CandidateGenerationWithSelfJoin.java
License:Apache License
/**
 * Sets up and runs the job that generates candidate sequences via a self join.
 *
 * <p>The reducer count is read from {@code cgs.num.reducer}; when that key is absent it falls
 * back to {@code num.reducer}, defaulting to a single reducer.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job candidateJob = new Job(getConf());
    candidateJob.setJobName("Generates k candidate sequence");
    candidateJob.setJarByClass(CandidateGenerationWithSelfJoin.class);

    FileInputFormat.addInputPath(candidateJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(candidateJob, new Path(args[1]));

    // NOTE(review): presumably loads the "avenir" project settings into the job configuration -- confirm
    Utility.setConfiguration(candidateJob.getConfiguration(), "avenir");

    // Map and reduce stages
    candidateJob.setMapperClass(CandidateGenerationWithSelfJoin.CandidateGenerationMapper.class);
    candidateJob.setReducerClass(CandidateGenerationWithSelfJoin.CandidateGenerationReducer.class);

    // Intermediate and final key/value types
    candidateJob.setMapOutputKeyClass(Tuple.class);
    candidateJob.setMapOutputValueClass(Tuple.class);
    candidateJob.setOutputKeyClass(NullWritable.class);
    candidateJob.setOutputValueClass(Text.class);

    // Secondary sort: grouping comparator and partitioner for the tuple keys
    candidateJob.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    candidateJob.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // Job-specific reducer count wins; otherwise use the shared setting.
    int reducerCount = candidateJob.getConfiguration().getInt("cgs.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = candidateJob.getConfiguration().getInt("num.reducer", 1);
    }
    candidateJob.setNumReduceTasks(reducerCount);

    return candidateJob.waitForCompletion(true) ? 0 : 1;
}
From source file:org.beymani.proximity.AverageDistance.java
License:Apache License
/**
 * Sets up and runs the nearest-neighbour statistics MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job distanceJob = new Job(getConf());
    distanceJob.setJobName("Nearest neighbour stat calculation MR");
    distanceJob.setJarByClass(AverageDistance.class);

    FileInputFormat.addInputPath(distanceJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(distanceJob, new Path(args[1]));

    // Map and reduce stages
    distanceJob.setMapperClass(AverageDistance.TopMatchesMapper.class);
    distanceJob.setReducerClass(AverageDistance.TopMatchesReducer.class);

    // Intermediate and final key/value types
    distanceJob.setMapOutputKeyClass(TextInt.class);
    distanceJob.setMapOutputValueClass(Text.class);
    distanceJob.setOutputKeyClass(NullWritable.class);
    distanceJob.setOutputValueClass(Text.class);

    // Secondary sort: grouping comparator and partitioner for the id/rank keys
    distanceJob.setGroupingComparatorClass(IdRankGroupComprator.class);
    distanceJob.setPartitionerClass(IdRankPartitioner.class);

    // NOTE(review): presumably loads project settings into the job configuration -- confirm
    Utility.setConfiguration(distanceJob.getConfiguration());

    // Reducer count comes from "num.reducer", defaulting to one.
    int reducerCount = distanceJob.getConfiguration().getInt("num.reducer", 1);
    distanceJob.setNumReduceTasks(reducerCount);

    return distanceJob.waitForCompletion(true) ? 0 : 1;
}
From source file:org.beymani.proximity.NeighborDensity.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = new Job(getConf()); String jobName = "Nearest neighbour density"; job.setJobName(jobName);//from www. j av a 2 s. c o m job.setJarByClass(NeighborDensity.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(NeighborDensity.GroupingMapper.class); job.setReducerClass(NeighborDensity.GroupingReducer.class); job.setMapOutputKeyClass(TextInt.class); job.setMapOutputValueClass(Tuple.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(SecondarySort.TextIntIdPairGroupComprator.class); job.setPartitionerClass(SecondarySort.TextIntIdPairTuplePartitioner.class); Utility.setConfiguration(job.getConfiguration()); job.setNumReduceTasks(job.getConfiguration().getInt("num.reducer", 1)); int status = job.waitForCompletion(true) ? 0 : 1; return status; }
From source file:org.chombo.mr.Joiner.java
License:Apache License
/**
 * Sets up and runs the joiner MapReduce job.
 *
 * <p>The reducer count is read from {@code joi.num.reducer}; when that key is absent it falls
 * back to {@code num.reducer}, defaulting to a single reducer.
 *
 * @param args {@code args[0]} is the input path list handed to
 *             {@code FileInputFormat.addInputPaths}, {@code args[1]} is the output path
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Job joinJob = new Job(getConf());
    joinJob.setJobName("Joiner MR");
    joinJob.setJarByClass(Joiner.class);

    FileInputFormat.addInputPaths(joinJob, args[0]);
    FileOutputFormat.setOutputPath(joinJob, new Path(args[1]));

    // NOTE(review): presumably loads project settings into the job configuration -- confirm
    Utility.setConfiguration(joinJob.getConfiguration());

    // Map and reduce stages
    joinJob.setMapperClass(Joiner.JoinerMapper.class);
    joinJob.setReducerClass(Joiner.JoinerReducer.class);

    // Intermediate and final key/value types
    joinJob.setMapOutputKeyClass(TextInt.class);
    joinJob.setMapOutputValueClass(Tuple.class);
    joinJob.setOutputKeyClass(NullWritable.class);
    joinJob.setOutputValueClass(Text.class);

    // Secondary sort: grouping comparator and partitioner for the text/int keys
    joinJob.setGroupingComparatorClass(SecondarySort.TextIntIdPairGroupComprator.class);
    joinJob.setPartitionerClass(SecondarySort.TextIntIdPairTuplePartitioner.class);

    // Job-specific reducer count wins; otherwise use the shared setting.
    int reducerCount = joinJob.getConfiguration().getInt("joi.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = joinJob.getConfiguration().getInt("num.reducer", 1);
    }
    joinJob.setNumReduceTasks(reducerCount);

    return joinJob.waitForCompletion(true) ? 0 : 1;
}