List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
From source file:mvm.rya.accumulo.mr.fileinput.BulkNtripsInputTool.java
License:Apache License
@Override public int run(final String[] args) throws Exception { final Configuration conf = getConf(); try {// www . ja v a 2s . co m //conf zk = conf.get(MRUtils.AC_ZK_PROP, zk); ttl = conf.get(MRUtils.AC_TTL_PROP, ttl); instance = conf.get(MRUtils.AC_INSTANCE_PROP, instance); userName = conf.get(MRUtils.AC_USERNAME_PROP, userName); pwd = conf.get(MRUtils.AC_PWD_PROP, pwd); workDirBase = conf.get(WORKDIR_PROP, workDirBase); format = conf.get(MRUtils.FORMAT_PROP, format); conf.set(MRUtils.FORMAT_PROP, format); final String inputDir = args[0]; ZooKeeperInstance zooKeeperInstance = new ZooKeeperInstance(instance, zk); Connector connector = zooKeeperInstance.getConnector(userName, new PasswordToken(pwd)); TableOperations tableOperations = connector.tableOperations(); if (conf.get(AccumuloRdfConfiguration.CONF_ADDITIONAL_INDEXERS) != null) { throw new IllegalArgumentException("Cannot use Bulk N Trips tool with Additional Indexers"); } String tablePrefix = conf.get(MRUtils.TABLE_PREFIX_PROPERTY, null); if (tablePrefix != null) RdfCloudTripleStoreConstants.prefixTables(tablePrefix); String[] tables = { tablePrefix + RdfCloudTripleStoreConstants.TBL_OSP_SUFFIX, tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX, tablePrefix + RdfCloudTripleStoreConstants.TBL_PO_SUFFIX }; Collection<Job> jobs = new ArrayList<Job>(); for (final String tableName : tables) { PrintStream out = null; try { String workDir = workDirBase + "/" + tableName; System.out.println("Loading data into table[" + tableName + "]"); Job job = new Job(new Configuration(conf), "Bulk Ingest load data to Generic RDF Table[" + tableName + "]"); job.setJarByClass(this.getClass()); //setting long job Configuration jobConf = job.getConfiguration(); jobConf.setBoolean("mapred.map.tasks.speculative.execution", false); jobConf.setBoolean("mapred.reduce.tasks.speculative.execution", false); jobConf.set("io.sort.mb", jobConf.get("io.sort.mb", "256")); jobConf.setBoolean("mapred.compress.map.output", true); // 
jobConf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); //TODO: I would like LZO compression job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(ParseNtripsMapper.class); job.setMapOutputKeyClass(Key.class); job.setMapOutputValueClass(Value.class); job.setCombinerClass(OutStmtMutationsReducer.class); job.setReducerClass(OutStmtMutationsReducer.class); job.setOutputFormatClass(AccumuloFileOutputFormat.class); // AccumuloFileOutputFormat.setZooKeeperInstance(jobConf, instance, zk); jobConf.set(ParseNtripsMapper.TABLE_PROPERTY, tableName); TextInputFormat.setInputPaths(job, new Path(inputDir)); FileSystem fs = FileSystem.get(conf); Path workPath = new Path(workDir); if (fs.exists(workPath)) fs.delete(workPath, true); //make failures dir Path failures = new Path(workDir, "failures"); fs.delete(failures, true); fs.mkdirs(new Path(workDir, "failures")); AccumuloFileOutputFormat.setOutputPath(job, new Path(workDir + "/files")); out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "/splits.txt")))); if (!tableOperations.exists(tableName)) tableOperations.create(tableName); Collection<Text> splits = tableOperations.getSplits(tableName, Integer.MAX_VALUE); for (Text split : splits) out.println(new String(Base64.encodeBase64(TextUtil.getBytes(split)))); job.setNumReduceTasks(splits.size() + 1); out.close(); job.setPartitionerClass(KeyRangePartitioner.class); RangePartitioner.setSplitFile(job, workDir + "/splits.txt"); jobConf.set(WORKDIR_PROP, workDir); job.submit(); jobs.add(job); } catch (Exception re) { throw new RuntimeException(re); } finally { if (out != null) out.close(); } } for (Job job : jobs) { while (!job.isComplete()) { Thread.sleep(1000); } } for (String tableName : tables) { String workDir = workDirBase + "/" + tableName; String filesDir = workDir + "/files"; String failuresDir = workDir + "/failures"; FileSystem fs = FileSystem.get(conf); //make sure that the "accumulo" user can 
read/write/execute into these directories this path fs.setPermission(new Path(filesDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); fs.setPermission(new Path(failuresDir), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); tableOperations.importDirectory(tableName, filesDir, failuresDir, false); } } catch (Exception e) { throw new RuntimeException(e); } return 0; }
From source file:mvm.rya.joinselect.mr.FullTableSize.java
License:Apache License
/**
 * Configures and runs the full-table MapReduce job, blocking until it finishes.
 *
 * <p>The input table, output table and authorizations are read from the tool
 * configuration ({@code SPO_TABLE}, {@code SELECTIVITY_TABLE}, {@code AUTHS}); input and
 * output wiring is delegated to {@code JoinSelectStatsUtil.initTableMRJob}.
 *
 * @param args unused
 * @return 0 if the job succeeded, 1 otherwise
 * @throws IllegalArgumentException if the input or output table is not configured
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String inTable = conf.get(SPO_TABLE);
    String outTable = conf.get(SELECTIVITY_TABLE);
    String auths = conf.get(AUTHS);

    // Explicit check instead of `assert`: assertions are disabled by default at runtime,
    // so a missing table name would otherwise slip through to job setup.
    if (inTable == null || outTable == null) {
        throw new IllegalArgumentException(
                "Both " + SPO_TABLE + " and " + SELECTIVITY_TABLE + " must be set in the configuration");
    }

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initTableMRJob(job, inTable, outTable, auths);
    job.setMapperClass(FullTableMapper.class);
    job.setCombinerClass(FullTableCombiner.class);
    job.setReducerClass(FullTableReducer.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:mvm.rya.joinselect.mr.JoinSelectStatisticsSum.java
License:Apache License
/**
 * Configures and runs the statistics-sum MapReduce job, blocking until it finishes.
 *
 * <p>The output table, authorizations and input path are read from the tool
 * configuration ({@code SELECTIVITY_TABLE}, {@code AUTHS}, {@code INPUTPATH}); input and
 * output wiring is delegated to {@code JoinSelectStatsUtil.initSumMRJob}. The job runs
 * with 32 reduce tasks.
 *
 * @param args unused
 * @return 0 if the job succeeded, 1 otherwise
 * @throws IllegalArgumentException if the output table or input path is not configured
 */
@Override
public int run(String[] args)
        throws AccumuloSecurityException, IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConf();
    String outTable = conf.get(SELECTIVITY_TABLE);
    String auths = conf.get(AUTHS);
    String inPath = conf.get(INPUTPATH);

    // Explicit check instead of `assert`: assertions are disabled by default at runtime,
    // so a missing setting would otherwise slip through to job setup.
    if (outTable == null || inPath == null) {
        throw new IllegalArgumentException(
                "Both " + SELECTIVITY_TABLE + " and " + INPUTPATH + " must be set in the configuration");
    }

    Job job = new Job(conf, this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());
    JoinSelectStatsUtil.initSumMRJob(job, inPath, outTable, auths);
    job.setMapperClass(CardinalityIdentityMapper.class);
    job.setCombinerClass(CardinalityIdentityCombiner.class);
    job.setReducerClass(CardinalityIdentityReducer.class);
    job.setNumReduceTasks(32);

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
From source file:mx.iteso.msc.asn.mrwordcount.MyDriver.java
License:Apache License
/** * @param args the command line arguments *///from www. j av a 2s . c om public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "WordCounter"); job.setJarByClass(MyDriver.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:myGrep.Grep.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 3) { System.out.println("Grep <inDir> <outDir> <regex> [<group>]"); ToolRunner.printGenericCommandUsage(System.out); return 2; }//from w ww . ja v a 2 s .co m System.out.println("0:" + args[0] + " 1:" + args[1] + " 2:" + args[2]); Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE))); Configuration conf = getConf(); conf.set(RegexMapper.PATTERN, args[2]); if (args.length == 4) conf.set(RegexMapper.GROUP, args[3]); Job grepJob = Job.getInstance(conf); try { init_log(); grepJob.setJobName("wzl-grep-search"); grepJob.setJarByClass(Grep.class); FileInputFormat.setInputPaths(grepJob, args[0]); //grepJob.setMapperClass(RegexMapper.class); grepJob.setMapperClass(myRegMapper.class); //(string, int) -> sub int -> (string , sum) grepJob.setCombinerClass(LongSumReducer.class); grepJob.setReducerClass(LongSumReducer.class); //FileOutputFormat.setOutputPath(grepJob, tempDir); FileOutputFormat.setOutputPath(grepJob, new Path(args[1])); grepJob.setOutputFormatClass(SequenceFileOutputFormat.class); grepJob.setOutputKeyClass(Text.class); grepJob.setOutputValueClass(LongWritable.class); grepJob.waitForCompletion(true); /* Job sortJob = Job.getInstance(conf); sortJob.setJobName("wzl-grep-sort"); sortJob.setJarByClass(Grep.class); FileInputFormat.setInputPaths(sortJob, tempDir); sortJob.setInputFormatClass(SequenceFileInputFormat.class); sortJob.setMapperClass(InverseMapper.class); sortJob.setNumReduceTasks(1); // write a single file FileOutputFormat.setOutputPath(sortJob, new Path(args[1])); sortJob.setSortComparatorClass( // sort by decreasing freq LongWritable.DecreasingComparator.class); sortJob.waitForCompletion(true); */ } finally { FileSystem.get(conf).delete(tempDir, true); } return 0; }
From source file:mylab0.WordCount.java
License:Apache License
/**
 * Entry point of the word count job.
 *
 * <p>Generic Hadoop options are stripped by {@code GenericOptionsParser}; exactly two
 * arguments must remain (input path, output path), otherwise a usage line is printed
 * and the JVM exits with status 2. The JVM exits with 0 if the job succeeds and 1 if
 * it does not.
 *
 * @param args command line arguments
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] paths = parser.getRemainingArgs();
    if (paths.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    final Job wordCount = new Job(configuration, "word count");
    wordCount.setJarByClass(WordCount.class);

    // IntSumReducer is registered as both the combiner and the reducer.
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);
    wordCount.setNumReduceTasks(1);

    final Path input = new Path(paths[0]);
    FileInputFormat.addInputPath(wordCount, input);
    final Path output = new Path(paths[1]);
    FileOutputFormat.setOutputPath(wordCount, output);

    final boolean succeeded = wordCount.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:mylab0.WordCountMultipleInputs.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: wordcountmultipleinputs <input1> <input2> <out>"); System.exit(2);//from ww w. ja v a 2 s . c o m } Job job = new Job(conf, "word count multiple inputs"); job.setJarByClass(WordCountMultipleInputs.class); MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, Mapper1.class); MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, Mapper2.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(1); FileOutputFormat.setOutputPath(job, new Path(otherArgs[2])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:nbayes_mr.NBAYES_MR.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { // TODO code application logic here splitter("/home/hduser/hw4data.csv"); //FileSystem hdfs=FileSystem.get(new Configuration()); //System.out.println("1-----"+hdfs.getHomeDirectory()); //Path inpdir=new Path(hdfs.getHomeDirectory().toString()+"/input"); for (int i = 1; i <= 5; i++) { //hdfs.delete(inpdir,true); //FileUtils.cleanDirectory(new File()); //hdfs.mkdirs(inpdir); //FileUtils.cleanDirectory(new File("/output")); // for(int j=1;j<=5;j++){ // if(j!=i){ // File source = new File("/home/hduser/data"+j+".txt"); // File dest = new File("/input"); // try { // // hdfs.copyFromLocalFile(new Path("/home/hduser/data"+j+".txt"),inpdir); // //FileUtils.copyFileToDirectory(source, dest); // //FileUtils.copyDirectory(source, dest); // } catch (IOException e) { // e.printStackTrace(); // } // } // } Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "kmeans"); job.setJarByClass(NBAYES_MR.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumCombiner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path("/input" + String.valueOf(i))); FileOutputFormat.setOutputPath(job, new Path("/output")); job.waitForCompletion(true);//from w ww. j a v a 2 s. 
c o m FileSystem fOpen = FileSystem.get(conf); Path outputPathReduceFile = new Path("/output/part-r-00000"); BufferedReader reader = new BufferedReader(new InputStreamReader(fOpen.open(outputPathReduceFile))); String Line = reader.readLine(); //System.out.println(Line); while (Line != null) { String[] split = Line.split("_"); String belongs[] = split[0].split(":"); //System.out.println(Line); if (belongs[0].equalsIgnoreCase("X")) { probxmap.put(belongs[1], Integer.parseInt(split[1].trim())); } else if (belongs[0].equalsIgnoreCase("H")) { probhmap.put(belongs[1], Integer.parseInt(split[1].trim())); } else if (belongs[0].equalsIgnoreCase("X|H")) { //System.out.println(belongs[1]); probxhmap.put(belongs[1], Integer.parseInt(split[1].trim())); } else { total = Integer.parseInt(split[1].trim()); } //probmap.put(split[0], Integer.parseInt(split[1])); Line = reader.readLine(); } deleteFolder(conf, "/output"); test("/home/hduser/data" + i + ".txt"); } double avg = 0.0; for (int i = 0; i < accuracy.size(); i++) { avg += accuracy.get(i); } System.out.println("Accuracy : " + avg * 100 / 5); }
From source file:ngramown.Ngramown.java
/**
 * Entry point of the "ngram" job: reads text from {@code args[0]}, writes text output
 * to {@code args[1]}, and exits the JVM with 0 on success or 1 on failure.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final Path source = new Path(args[0]);
    final Path destination = new Path(args[1]);

    final Job ngramJob = Job.getInstance(configuration, "ngram");
    ngramJob.setJarByClass(Ngramown.class);

    // IntSumReducer is registered as both the combiner and the reducer.
    ngramJob.setMapperClass(TokenizerMapper.class);
    ngramJob.setCombinerClass(IntSumReducer.class);
    ngramJob.setReducerClass(IntSumReducer.class);

    ngramJob.setOutputKeyClass(Text.class);
    ngramJob.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(ngramJob, source);
    FileOutputFormat.setOutputPath(ngramJob, destination);

    ngramJob.setInputFormatClass(TextInputFormat.class);
    ngramJob.setOutputFormatClass(TextOutputFormat.class);

    final boolean succeeded = ngramJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
From source file:njit.cs698.wenbin.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);//from w ww . ja va 2 s .com } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //job.setReducerClass(IntSumReducer.class); ChainReducer.setReducer(job, IntSumReducer.class, Text.class, IntWritable.class, Text.class, IntWritable.class, new Configuration(false)); ChainReducer.addMapper(job, ReducerMapper.class, Text.class, IntWritable.class, IntWritable.class, Text.class, new Configuration(false)); //job.setOutputKeyClass(Text.class); //job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }