List of usage examples for org.apache.hadoop.mapred JobClient runJob
public static RunningJob runJob(JobConf job) throws IOException
From source file:SBP.java
License:Apache License
/**
 * Tool entry point for the SBP (Scalable Belief Propagation) driver.
 *
 * Expects exactly 11 positional arguments:
 *   args[0] edge path, args[1] prior path, args[2] output path,
 *   args[3] number of messages |E|, args[4] requested reducer count,
 *   args[5] max iterations, args[7] number of states,
 *   args[8] edge-potential file, args[9] "new" or "contNNN"-style resume tag,
 *   args[10] lambda.
 * NOTE(review): args[6] is never read — confirm whether it is reserved or dead.
 *
 * Runs the init job once, then iterates update/smooth/check/sum jobs until the
 * summed error drops below a convergence threshold or max_iter is reached,
 * finally emitting beliefs to HDFS under args[2].
 *
 * @return 0 on success, or printUsage()'s result on bad arguments
 * @throws Exception if any Hadoop job or filesystem operation fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 11) {
        // Echo what was received to help diagnose the bad invocation.
        for (int i = 0; i < args.length; i++) {
            System.out.println("Args: " + i + " " + args[i]);
        }
        System.out.println(args.length);
        return printUsage();
    }

    // Parse positional arguments into instance fields used by the config* helpers.
    lambda = Double.parseDouble(args[10]);
    edge_path = new Path(args[0]);
    prior_path = new Path(args[1]);
    output_path = new Path(args[2]);
    Path prev_local_path = new Path("run_tmp/prev_local/");
    Path new_local_path = new Path("run_tmp/new_local/");
    Path tmp_output_path = new Path(output_path.toString());
    number_msg = Long.parseLong(args[3]);
    nreducer = Integer.parseInt(args[4]);
    // NOTE(review): the parsed reducer count is immediately overwritten with 1,
    // making args[4] ineffective — confirm whether this pin-to-1 is intentional.
    nreducer = 1;
    max_iter = Integer.parseInt(args[5]);
    nstate = Integer.parseInt(args[7]);
    edge_potential_str = read_edge_potential(args[8]);

    // args[9] selects a fresh run ("new...") or resumes at iteration N
    // (presumably a tag like "contNNN"; substring(4) skips a 4-char prefix —
    // TODO confirm the exact resume-tag format against the caller).
    int cur_iter = 1;
    if (args[9].startsWith("new") == false) {
        cur_iter = Integer.parseInt(args[9].substring(4));
    }

    System.out.println("edge_path=" + edge_path.toString() + ", prior_path=" + prior_path.toString()
            + ", output_path=" + output_path.toString() + ", |E|=" + number_msg + ", nreducer=" + nreducer
            + ", maxiter=" + max_iter + ", nstate=" + nstate + ", edge_potential_str=" + edge_potential_str
            + ", cur_iter=" + cur_iter + ", lambda=" + lambda);

    fs = FileSystem.get(getConf());

    // Run Stage1 and Stage2.
    // Initialization only happens on a fresh run; a resumed run reuses the
    // message directories left by the previous invocation.
    if (cur_iter == 1) {
        System.out.println("BP: Initializing messages...");
        JobClient.runJob(configInitMessage());
    }

    // Convergence threshold scales with the number of messages and states.
    double converge_threshold = number_msg * EPS * nstate;

    for (int i = cur_iter; i <= max_iter; i++) {
        System.out.println(" *** ITERATION " + (i) + "/" + max_iter + " ***");

        // One BP round = update, smooth, then measure per-message error.
        JobClient.runJob(configUpdateMessage());
        JobClient.runJob(configSmoothMessage());
        JobClient.runJob(configCheckErr());
        JobClient.runJob(configSumErr());

        // The sum job emits a single "count<TAB>sum" line read back locally.
        String line = readLocaldirOneline(sum_error_path.toString());
        fs.delete(check_error_path, true);
        fs.delete(sum_error_path, true);
        String[] parts = line.split("\t");
        int n = Integer.parseInt(parts[0]);
        double sum = Double.parseDouble(parts[1]);
        System.out.println("Converged Msg: " + (number_msg - n));
        System.out.println("Sum Error: " + sum);
        if (sum < converge_threshold) {
            break;
        }
        // rotate directory: smoothed messages become the current messages for
        // the next round. NOTE(review): single-arg fs.delete(Path) is the
        // deprecated form of delete(Path, boolean) — consider delete(p, true).
        fs.delete(message_cur_path);
        fs.delete(message_next_path);
        fs.rename(message_smooth_path, message_cur_path);
    }

    System.out.println("BP: Computing beliefs...");
    JobClient.runJob(configComputeBelief());

    System.out.println("BP finished. The belief vector is in the HDFS " + args[2]);

    return 0;
}
From source file:gen_rank.java
License:LGPL
public static void runjob(String input, String output) throws Exception { JobConf conf = new JobConf(gen_rank.class); conf.setJobName("Preparing_data"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); }
From source file:WikipediaForwardIndexBuilder.java
License:Apache License
/**
 * Builds a forward index for a block-compressed Wikipedia SequenceFile.
 *
 * Runs a map-only-style job (single reducer) that emits one
 * "docno offset fileno" line per block into a temporary directory, then
 * rewrites those lines into a compact binary index file on HDFS.
 *
 * @param args command-line options: -input (absolute path, required),
 *             -index_file (required), -language (optional two-letter code)
 * @return 0 on success, -1 on bad arguments
 * @throws Exception if the job or any filesystem operation fails
 */
@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("index file").create(INDEX_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    // Both the input and the index file location are mandatory.
    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(INDEX_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    Path inputPath = new Path(cmdline.getOptionValue(INPUT_OPTION));
    String indexFile = cmdline.getOptionValue(INDEX_FILE_OPTION);

    // Randomized scratch directory for the job's intermediate output.
    String tmpPath = "tmp-" + WikipediaForwardIndexBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    // The input path is persisted verbatim into the index header below, so it
    // must be absolute to be usable later.
    if (!inputPath.isAbsolute()) {
        System.err.println("Error: " + INPUT_OPTION + " must be an absolute path!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    JobConf conf = new JobConf(getConf(), WikipediaForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - language: " + language);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    conf.setJobName(String.format("BuildWikipediaForwardIndex[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            INDEX_FILE_OPTION, indexFile, LANGUAGE_OPTION, language));

    // Single reducer so the job produces exactly one sorted part-00000 file.
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    if (language != null) {
        conf.set("wiki.language", language);
    }

    // Files must not be split: block offsets are only meaningful per file.
    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already.
    fs.delete(new Path(tmpPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.getCounter(Blocks.Total);

    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(tmpPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    // Index header: index implementation class name, source path, block count.
    out.writeUTF(edu.umd.cloud9.collection.wikipedia.WikipediaForwardIndex.class.getCanonicalName());
    out.writeUTF(inputPath.toString());
    out.writeInt(blocks);

    // Each text line "docno offset fileno" becomes a fixed-width binary record.
    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");

        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;

        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }

    reader.close();
    out.close();

    // Sanity check: every counted block must have produced an index record.
    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    // Clean up.
    fs.delete(new Path(tmpPath), true);

    return 0;
}
From source file:GapDeduceRunner.java
License:Apache License
public static void main(String[] args) throws IOException { JobConf conf = new JobConf(GapDeduceRunner.class); conf.setJobName("gapdeduce"); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Gapper.class); conf.setReducerClass(Deducer.class); // KeyValueTextInputFormat treats each line as an input record, // and splits the line by the tab character to separate it into key and value conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:LicenseStatewiseCount.java
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(ParkingTicketStatewiseCount.class); conf.setJobName("Statecounts"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); }
From source file:DijikstraAlgo.java
License:GNU General Public License
public static void run(String[] args) throws Exception { IN = "hdfs://10.8.3.161:9000/user/sagar/input/"; OUT = "hdfs://10.8.3.161:9000/user/sagar/output/"; String input = IN;/* w w w . j ava 2s . c o m*/ String output = OUT + System.nanoTime(); String MAX_SPLIT_SIZE = args[0]; boolean isdone = false; // Reiteration again and again till the convergence while (isdone == false) { JobConf conf = new JobConf(DijikstraAlgo.class); conf.setJobName("Dijikstra"); // conf.set("mapred.max.split.size", MAX_SPLIT_SIZE); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); input = output + "/part-00000"; isdone = true;// set the job to NOT run again! Path ofile = new Path(input); FileSystem fs = FileSystem.get(new URI("hdfs://10.8.3.165:9000"), conf); //FileSystem fs = FileSystem.get(new Configuration()); BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(ofile))); HashMap<Integer, Integer> imap = new HashMap<Integer, Integer>(); String line = br.readLine(); // Read the current output file and put it into HashMap while (line != null) { String[] sp = line.split("\t| "); int node = Integer.parseInt(sp[0]); int distance = Integer.parseInt(sp[1]); imap.put(node, distance); line = br.readLine(); } br.close(); // Check for convergence condition if any node is still left then // continue else stop Iterator<Integer> itr = imap.keySet().iterator(); while (itr.hasNext()) { int key = itr.next(); int value = imap.get(key); if (value >= 125) { isdone = false; } } input = output; output = OUT + System.nanoTime(); } }
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * Each of the {@code numMaps} map tasks samples {@code numPoints} points of a
 * Halton sequence; the single reducer tallies points inside/outside the unit
 * circle, and Pi is estimated as 4 * inside / (numMaps * numPoints).
 *
 * @param numMaps   number of map tasks (one generated input file per task)
 * @param numPoints number of sample points per map task
 * @param jobConf   base job configuration to populate and submit
 * @return the estimated value of Pi
 * @throws IOException if TMP_DIR already exists, or on any job/FS failure
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    // Single reducer aggregates all samples into one "reduce-out" file.
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task; each record carries the
        // task's sample offset into the Halton sequence and its sample count.
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs: the reducer writes (numInside, numOutside) as one pair.
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value: 4 * inside / (numMaps * numPoints),
        // at 20 digits of scale.
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        // Always remove the scratch directory, even if the job failed.
        fs.delete(TMP_DIR, true);
    }
}
From source file:DistribCountingDriver.java
License:Apache License
public int run(String args[]) throws Exception { long job_start_time, job_end_time; long job_runtime; JobConf conf = new JobConf(getConf()); int minFreqPercent = Integer.parseInt(args[0]); int datasetSize = Integer.parseInt(args[1]); conf.setInt("DISTRCOUNT.datasetSize", datasetSize); conf.setInt("DISTRCOUNT.minFreqPercent", minFreqPercent); conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); conf.setInt("mapred.task.timeout", 60000000); conf.setJarByClass(DistribCountingDriver.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(DistribCountingMapper.class); conf.setCombinerClass(DistribCountingCombiner.class); conf.setReducerClass(DistribCountingReducer.class); conf.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(conf, new Path(args[2])); FileOutputFormat.setOutputPath(conf, new Path(args[3])); job_start_time = System.currentTimeMillis(); JobClient.runJob(conf); job_end_time = System.currentTimeMillis(); job_runtime = (job_end_time - job_start_time) / 1000; System.out.println("total job runtime (seconds): " + job_runtime); return 0;/* w ww .j a va 2s. c om*/ }
From source file:NgramMatrixBuilder.java
License:Apache License
/** * The main driver for word count map/reduce program. * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker. */// ww w . ja v a2 s . c o m public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), NgramMatrixBuilder.class); conf.setJobName("ngrammatrixbuilder"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } TextInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:BMTColumnLoader.java
License:Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), BMTColumnLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setJobName("BMTColumnLoader"); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w w w . j ava 2s . co m conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]); } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }