Usage examples for org.apache.hadoop.mapred.RunningJob.waitForCompletion()
public void waitForCompletion() throws IOException;
From source file:WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override/* ww w.j a v a 2s .c o m*/ public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file") .create(OUTPUT_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); options.addOption(KEEP_ALL_OPTION, false, "keep all pages"); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION); boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION); String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input: " + inputPath); LOG.info(" - output file: " + outputFile); LOG.info(" - keep all pages: " + keepAll); LOG.info(" - language: " + language); // Job job = Job.getInstance(getConf()); JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class); conf.setJarByClass(WikipediaDocnoMappingBuilder.class); 
conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language)); conf.setBoolean(KEEP_ALL_OPTION, keepAll); // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll); if (language != null) { conf.set("wiki.language", language); } conf.setNumReduceTasks(1); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(WikipediaPageInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(tmpPath), true); // job.waitForCompletion(true); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); // JobClient jobClient = new JobClient(conf); long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue() : job.getCounters().findCounter(PageTypes.ARTICLE).getValue(); WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt, outputFile); FileSystem.get(getConf()).delete(new Path(tmpPath), true); return 0; }
From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java
License:Apache License
/**
 * Command-line entry point: builds the loader job from the arguments left over after
 * generic option parsing, runs it, and exits with 0 if the job succeeded or 1 otherwise.
 * Exits with -1 after printing usage when no input directory argument remains.
 *
 * @param args
 *          The command line parameters.
 * @throws Exception
 *           When running the job fails.
 */
public static void main(String[] args) throws Exception {
    HBaseConfiguration hbaseConf = new HBaseConfiguration();
    GenericOptionsParser optionsParser = new GenericOptionsParser(hbaseConf, args);
    String[] remaining = optionsParser.getRemainingArgs();

    if (remaining.length < 1) {
        System.err.println("ERROR: Wrong number of parameters: " + args.length);
        System.err.println("Usage: " + NAME + " <inputdir>");
        System.exit(-1);
    }

    RunningJob running = JobClient.runJob(createSubmittableJob(hbaseConf, remaining));
    running.waitForCompletion();

    int exitCode = running.isSuccessful() ? 0 : 1;
    System.exit(exitCode);
}
From source file:ca.etsmtl.lasi.hbasewikipedialoader.TestHBaseWikipediaLoader.java
License:Apache License
/**
 * Run the loader on the sample, test if it succeeded and
 * if the number of mapped articles reported by the job counter is the same
 * as the number of rows scanned from the table. This test expects that HBase
 * was started on default ports on the local machine.
 *
 * <p>The scanner and the table handle are closed in {@code finally} blocks so
 * that they are released even when scanning throws.
 */
public void testWikipediaLoader() {
    try {
        HBaseConfiguration conf = new HBaseConfiguration();
        String[] args = new String[] { "sample/sample.xml" };
        JobConf jobConf = HBaseWikipediaLoader.createSubmittableJob(conf, args);
        RunningJob job = JobClient.runJob(jobConf);
        job.waitForCompletion();
        assertTrue(job.isSuccessful());

        int count = 0;
        HTable htable = new HTable(conf, HBaseWikipediaLoader.TABLE);
        try {
            Scan scan = new Scan();
            scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("id"));
            htable.setScannerCaching(100);
            ResultScanner scanner = htable.getScanner(scan);
            try {
                Iterator<Result> ite = scanner.iterator();
                while (ite.hasNext()) {
                    Result res = ite.next();
                    // Stop counting at the first result without a row key.
                    if (res.getRow() == null) {
                        break;
                    }
                    count++;
                }
            } finally {
                scanner.close();
            }
        } finally {
            htable.close();
        }

        assertTrue(job.getCounters().getCounter(HBaseWikipediaLoader.Counters.MAPPED_WIKI_ARTICLES) == count);
    } catch (IOException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }
}
From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java
License:Apache License
/** * This method creates a sample MR job and submits that JobConf object to the * static MiniClusterController method to be executed. *///from www .java 2 s .c o m @Test public void testRunningJobLocally() throws IOException, InterruptedException { JobConf sampleJob = createWordCountMRJobConf(); RunningJob runningJob = miniCluster_.runJob(sampleJob); runningJob.waitForCompletion(); assertTrue(runningJob.isSuccessful()); }
From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java
License:Apache License
/** * This method gets a JobConf object from the static MiniClusterController * method, fills it with a sample MR job and then executes the job. *///from w w w . ja v a 2 s .c o m @Test public void testGetConfigForMiniCluster() throws IOException { JobConf sampleJob = miniCluster_.getJobConf(WordCount.class); fillInWordCountMRJobConf(sampleJob); RunningJob runningJob = JobClient.runJob(sampleJob); runningJob.waitForCompletion(); assertTrue(runningJob.isSuccessful()); }
From source file:com.mongodb.hadoop.util.MongoTool.java
License:Apache License
private int runMapredJob(final Configuration conf) { final JobConf job = new JobConf(conf, getClass()); /**// ww w .ja v a2 s. c om * Any arguments specified with -D <property>=<value> * on the CLI will be picked up and set here * They override any XML level values * Note that -D<space> is important - no space will * not work as it gets picked up by Java itself */ // TODO - Do we need to set job name somehow more specifically? // This may or may not be correct/sane job.setJarByClass(getClass()); final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf); LOG.debug("Mapper Class: " + mapper); LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI)); job.setMapperClass(mapper); Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf); if (combiner != null) { job.setCombinerClass(combiner); } job.setReducerClass(MapredMongoConfigUtil.getReducer(conf)); job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf)); job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf)); job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf)); job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf)); Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf); Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf); if (mapOutputKeyClass != null) { job.setMapOutputKeyClass(mapOutputKeyClass); } if (mapOutputValueClass != null) { job.setMapOutputValueClass(mapOutputValueClass); } /** * Determines if the job will run verbosely e.g. print debug output * Only works with foreground jobs */ final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf); /** * Run job in foreground aka wait for completion or background? 
*/ final boolean background = MapredMongoConfigUtil.isJobBackground(conf); try { RunningJob runningJob = JobClient.runJob(job); if (background) { LOG.info("Setting up and running MapReduce job in background."); return 0; } else { LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? " + verbose + "}"); runningJob.waitForCompletion(); return 0; } } catch (final Exception e) { LOG.error("Exception while executing job... ", e); return 1; } }
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test public void perms() throws IOException, InterruptedException { MiniDFSCluster minidfs = null;// w w w .j av a 2 s. c o m FileSystem fs = null; MiniMRClientCluster minimr = null; try { Configuration conf = new Configuration(true); conf.set("fs.permission.umask-mode", "0077"); minidfs = new MiniDFSCluster.Builder(conf).build(); minidfs.waitActive(); fs = minidfs.getFileSystem(); conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString()); Path p = path("/in"); fs.mkdirs(p); FSDataOutputStream os = fs.create(new Path(p, "input.txt")); os.write("hello!".getBytes("UTF-8")); os.close(); String user = UserGroupInformation.getCurrentUser().getUserName(); Path home = new Path("/User/" + user); fs.mkdirs(home); minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf); JobConf job = new JobConf(minimr.getConfig()); job.setJobName("PermsTest"); JobClient client = new JobClient(job); FileInputFormat.addInputPath(job, p); FileOutputFormat.setOutputPath(job, path("/out")); job.setInputFormat(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(MySleepMapper.class); job.setNumReduceTasks(1); RunningJob submittedJob = client.submitJob(job); // Sleep for a bit to let localization finish System.out.println("Sleeping..."); Thread.sleep(3 * 1000l); System.out.println("Done sleeping..."); assertFalse(UserGroupInformation.isSecurityEnabled()); Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/"); assertTrue(fs.exists(stagingRoot)); assertEquals(1, fs.listStatus(stagingRoot).length); Path staging = fs.listStatus(stagingRoot)[0].getPath(); Path jobXml = path(staging + "/job.xml"); assertTrue(fs.exists(jobXml)); FileStatus fileStatus = fs.getFileStatus(jobXml); System.out.println("job.xml permission = " + fileStatus.getPermission()); 
assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ)); assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ)); submittedJob.waitForCompletion(); } finally { if (minimr != null) { minimr.stop(); } if (fs != null) { fs.close(); } if (minidfs != null) { minidfs.shutdown(true); } } }
From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java
License:Apache License
/** * Submit the job for execution./*from w ww .j ava 2s. c o m*/ * * @param job job for the execution * @param sortEnabled enable sorting of reduce keys for that job * * @return {@link RunningJob} handler * @throws IOException if a ScaleOut hServer access error occurred */ public static RunningJob runJob(final JobConf job, boolean sortEnabled) throws IOException { HServerJobClient jobClient = new HServerJobClient(job); jobClient.setSortEnabled(sortEnabled); RunningJob hServerRunningJob = jobClient.submitJob(job); hServerRunningJob.waitForCompletion(); return hServerRunningJob; }
From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java
License:Apache License
/** * Submit the job for execution./*ww w . j a va 2 s . c om*/ * * @param job job for the execution * @param sortEnabled enable sorting of reduce keys for that job * @param grid invocation grid to run job on * * @return {@link RunningJob} handler * @throws IOException if a ScaleOut hServer access error occurred */ public static RunningJob runJob(final JobConf job, boolean sortEnabled, final InvocationGrid grid) throws IOException { HServerJobClient jobClient = new HServerJobClient(job); jobClient.setSortEnabled(sortEnabled); jobClient.setGrid(grid); jobClient.unloadGrid = false; RunningJob hServerRunningJob = jobClient.submitJob(job); hServerRunningJob.waitForCompletion(); return hServerRunningJob; }
From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java
License:Apache License
/** * Submit the job for execution./*from w ww. j a v a 2 s .c o m*/ * * @param job job for the execution * * @return {@link RunningJob} handler * @throws IOException if a ScaleOut hServer access error occurred */ public static RunningJob runJob(JobConf job) throws IOException { HServerJobClient jobClient = new HServerJobClient(job); RunningJob hServerRunningJob = jobClient.submitJob(job); hServerRunningJob.waitForCompletion(); return hServerRunningJob; }