Example usage for org.apache.hadoop.mapred RunningJob waitForCompletion

List of usage examples for org.apache.hadoop.mapred RunningJob waitForCompletion

Introduction

On this page you can find example usage for org.apache.hadoop.mapred RunningJob waitForCompletion.

Prototype

public void waitForCompletion() throws IOException;

Document

Blocks until the job is complete.
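
The typical pattern is to submit a job through the old mapred JobClient API and then call waitForCompletion() on the returned RunningJob handle. The snippet below is a minimal sketch of that pattern, not taken from any of the examples on this page; the JobConf is assumed to be fully configured (input/output paths, mapper, reducer) by the caller.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class WaitForCompletionSketch {
    /** Submits the already configured job and blocks until it finishes. */
    public static void runAndWait(JobConf conf) throws IOException {
        JobClient client = new JobClient(conf);
        RunningJob job = client.submitJob(conf); // returns without waiting
        job.waitForCompletion();                 // blocks until the job is complete
        if (!job.isSuccessful()) {
            throw new IOException("Job failed: " + job.getID());
        }
    }
}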

Usage

From source file:WikipediaDocnoMappingBuilder.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    // Job job = Job.getInstance(getConf());
    JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class);
    conf.setJarByClass(WikipediaDocnoMappingBuilder.class);
    conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    conf.setBoolean(KEEP_ALL_OPTION, keepAll);
    // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        conf.set("wiki.language", language);
    }
    conf.setNumReduceTasks(1);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    // job.waitForCompletion(true);
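    // Note: JobClient.runJob() below already blocks until the job finishes, so the
    // explicit waitForCompletion() call that follows returns immediately.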

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    // JobClient jobClient = new JobClient(conf);
    long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
            : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();

    WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt,
            outputFile);

    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    return 0;
}

From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java

License:Apache License

/**
 * Main entry point.
 * 
 * @param args
 *          The command line parameters.
 * @throws Exception
 *           When running the job fails.
 */
public static void main(String[] args) throws Exception {
    HBaseConfiguration conf = new HBaseConfiguration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("ERROR: Wrong number of parameters: " + args.length);
        System.err.println("Usage: " + NAME + " <inputdir>");
        System.exit(-1);
    }
    JobConf jobConf = createSubmittableJob(conf, otherArgs);
    RunningJob job = JobClient.runJob(jobConf);
    job.waitForCompletion();
    System.exit(job.isSuccessful() ? 0 : 1);
}

From source file:ca.etsmtl.lasi.hbasewikipedialoader.TestHBaseWikipediaLoader.java

License:Apache License

/**
 * Run the loader on the sample, test if it succeeded and
 * if the number of reduced articles is the same as the number of
 * rows in the table. This test expects that HBase was started on default
 * ports on the local machine.
 */
public void testWikipediaLoader() {
    try {
        HBaseConfiguration conf = new HBaseConfiguration();
        String[] args = new String[] { "sample/sample.xml" };
        JobConf jobConf = HBaseWikipediaLoader.createSubmittableJob(conf, args);
        RunningJob job = JobClient.runJob(jobConf);
        job.waitForCompletion();
        assertTrue(job.isSuccessful());
        HTable htable = new HTable(conf, HBaseWikipediaLoader.TABLE);
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("id"));
        htable.setScannerCaching(100);
        ResultScanner scanner = htable.getScanner(scan);
        Iterator<Result> ite = scanner.iterator();
        int count = 0;
        while (ite.hasNext()) {
            Result res = ite.next();
            if (res.getRow() == null) {
                break;
            }
            count++;
        }
        scanner.close();
        assertTrue(job.getCounters().getCounter(HBaseWikipediaLoader.Counters.MAPPED_WIKI_ARTICLES) == count);
    } catch (IOException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }

}

From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java

License:Apache License

/**
 * This method creates a sample MR job and submits that JobConf object to the
 * static MiniClusterController method to be executed.
 */
@Test
public void testRunningJobLocally() throws IOException, InterruptedException {
    JobConf sampleJob = createWordCountMRJobConf();
    RunningJob runningJob = miniCluster_.runJob(sampleJob);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
}

From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java

License:Apache License

/**
 * This method gets a JobConf object from the static MiniClusterController
 * method, fills it with a sample MR job and then executes the job.
 */
@Test
public void testGetConfigForMiniCluster() throws IOException {
    JobConf sampleJob = miniCluster_.getJobConf(WordCount.class);
    fillInWordCountMRJobConf(sampleJob);
    RunningJob runningJob = JobClient.runJob(sampleJob);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
}

From source file:com.mongodb.hadoop.util.MongoTool.java

License:Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that the space after -D is important: without the space the
     * property is picked up by the JVM itself rather than by Hadoop
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000l);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 * @param sortEnabled enable sorting of reduce keys for that job
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(final JobConf job, boolean sortEnabled) throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    jobClient.setSortEnabled(sortEnabled);
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 * @param sortEnabled enable sorting of reduce keys for that job
 * @param grid invocation grid to run job on
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(final JobConf job, boolean sortEnabled, final InvocationGrid grid)
        throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    jobClient.setSortEnabled(sortEnabled);
    jobClient.setGrid(grid);
    jobClient.unloadGrid = false;
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(JobConf job) throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}
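
A caller of these helpers only needs a configured JobConf; because each overload calls waitForCompletion() before returning, the returned RunningJob can be inspected immediately. The sketch below is illustrative only and is not taken from the HServerJobClient sources above; the job configuration details are assumed to be supplied by the caller.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

import com.scaleoutsoftware.soss.hserver.HServerJobClient;

public class HServerRunJobSketch {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(HServerRunJobSketch.class);
        conf.setJobName("hserver-example");
        // Mapper, reducer, key/value and format classes would be set here,
        // exactly as for a plain org.apache.hadoop.mapred job.
        FileInputFormat.addInputPath(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob() submits the job to ScaleOut hServer and waits for completion
        // internally, so the job has already finished when it returns.
        RunningJob job = HServerJobClient.runJob(conf, /* sortEnabled */ true);
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}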