Example usage for org.apache.hadoop.mapred RunningJob waitForCompletion

List of usage examples for org.apache.hadoop.mapred RunningJob waitForCompletion

Introduction

On this page you can find example usage for org.apache.hadoop.mapred RunningJob waitForCompletion.

Prototype

public void waitForCompletion() throws IOException;

Document

Blocks until the job is complete.
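
The typical pattern is to submit a job through the old mapred JobClient API and then call waitForCompletion() on the returned RunningJob handle. The snippet below is a minimal sketch of that pattern, not taken from any of the examples on this page; the JobConf is assumed to be fully configured (input/output paths, mapper, reducer) by the caller.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class WaitForCompletionSketch {
    /** Submits the already configured job and blocks until it finishes. */
    public static void runAndWait(JobConf conf) throws IOException {
        JobClient client = new JobClient(conf);
        RunningJob job = client.submitJob(conf); // returns without waiting
        job.waitForCompletion();                 // blocks until the job is complete
        if (!job.isSuccessful()) {
            throw new IOException("Job failed: " + job.getID());
        }
    }
}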

Usage

From source file:WikipediaDocnoMappingBuilder.java

License:Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    // Job job = Job.getInstance(getConf());
    JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class);
    conf.setJarByClass(WikipediaDocnoMappingBuilder.class);
    conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    conf.setBoolean(KEEP_ALL_OPTION, keepAll);
    // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        conf.set("wiki.language", language);
    }
    conf.setNumReduceTasks(1);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    // job.waitForCompletion(true);
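    // Note: JobClient.runJob() below already blocks until the job finishes, so the
    // explicit waitForCompletion() call that follows returns immediately.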

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    // JobClient jobClient = new JobClient(conf);
    long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
            : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();

    WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt,
            outputFile);

    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    return 0;
}

From source file:ca.etsmtl.lasi.hbasewikipedialoader.HBaseWikipediaLoader.java

License:Apache License

/**
 * Main entry point.
 * 
 * @param args
 *          The command line parameters.
 * @throws Exception
 *           When running the job fails.
 */
public static void main(String[] args) throws Exception {
    HBaseConfiguration conf = new HBaseConfiguration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("ERROR: Wrong number of parameters: " + args.length);
        System.err.println("Usage: " + NAME + " <inputdir>");
        System.exit(-1);
    }
    JobConf jobConf = createSubmittableJob(conf, otherArgs);
    RunningJob job = JobClient.runJob(jobConf);
    job.waitForCompletion();
    System.exit(job.isSuccessful() ? 0 : 1);
}

From source file:ca.etsmtl.lasi.hbasewikipedialoader.TestHBaseWikipediaLoader.java

License:Apache License

/**
 * Run the loader on the sample, test if it succeeded and
 * if the number of reduced articles is the same as the number of
 * rows in the table. This test expects that HBase was started on default
 * ports on the local machine.
 */
public void testWikipediaLoader() {
    try {
        HBaseConfiguration conf = new HBaseConfiguration();
        String[] args = new String[] { "sample/sample.xml" };
        JobConf jobConf = HBaseWikipediaLoader.createSubmittableJob(conf, args);
        RunningJob job = JobClient.runJob(jobConf);
        job.waitForCompletion();
        assertTrue(job.isSuccessful());
        HTable htable = new HTable(conf, HBaseWikipediaLoader.TABLE);
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("id"));
        htable.setScannerCaching(100);
        ResultScanner scanner = htable.getScanner(scan);
        Iterator<Result> ite = scanner.iterator();
        int count = 0;
        while (ite.hasNext()) {
            Result res = ite.next();
            if (res.getRow() == null) {
                break;
            }
            count++;
        }
        scanner.close();
        assertTrue(job.getCounters().getCounter(HBaseWikipediaLoader.Counters.MAPPED_WIKI_ARTICLES) == count);
    } catch (IOException ex) {
        ex.printStackTrace();
        fail(ex.getMessage());
    }

}

From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java

License:Apache License

/**
 * This method creates a sample MR job and submits that JobConf object to the
 * static MiniClusterController method to be executed.
 */
@Test
public void testRunningJobLocally() throws IOException, InterruptedException {
    JobConf sampleJob = createWordCountMRJobConf();
    RunningJob runningJob = miniCluster_.runJob(sampleJob);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
}

From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java

License:Apache License

/**
 * This method gets a JobConf object from the static MiniClusterController
 * method, fills it with a sample MR job and then executes the job.
 */
@Test
public void testGetConfigForMiniCluster() throws IOException {
    JobConf sampleJob = miniCluster_.getJobConf(WordCount.class);
    fillInWordCountMRJobConf(sampleJob);
    RunningJob runningJob = JobClient.runJob(sampleJob);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
}

From source file:com.mongodb.hadoop.util.MongoTool.java

License:Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that the space after -D is important: without the space the
     * property is picked up by the JVM itself rather than by Hadoop
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    LOG.debug("Mapper Class: " + mapper);
    LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);
    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results.  {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }

}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000l);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 * @param sortEnabled enable sorting of reduce keys for that job
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(final JobConf job, boolean sortEnabled) throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    jobClient.setSortEnabled(sortEnabled);
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 * @param sortEnabled enable sorting of reduce keys for that job
 * @param grid invocation grid to run job on
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(final JobConf job, boolean sortEnabled, final InvocationGrid grid)
        throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    jobClient.setSortEnabled(sortEnabled);
    jobClient.setGrid(grid);
    jobClient.unloadGrid = false;
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Submit the job for execution.
 *
 * @param job job for the execution
 *
 * @return {@link RunningJob} handler
 * @throws IOException if a ScaleOut hServer access error occurred
 */
public static RunningJob runJob(JobConf job) throws IOException {
    HServerJobClient jobClient = new HServerJobClient(job);
    RunningJob hServerRunningJob = jobClient.submitJob(job);
    hServerRunningJob.waitForCompletion();
    return hServerRunningJob;
}
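
A caller of these helpers only needs a configured JobConf; because each overload calls waitForCompletion() before returning, the returned RunningJob can be inspected immediately. The sketch below is illustrative only and is not taken from the HServerJobClient sources above; the job configuration details are assumed to be supplied by the caller.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

import com.scaleoutsoftware.soss.hserver.HServerJobClient;

public class HServerRunJobSketch {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(HServerRunJobSketch.class);
        conf.setJobName("hserver-example");
        // Mapper, reducer, key/value and format classes would be set here,
        // exactly as for a plain org.apache.hadoop.mapred job.
        FileInputFormat.addInputPath(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob() submits the job to ScaleOut hServer and waits for completion
        // internally, so the job has already finished when it returns.
        RunningJob job = HServerJobClient.runJob(conf, /* sortEnabled */ true);
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}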