List of usage examples for org.apache.hadoop.mapreduce.Job.getJobID()
public JobID getJobID()
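Before the project examples below, here is a minimal sketch of the call itself (the job name and configuration are illustrative assumptions, not taken from the examples). getJobID() returns the JobID assigned to the job; it returns null until the job has actually been submitted, which is why several of the examples below guard against a null ID.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class GetJobIdExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example-job"); // hypothetical job name

        // Before submission no ID has been assigned yet
        JobID before = job.getJobID();
        System.out.println("ID before submit: " + before); // prints "ID before submit: null"

        // After configuring input/output formats and paths the job can be submitted,
        // at which point the cluster assigns an ID, e.g. job_1234567890123_0001
        // job.submit();
        // JobID assigned = job.getJobID();
    }
}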
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with the same blank node in two different files and checks
 * that writing them to a single file does not conflate them into one node
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java
License:Apache License
/**
 * Tests output
 * 
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
From source file:org.apache.kylin.engine.mr.common.HadoopJobStatusChecker.java
License:Apache License
public static JobStepStatusEnum checkStatus(Job job, StringBuilder output) {
    if (job == null || job.getJobID() == null) {
        output.append("Skip status check with empty job id..\n");
        return JobStepStatusEnum.WAITING;
    }

    JobStepStatusEnum status = null;
    try {
        switch (job.getStatus().getState()) {
        case SUCCEEDED:
            status = JobStepStatusEnum.FINISHED;
            break;
        case FAILED:
            status = JobStepStatusEnum.ERROR;
            break;
        case KILLED:
            status = JobStepStatusEnum.KILLED;
            break;
        case RUNNING:
            status = JobStepStatusEnum.RUNNING;
            break;
        case PREP:
            status = JobStepStatusEnum.WAITING;
            break;
        default:
            throw new IllegalStateException();
        }
    } catch (Exception e) {
        logger.error("error check status", e);
        output.append("Exception: " + e.getLocalizedMessage() + "\n");
        status = JobStepStatusEnum.ERROR;
    }
    return status;
}
From source file:org.apache.kylin.engine.mr.common.MapReduceExecutable.java
License:Apache License
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final String mapReduceJobClass = getMapReduceJobClass();
    String params = getMapReduceParams();
    Preconditions.checkNotNull(mapReduceJobClass);
    Preconditions.checkNotNull(params);
    try {
        Job job;
        ExecutableManager mgr = getManager();
        final Map<String, String> extra = mgr.getOutput(getId()).getExtra();
        if (extra.containsKey(ExecutableConstants.MR_JOB_ID)) {
            Configuration conf = HadoopUtil.getCurrentConfiguration();
            job = new Cluster(conf).getJob(JobID.forName(extra.get(ExecutableConstants.MR_JOB_ID)));
            logger.info("mr_job_id:" + extra.get(ExecutableConstants.MR_JOB_ID) + " resumed");
        } else {
            final Constructor<? extends AbstractHadoopJob> constructor = ClassUtil
                    .forName(mapReduceJobClass, AbstractHadoopJob.class).getConstructor();
            final AbstractHadoopJob hadoopJob = constructor.newInstance();
            hadoopJob.setConf(HadoopUtil.getCurrentConfiguration());
            hadoopJob.setAsync(true); // so the ToolRunner.run() returns right away
            logger.info("parameters of the MapReduceExecutable:");
            logger.info(params);
            String[] args = params.trim().split("\\s+");
            try {
                // for async mr job, ToolRunner just returns 0;
                // use this method instead of ToolRunner.run() because ToolRunner.run() is not thread-safe
                // Refer to: http://stackoverflow.com/questions/22462665/is-hadoops-toorunner-thread-safe
                MRUtil.runMRJob(hadoopJob, args);

                if (hadoopJob.isSkipped()) {
                    return new ExecuteResult(ExecuteResult.State.SUCCEED, "skipped");
                }
            } catch (Exception ex) {
                StringBuilder log = new StringBuilder();
                logger.error("error execute " + this.toString(), ex);
                StringWriter stringWriter = new StringWriter();
                ex.printStackTrace(new PrintWriter(stringWriter));
                log.append(stringWriter.toString()).append("\n");
                log.append("result code:").append(2);
                return new ExecuteResult(ExecuteResult.State.ERROR, log.toString());
            }
            job = hadoopJob.getJob();
        }
        final StringBuilder output = new StringBuilder();
        final HadoopCmdOutput hadoopCmdOutput = new HadoopCmdOutput(job, output);

        // final String restStatusCheckUrl = getRestStatusCheckUrl(job, context.getConfig());
        // if (restStatusCheckUrl == null) {
        //     logger.error("restStatusCheckUrl is null");
        //     return new ExecuteResult(ExecuteResult.State.ERROR, "restStatusCheckUrl is null");
        // }
        // String mrJobId = hadoopCmdOutput.getMrJobId();
        // boolean useKerberosAuth = context.getConfig().isGetJobStatusWithKerberos();
        // HadoopStatusChecker statusChecker = new HadoopStatusChecker(restStatusCheckUrl, mrJobId, output, useKerberosAuth);
        JobStepStatusEnum status = JobStepStatusEnum.NEW;
        while (!isDiscarded() && !isPaused()) {

            JobStepStatusEnum newStatus = HadoopJobStatusChecker.checkStatus(job, output);
            if (status == JobStepStatusEnum.KILLED) {
                mgr.updateJobOutput(getId(), ExecutableState.ERROR, hadoopCmdOutput.getInfo(), "killed by admin");
                return new ExecuteResult(ExecuteResult.State.FAILED, "killed by admin");
            }
            if (status == JobStepStatusEnum.WAITING && (newStatus == JobStepStatusEnum.FINISHED
                    || newStatus == JobStepStatusEnum.ERROR || newStatus == JobStepStatusEnum.RUNNING)) {
                final long waitTime = System.currentTimeMillis() - getStartTime();
                setMapReduceWaitTime(waitTime);
            }
            mgr.addJobInfo(getId(), hadoopCmdOutput.getInfo());
            status = newStatus;
            if (status.isComplete()) {
                final Map<String, String> info = hadoopCmdOutput.getInfo();
                readCounters(hadoopCmdOutput, info);
                mgr.addJobInfo(getId(), info);

                if (status == JobStepStatusEnum.FINISHED) {
                    return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
                } else {
                    return new ExecuteResult(ExecuteResult.State.FAILED, output.toString());
                }
            }
            Thread.sleep(context.getConfig().getYarnStatusCheckIntervalSeconds() * 1000L);
        }

        // try to kill running map-reduce job to release resources.
        if (job != null) {
            try {
                job.killJob();
            } catch (Exception e) {
                logger.warn("failed to kill hadoop job: " + job.getJobID(), e);
            }
        }

        if (isDiscarded()) {
            return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
        } else {
            return new ExecuteResult(ExecuteResult.State.STOPPED, output.toString());
        }

    } catch (ReflectiveOperationException e) {
        logger.error("error getMapReduceJobClass, class name:" + getParam(KEY_MR_JOB), e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    } catch (Exception e) {
        logger.error("error execute " + this.toString(), e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
From source file:org.apache.nutch.mapreduce.NutchUtil.java
License:Apache License
public static Map<String, Object> getJobState(Job job, String... groups) {
    Map<String, Object> jobState = Maps.newHashMap();
    if (job == null) {
        return jobState;
    }

    try {
        if (job.getStatus() == null || job.isRetired()) {
            return jobState;
        }
    } catch (IOException | InterruptedException e) {
        return jobState;
    }

    jobState.put("jobName", job.getJobName());
    jobState.put("jobID", job.getJobID());

    jobState.put(Nutch.STAT_COUNTERS, getJobCounters(job, groups));

    return jobState;
}
From source file:org.apache.nutch.util.ToolUtil.java
License:Apache License
@SuppressWarnings("unchecked")
public static final void recordJobStatus(String label, Job job, Map<String, Object> results) {
    Map<String, Object> jobs = (Map<String, Object>) results.get(Nutch.STAT_JOBS);
    if (jobs == null) {
        jobs = new LinkedHashMap<String, Object>();
        results.put(Nutch.STAT_JOBS, jobs);
    }
    Map<String, Object> stats = new HashMap<String, Object>();
    Map<String, Object> countStats = new HashMap<String, Object>();
    try {
        Counters counters = job.getCounters();
        for (CounterGroup cg : counters) {
            Map<String, Object> cnts = new HashMap<String, Object>();
            countStats.put(cg.getDisplayName(), cnts);
            for (Counter c : cg) {
                cnts.put(c.getName(), c.getValue());
            }
        }
    } catch (Exception e) {
        countStats.put("error", e.toString());
    }
    stats.put(Nutch.STAT_COUNTERS, countStats);
    stats.put("jobName", job.getJobName());
    stats.put("jobID", job.getJobID());
    if (label == null) {
        label = job.getJobName();
        if (job.getJobID() != null) {
            label = label + "-" + job.getJobID();
        }
    }
    jobs.put(label, stats);
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatusPartitionFilterNotFound() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    Iterator<Map.Entry<String, String>> iter = conf.iterator();
    while (iter.hasNext()) {
        Map.Entry<String, String> entry = iter.next();
        System.out.println(entry.getKey() + ": " + entry.getValue());
    }

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionFilterExpression("year < '2010'", PigStorage.class, "1");
    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "1");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "1");

    assertEquals(0, files.size());
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatusPartitionFilterFound() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionFilterExpression("year<='2010' and month=='01' and day>='01'",
            PigStorage.class, "2");
    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "2");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "2");

    assertNotNull(files);
    assertEquals(1, files.size());
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatus() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "3");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "3");

    assertNotNull(files);
    assertEquals(1, files.size());
}