List of usage examples for org.apache.hadoop.mapreduce.Job.getJobID()
public JobID getJobID()
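Before the project examples below, here is a minimal sketch of the call itself (the job name and configuration are illustrative assumptions, not taken from the examples). getJobID() returns the JobID assigned to the job; it returns null until the job has actually been submitted, which is why several of the examples below guard against a null ID.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;

public class GetJobIdExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "example-job"); // hypothetical job name

        // Before submission no ID has been assigned yet
        JobID before = job.getJobID();
        System.out.println("ID before submit: " + before); // prints "ID before submit: null"

        // After configuring input/output formats and paths the job can be submitted,
        // at which point the cluster assigns an ID, e.g. job_1234567890123_0001
        // job.submit();
        // JobID assigned = job.getJobID();
    }
}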
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with two blank nodes in two different files and checks
 * that writing them to a single file does not conflate them
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_01() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Different blank nodes in different files
        List<T> tuples = new ArrayList<>();
        Node bnode1 = NodeFactory.createBlankNode();
        Node bnode2 = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode1, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode2, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.input.bnodes.AbstractBlankNodeTests.java
License:Apache License
/**
 * Test that starts with the same blank node in two different files and checks
 * that writing them to a single file does not conflate them into one node
 * 
 * @throws IOException
 * @throws InterruptedException
 */
@Test
public void blank_node_identity_02() throws IOException, InterruptedException {
    Assume.assumeTrue("Requires ParserProfile be respected", this.respectsParserProfile());
    Assume.assumeFalse("Requires that Blank Node identity not be preserved", this.preservesBlankNodeIdentity());

    // Temporary files
    File a = File.createTempFile("bnode_identity", getInitialInputExtension());
    File b = File.createTempFile("bnode_identity", getInitialInputExtension());
    File intermediateOutputDir = Files.createTempDirectory("bnode_identity", new FileAttribute[0]).toFile();

    try {
        // Prepare the input data
        // Same blank node but in different files so must be treated as
        // different blank nodes and not converge
        List<T> tuples = new ArrayList<>();
        Node bnode = NodeFactory.createBlankNode();
        Node pred = NodeFactory.createURI("http://example.org/predicate");
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("first")));
        writeTuples(a, tuples);
        tuples.clear();
        tuples.add(createTuple(bnode, pred, NodeFactory.createLiteral("second")));
        writeTuples(b, tuples);

        // Set up fake job which will process the two files
        Configuration config = new Configuration(true);
        InputFormat<LongWritable, TValue> inputFormat = createInitialInputFormat();
        Job job = Job.getInstance(config);
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(a.getAbsolutePath()), new Path(b.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputDir.getAbsolutePath()));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        List<InputSplit> splits = inputFormat.getSplits(context);
        Assert.assertEquals(2, splits.size());

        // Prepare the output writing - putting all output to a single file
        OutputFormat<LongWritable, TValue> outputFormat = createIntermediateOutputFormat();
        TaskAttemptContext outputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                createAttemptID(1, 2, 1));
        RecordWriter<LongWritable, TValue> writer = outputFormat.getRecordWriter(outputTaskContext);

        for (InputSplit split : splits) {
            // Initialize the input reading
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    createAttemptID(1, 1, 1));
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);

            // Copy the input to the output - all triples go to a single output
            while (reader.nextKeyValue()) {
                writer.write(reader.getCurrentKey(), reader.getCurrentValue());
            }
        }
        writer.close(outputTaskContext);

        // Promote outputs from temporary status
        promoteInputs(intermediateOutputDir);

        // Now we need to create a subsequent job that reads the intermediate outputs
        // The blank nodes should have been given separate identities so we should not
        // be conflating them, this is the opposite problem to that described in JENA-820
        LOGGER.debug("Intermediate output directory is {}", intermediateOutputDir.getAbsolutePath());
        job = Job.getInstance(config);
        inputFormat = createIntermediateInputFormat();
        job.setInputFormatClass(inputFormat.getClass());
        NLineInputFormat.setNumLinesPerSplit(job, 100);
        FileInputFormat.setInputPaths(job, new Path(intermediateOutputDir.getAbsolutePath()));
        context = new JobContextImpl(job.getConfiguration(), job.getJobID());

        // Get the splits
        splits = inputFormat.getSplits(context);
        Assert.assertEquals(1, splits.size());

        // Expect to end up with two distinct blank nodes
        Set<Node> nodes = new HashSet<Node>();
        for (InputSplit split : splits) {
            TaskAttemptContext inputTaskContext = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, TValue> reader = inputFormat.createRecordReader(split, inputTaskContext);
            reader.initialize(split, inputTaskContext);
            while (reader.nextKeyValue()) {
                nodes.add(getSubject(reader.getCurrentValue().get()));
            }
        }

        // Nodes must not have converged
        Assert.assertEquals(2, nodes.size());
    } finally {
        a.delete();
        b.delete();
        deleteDirectory(intermediateOutputDir);
    }
}
From source file:org.apache.jena.hadoop.rdf.io.output.AbstractNodeTupleOutputFormatTests.java
License:Apache License
/**
 * Tests output
 * 
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();

    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));

    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);

    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
From source file:org.apache.kylin.engine.mr.common.HadoopJobStatusChecker.java
License:Apache License
public static JobStepStatusEnum checkStatus(Job job, StringBuilder output) {
    if (job == null || job.getJobID() == null) {
        output.append("Skip status check with empty job id..\n");
        return JobStepStatusEnum.WAITING;
    }

    JobStepStatusEnum status = null;
    try {
        switch (job.getStatus().getState()) {
        case SUCCEEDED:
            status = JobStepStatusEnum.FINISHED;
            break;
        case FAILED:
            status = JobStepStatusEnum.ERROR;
            break;
        case KILLED:
            status = JobStepStatusEnum.KILLED;
            break;
        case RUNNING:
            status = JobStepStatusEnum.RUNNING;
            break;
        case PREP:
            status = JobStepStatusEnum.WAITING;
            break;
        default:
            throw new IllegalStateException();
        }
    } catch (Exception e) {
        logger.error("error check status", e);
        output.append("Exception: " + e.getLocalizedMessage() + "\n");
        status = JobStepStatusEnum.ERROR;
    }
    return status;
}
From source file:org.apache.kylin.engine.mr.common.MapReduceExecutable.java
License:Apache License
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final String mapReduceJobClass = getMapReduceJobClass();
    String params = getMapReduceParams();
    Preconditions.checkNotNull(mapReduceJobClass);
    Preconditions.checkNotNull(params);
    try {
        Job job;
        ExecutableManager mgr = getManager();
        final Map<String, String> extra = mgr.getOutput(getId()).getExtra();
        if (extra.containsKey(ExecutableConstants.MR_JOB_ID)) {
            Configuration conf = HadoopUtil.getCurrentConfiguration();
            job = new Cluster(conf).getJob(JobID.forName(extra.get(ExecutableConstants.MR_JOB_ID)));
            logger.info("mr_job_id:" + extra.get(ExecutableConstants.MR_JOB_ID) + " resumed");
        } else {
            final Constructor<? extends AbstractHadoopJob> constructor = ClassUtil
                    .forName(mapReduceJobClass, AbstractHadoopJob.class).getConstructor();
            final AbstractHadoopJob hadoopJob = constructor.newInstance();
            hadoopJob.setConf(HadoopUtil.getCurrentConfiguration());
            hadoopJob.setAsync(true); // so the ToolRunner.run() returns right away
            logger.info("parameters of the MapReduceExecutable:");
            logger.info(params);
            String[] args = params.trim().split("\\s+");
            try {
                // for async mr job, ToolRunner just returns 0;
                // use this method instead of ToolRunner.run() because ToolRunner.run() is not thread-safe
                // Refer to: http://stackoverflow.com/questions/22462665/is-hadoops-toorunner-thread-safe
                MRUtil.runMRJob(hadoopJob, args);

                if (hadoopJob.isSkipped()) {
                    return new ExecuteResult(ExecuteResult.State.SUCCEED, "skipped");
                }
            } catch (Exception ex) {
                StringBuilder log = new StringBuilder();
                logger.error("error execute " + this.toString(), ex);
                StringWriter stringWriter = new StringWriter();
                ex.printStackTrace(new PrintWriter(stringWriter));
                log.append(stringWriter.toString()).append("\n");
                log.append("result code:").append(2);
                return new ExecuteResult(ExecuteResult.State.ERROR, log.toString());
            }
            job = hadoopJob.getJob();
        }
        final StringBuilder output = new StringBuilder();
        final HadoopCmdOutput hadoopCmdOutput = new HadoopCmdOutput(job, output);

        // final String restStatusCheckUrl = getRestStatusCheckUrl(job, context.getConfig());
        // if (restStatusCheckUrl == null) {
        //     logger.error("restStatusCheckUrl is null");
        //     return new ExecuteResult(ExecuteResult.State.ERROR, "restStatusCheckUrl is null");
        // }
        // String mrJobId = hadoopCmdOutput.getMrJobId();
        // boolean useKerberosAuth = context.getConfig().isGetJobStatusWithKerberos();
        // HadoopStatusChecker statusChecker = new HadoopStatusChecker(restStatusCheckUrl, mrJobId, output, useKerberosAuth);
        JobStepStatusEnum status = JobStepStatusEnum.NEW;
        while (!isDiscarded() && !isPaused()) {

            JobStepStatusEnum newStatus = HadoopJobStatusChecker.checkStatus(job, output);
            if (status == JobStepStatusEnum.KILLED) {
                mgr.updateJobOutput(getId(), ExecutableState.ERROR, hadoopCmdOutput.getInfo(), "killed by admin");
                return new ExecuteResult(ExecuteResult.State.FAILED, "killed by admin");
            }
            if (status == JobStepStatusEnum.WAITING && (newStatus == JobStepStatusEnum.FINISHED
                    || newStatus == JobStepStatusEnum.ERROR || newStatus == JobStepStatusEnum.RUNNING)) {
                final long waitTime = System.currentTimeMillis() - getStartTime();
                setMapReduceWaitTime(waitTime);
            }
            mgr.addJobInfo(getId(), hadoopCmdOutput.getInfo());
            status = newStatus;
            if (status.isComplete()) {
                final Map<String, String> info = hadoopCmdOutput.getInfo();
                readCounters(hadoopCmdOutput, info);
                mgr.addJobInfo(getId(), info);

                if (status == JobStepStatusEnum.FINISHED) {
                    return new ExecuteResult(ExecuteResult.State.SUCCEED, output.toString());
                } else {
                    return new ExecuteResult(ExecuteResult.State.FAILED, output.toString());
                }
            }
            Thread.sleep(context.getConfig().getYarnStatusCheckIntervalSeconds() * 1000L);
        }

        // try to kill running map-reduce job to release resources.
        if (job != null) {
            try {
                job.killJob();
            } catch (Exception e) {
                logger.warn("failed to kill hadoop job: " + job.getJobID(), e);
            }
        }

        if (isDiscarded()) {
            return new ExecuteResult(ExecuteResult.State.DISCARDED, output.toString());
        } else {
            return new ExecuteResult(ExecuteResult.State.STOPPED, output.toString());
        }

    } catch (ReflectiveOperationException e) {
        logger.error("error getMapReduceJobClass, class name:" + getParam(KEY_MR_JOB), e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    } catch (Exception e) {
        logger.error("error execute " + this.toString(), e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }
}
From source file:org.apache.nutch.mapreduce.NutchUtil.java
License:Apache License
public static Map<String, Object> getJobState(Job job, String... groups) {
    Map<String, Object> jobState = Maps.newHashMap();
    if (job == null) {
        return jobState;
    }

    try {
        if (job.getStatus() == null || job.isRetired()) {
            return jobState;
        }
    } catch (IOException | InterruptedException e) {
        return jobState;
    }

    jobState.put("jobName", job.getJobName());
    jobState.put("jobID", job.getJobID());

    jobState.put(Nutch.STAT_COUNTERS, getJobCounters(job, groups));

    return jobState;
}
From source file:org.apache.nutch.util.ToolUtil.java
License:Apache License
@SuppressWarnings("unchecked")
public static final void recordJobStatus(String label, Job job, Map<String, Object> results) {
    Map<String, Object> jobs = (Map<String, Object>) results.get(Nutch.STAT_JOBS);
    if (jobs == null) {
        jobs = new LinkedHashMap<String, Object>();
        results.put(Nutch.STAT_JOBS, jobs);
    }
    Map<String, Object> stats = new HashMap<String, Object>();
    Map<String, Object> countStats = new HashMap<String, Object>();
    try {
        Counters counters = job.getCounters();
        for (CounterGroup cg : counters) {
            Map<String, Object> cnts = new HashMap<String, Object>();
            countStats.put(cg.getDisplayName(), cnts);
            for (Counter c : cg) {
                cnts.put(c.getName(), c.getValue());
            }
        }
    } catch (Exception e) {
        countStats.put("error", e.toString());
    }
    stats.put(Nutch.STAT_COUNTERS, countStats);
    stats.put("jobName", job.getJobName());
    stats.put("jobID", job.getJobID());
    if (label == null) {
        label = job.getJobName();
        if (job.getJobID() != null) {
            label = label + "-" + job.getJobID();
        }
    }
    jobs.put(label, stats);
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatusPartitionFilterNotFound() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    Iterator<Map.Entry<String, String>> iter = conf.iterator();
    while (iter.hasNext()) {
        Map.Entry<String, String> entry = iter.next();
        System.out.println(entry.getKey() + ": " + entry.getValue());
    }

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionFilterExpression("year < '2010'", PigStorage.class, "1");
    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "1");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "1");

    assertEquals(0, files.size());
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatusPartitionFilterFound() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionFilterExpression("year<='2010' and month=='01' and day>='01'",
            PigStorage.class, "2");
    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "2");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "2");

    assertNotNull(files);
    assertEquals(1, files.size());
}
From source file:org.apache.pig.piggybank.test.storage.TestPathPartitionHelper.java
License:Apache License
@Test
public void testListStatus() throws Exception {

    PathPartitionHelper partitionHelper = new PathPartitionHelper();

    Job job = new Job(conf);
    job.setJobName("TestJob");
    job.setInputFormatClass(FileInputFormat.class);

    Configuration conf = job.getConfiguration();
    FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));

    JobContext jobContext = HadoopShims.createJobContext(conf, job.getJobID());

    partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf, PigStorage.class, "3");

    List<FileStatus> files = partitionHelper.listStatus(jobContext, PigStorage.class, "3");

    assertNotNull(files);
    assertEquals(1, files.size());
}