List of usage examples for org.apache.hadoop.mapreduce.JobContext#getJobID, which returns the unique JobID assigned to the running job.

public JobID getJobID();
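Before the project-specific examples, a minimal sketch of the pattern that recurs in most of them: deriving a per-job path from the job's ID. Everything here (the class name, method name, and base path) is hypothetical and not taken from any of the sources below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;

public final class JobScratchPaths {
    private JobScratchPaths() {
    }

    // getJobID() returns the JobID assigned to this job; its string form
    // (e.g. "job_200707121733_0001") makes a convenient per-job directory name.
    public static Path scratchDirFor(JobContext context, Path base) {
        JobID jobId = context.getJobID();
        return new Path(base, jobId.toString());
    }
}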
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testAtomicCommitMissingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        fs.mkdirs(new Path(workPath));

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
        // XXX set label to false explicitly, so conf is not mixed up
        conf.setBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false);

        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertFalse(fs.exists(new Path(finalPath)));

        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));

        // Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Atomic commit failure");
    } finally {
        TestDistCpUtils.delete(fs, workPath);
        TestDistCpUtils.delete(fs, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    }
}
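The Mockito stubbing in the test above recurs in most of the test examples on this page. Distilled into a standalone helper (the class and method names are hypothetical; the two stubbed methods are exactly the ones the committers under test read):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.mockito.Mockito;

public final class JobContextMocks {
    private JobContextMocks() {
    }

    // Stub only getConfiguration() and getJobID(): that is all the
    // committers exercised by these tests ask of the JobContext.
    public static JobContext mockJobContext(Configuration conf) {
        JobContext jobContext = Mockito.mock(JobContext.class);
        Mockito.when(jobContext.getConfiguration()).thenReturn(conf);
        Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
        return jobContext;
    }
}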
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testAtomicCommitExistingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        fs.mkdirs(new Path(workPath));
        fs.mkdirs(new Path(finalPath));

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
        // XXX set label to false explicitly, so conf is not mixed up
        conf.setBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false);

        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));

        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));

        // Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Atomic commit failure");
    } finally {
        TestDistCpUtils.delete(fs, workPath);
        TestDistCpUtils.delete(fs, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyOutputFormat.java
License:Apache License
@Test
public void testCheckOutputSpecs() {
    try {
        OutputFormat outputFormat = new CopyOutputFormat();
        Configuration conf = new Configuration();
        Job job = new Job(conf);
        JobID jobID = new JobID("200707121733", 1);

        try {
            JobContext context = Mockito.mock(JobContext.class);
            Mockito.when(context.getConfiguration()).thenReturn(job.getConfiguration());
            Mockito.when(context.getJobID()).thenReturn(jobID);
            outputFormat.checkOutputSpecs(context);
            Assert.fail("No checking for invalid work/commit path");
        } catch (IllegalStateException ignore) {
        }

        CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
        try {
            JobContext context = Mockito.mock(JobContext.class);
            Mockito.when(context.getConfiguration()).thenReturn(job.getConfiguration());
            Mockito.when(context.getJobID()).thenReturn(jobID);
            outputFormat.checkOutputSpecs(context);
            Assert.fail("No checking for invalid commit path");
        } catch (IllegalStateException ignore) {
        }

        job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "");
        CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
        try {
            JobContext context = Mockito.mock(JobContext.class);
            Mockito.when(context.getConfiguration()).thenReturn(job.getConfiguration());
            Mockito.when(context.getJobID()).thenReturn(jobID);
            outputFormat.checkOutputSpecs(context);
            Assert.fail("No checking for invalid work path");
        } catch (IllegalStateException ignore) {
        }

        CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
        CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
        try {
            JobContext context = Mockito.mock(JobContext.class);
            Mockito.when(context.getConfiguration()).thenReturn(job.getConfiguration());
            Mockito.when(context.getJobID()).thenReturn(jobID);
            outputFormat.checkOutputSpecs(context);
        } catch (IllegalStateException ignore) {
            ignore.printStackTrace();
            Assert.fail("Output spec check failed.");
        }
    } catch (IOException e) {
        LOG.error("Exception encountered while testing checkOutputSpecs", e);
        Assert.fail("checkOutputSpecs failure");
    } catch (InterruptedException e) {
        LOG.error("Exception encountered while testing checkOutputSpecs", e);
        Assert.fail("checkOutputSpecs failure");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License:Apache License
public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - refer to HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader =
                uniformSizeInputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }
    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
public S3MultipartOutputCommitter(Path outputPath, JobContext context) throws IOException {
    super(outputPath, (TaskAttemptContext) context);
    this.constructorOutputPath = outputPath;

    Configuration conf = context.getConfiguration();

    this.uploadPartSize = conf.getLong(S3Committer.UPLOAD_SIZE, S3Committer.DEFAULT_UPLOAD_SIZE);
    // Spark will use a fake app id based on the current minute and job id 0.
    // To avoid collisions, use the YARN application ID for Spark.
    this.uuid = conf.get(S3Committer.UPLOAD_UUID,
            conf.get(S3Committer.SPARK_WRITE_UUID,
                    conf.get(S3Committer.SPARK_APP_ID, context.getJobID().toString())));

    if (context instanceof TaskAttemptContext) {
        this.workPath = taskAttemptPath((TaskAttemptContext) context, uuid);
    } else {
        this.workPath = null;
    }

    this.wrappedCommitter = new FileOutputCommitter(
            Paths.getMultipartUploadCommitsDirectory(conf, uuid), context);
}
From source file:edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor.java
License:Apache License
@SuppressWarnings("rawtypes") @Override//from w w w . j av a 2s . c om public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryInputSinkOperatorNodePushable() { private RecordDescriptor rd0; private FrameDeserializer frameDeserializer; private Configuration conf; private VertexWriter vertexWriter; private TaskAttemptContext context; private String TEMP_DIR = "_temporary"; private ClassLoader ctxCL; private ContextFactory ctxFactory = new ContextFactory(); @Override public void open() throws HyracksDataException { rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0) : inputRdFactory.createRecordDescriptor(); frameDeserializer = new FrameDeserializer(ctx.getFrameSize(), rd0); ctxCL = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf = confFactory.createConfiguration(); VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf); context = ctxFactory.createContext(conf, partition); try { vertexWriter = outputFormat.createVertexWriter(context); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @SuppressWarnings("unchecked") @Override public void nextFrame(ByteBuffer frame) throws HyracksDataException { frameDeserializer.reset(frame); try { while (!frameDeserializer.done()) { Object[] tuple = frameDeserializer.deserializeRecord(); Vertex value = (Vertex) tuple[1]; vertexWriter.writeVertex(value); } } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void fail() throws HyracksDataException { Thread.currentThread().setContextClassLoader(ctxCL); } @Override public void close() throws HyracksDataException { try { vertexWriter.close(context); moveFilesToFinalPath(); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } private void moveFilesToFinalPath() throws HyracksDataException { try { JobContext job = ctxFactory.createJobContext(conf); Path outputPath = FileOutputFormat.getOutputPath(job); FileSystem dfs = FileSystem.get(conf); Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString()); FileStatus[] results = findPartitionPaths(outputPath, dfs); if (results.length >= 1) { /** * for Hadoop-0.20.2 */ renameFile(dfs, filePath, results); } else { /** * for Hadoop-0.23.1 */ int jobId = job.getJobID().getId(); outputPath = new Path( outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId); results = findPartitionPaths(outputPath, dfs); renameFile(dfs, filePath, results); } } catch (IOException e) { throw new HyracksDataException(e); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } } private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException, IOException { FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().endsWith(TEMP_DIR); } }); Path tempDir = tempPaths[0].getPath(); FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0; } }); 
return results; } private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException, HyracksDataException, FileNotFoundException { Path srcDir = results[0].getPath(); if (!dfs.exists(srcDir)) throw new HyracksDataException("file " + srcDir.toString() + " does not exist!"); FileStatus[] srcFiles = dfs.listStatus(srcDir); Path srcFile = srcFiles[0].getPath(); dfs.delete(filePath, true); dfs.rename(srcFile, filePath); } }; }
From source file:edu.uci.ics.pregelix.dataflow.VertexFileWriteOperatorDescriptor.java
License:Apache License
@SuppressWarnings("rawtypes") @Override//from ww w . ja v a2s . c om public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryInputSinkOperatorNodePushable() { private RecordDescriptor rd0; private FrameDeserializer frameDeserializer; private Configuration conf; private VertexWriter vertexWriter; private TaskAttemptContext context; private String TEMP_DIR = "_temporary"; private ClassLoader ctxCL; private ContextFactory ctxFactory = new ContextFactory(); @Override public void open() throws HyracksDataException { rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0) : inputRdFactory.createRecordDescriptor(ctx); frameDeserializer = new FrameDeserializer(rd0); ctxCL = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf = confFactory.createConfiguration(ctx); VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf); context = ctxFactory.createContext(conf, partition); context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader()); try { if (preHookFactory != null) { preHookFactory.createRuntimeHook().configure(ctx); } vertexWriter = outputFormat.createVertexWriter(context); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @SuppressWarnings("unchecked") @Override public void nextFrame(ByteBuffer frame) throws HyracksDataException { frameDeserializer.reset(frame); try { while (!frameDeserializer.done()) { Object[] tuple = frameDeserializer.deserializeRecord(); Vertex value = (Vertex) tuple[1]; vertexWriter.writeVertex(value); } } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } @Override public void fail() throws HyracksDataException { Thread.currentThread().setContextClassLoader(ctxCL); } @Override public void close() throws HyracksDataException { try { vertexWriter.close(context); moveFilesToFinalPath(); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } private void moveFilesToFinalPath() throws HyracksDataException { try { JobContext job = ctxFactory.createJobContext(conf); Path outputPath = FileOutputFormat.getOutputPath(job); FileSystem dfs = FileSystem.get(conf); Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString()); FileStatus[] results = findPartitionPaths(outputPath, dfs); if (results.length >= 1) { /** * for Hadoop-0.20.2 */ renameFile(dfs, filePath, results); } else { /** * for Hadoop-0.23.1 */ int jobId = job.getJobID().getId(); outputPath = new Path( outputPath.toString() + File.separator + TEMP_DIR + File.separator + jobId); results = findPartitionPaths(outputPath, dfs); renameFile(dfs, filePath, results); } } catch (IOException e) { throw new HyracksDataException(e); } finally { Thread.currentThread().setContextClassLoader(ctxCL); } } private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException, IOException { FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0; } }); Path tempDir = tempPaths[0].getPath(); 
FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() { @Override public boolean accept(Path dir) { return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0 && dir.getName().indexOf(".crc") < 0; } }); return results; } private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException, HyracksDataException, FileNotFoundException { Path srcDir = results[0].getPath(); if (!dfs.exists(srcDir)) { throw new HyracksDataException("file " + srcDir.toString() + " does not exist!"); } FileStatus[] srcFiles = dfs.listStatus(srcDir); Path srcFile = srcFiles[0].getPath(); dfs.delete(filePath, true); dfs.rename(srcFile, filePath); } }; }
From source file:gobblin.runtime.mapreduce.GobblinOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException {
    LOG.info("Aborting Job: " + jobContext.getJobID() + " with state: " + state);

    Configuration conf = jobContext.getConfiguration();

    URI fsUri = URI.create(conf.get(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    FileSystem fs = FileSystem.get(fsUri, conf);

    Path mrJobDir = new Path(conf.get(ConfigurationKeys.MR_JOB_ROOT_DIR_KEY),
            conf.get(ConfigurationKeys.JOB_NAME_KEY));
    Path jobInputDir = new Path(mrJobDir, MRJobLauncher.INPUT_DIR_NAME);

    if (!fs.exists(jobInputDir) || !fs.isDirectory(jobInputDir)) {
        LOG.warn(String.format("%s either does not exist or is not a directory. No data to cleanup.",
                jobInputDir));
        return;
    }

    // Iterate through all files in the jobInputDir; each file should correspond to a serialized wu or mwu
    try {
        for (FileStatus status : fs.listStatus(jobInputDir, new WorkUnitFilter())) {
            Closer workUnitFileCloser = Closer.create();

            // If the file ends with ".wu", de-serialize it into a WorkUnit
            if (status.getPath().getName().endsWith(AbstractJobLauncher.WORK_UNIT_FILE_EXTENSION)) {
                WorkUnit wu = WorkUnit.createEmpty();
                try {
                    wu.readFields(workUnitFileCloser.register(new DataInputStream(fs.open(status.getPath()))));
                } finally {
                    workUnitFileCloser.close();
                }
                JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
            }

            // If the file ends with ".mwu", de-serialize it into a MultiWorkUnit
            if (status.getPath().getName().endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
                MultiWorkUnit mwu = MultiWorkUnit.createEmpty();
                try {
                    mwu.readFields(workUnitFileCloser.register(new DataInputStream(fs.open(status.getPath()))));
                } finally {
                    workUnitFileCloser.close();
                }
                for (WorkUnit wu : mwu.getWorkUnits()) {
                    JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(wu), LOG);
                }
            }
        }
    } finally {
        try {
            cleanUpWorkingDirectory(mrJobDir, fs);
        } finally {
            super.abortJob(jobContext, state);
        }
    }
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext jobContext) throws IOException {
    // Look through all the shards for attempts that need to be cleaned up.
    // Also find all the attempts that are finished,
    // then rename all the attempt jobs to commits.
    LOG.info("Commiting Job [{0}]", jobContext.getJobID());
    Configuration configuration = jobContext.getConfiguration();
    Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
    LOG.info("TableOutput path [{0}]", tableOutput);
    makeSureNoEmptyShards(configuration, tableOutput);
    FileSystem fileSystem = tableOutput.getFileSystem(configuration);
    for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
        LOG.info("Checking file status [{0}] with path [{1}]", fileStatus, fileStatus.getPath());
        if (isShard(fileStatus)) {
            commitOrAbortJob(jobContext, fileStatus.getPath(), true);
        }
    }
    LOG.info("Commiting Complete [{0}]", jobContext.getJobID());
}
From source file:org.apache.blur.mapreduce.lib.BlurOutputCommitter.java
License:Apache License
private void commitOrAbortJob(JobContext jobContext, Path shardPath, boolean commit) throws IOException {
    LOG.info("CommitOrAbort [{0}] path [{1}]", commit, shardPath);
    FileSystem fileSystem = shardPath.getFileSystem(jobContext.getConfiguration());
    FileStatus[] listStatus = fileSystem.listStatus(shardPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            LOG.info("Checking path [{0}]", path);
            if (path.getName().endsWith(".task_complete")) {
                return true;
            }
            return false;
        }
    });
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        LOG.info("Trying to commitOrAbort [{0}]", path);
        String name = path.getName();
        boolean taskComplete = name.endsWith(".task_complete");
        if (fileStatus.isDir()) {
            String taskAttemptName = getTaskAttemptName(name);
            if (taskAttemptName == null) {
                LOG.info("Dir name [{0}] not task attempt", name);
                continue;
            }
            TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptName);
            if (taskAttemptID.getJobID().equals(jobContext.getJobID())) {
                if (commit) {
                    if (taskComplete) {
                        fileSystem.rename(path, new Path(shardPath, taskAttemptName + ".commit"));
                        LOG.info("Committing [{0}] in path [{1}]", taskAttemptID, path);
                    } else {
                        fileSystem.delete(path, true);
                        LOG.info("Deleting tmp dir [{0}] in path [{1}]", taskAttemptID, path);
                    }
                } else {
                    fileSystem.delete(path, true);
                    LOG.info("Deleting aborted job dir [{0}] in path [{1}]", taskAttemptID, path);
                }
            } else {
                LOG.warn("TaskAttempt JobID [{0}] does not match JobContext JobId [{1}]",
                        taskAttemptID.getJobID(), jobContext.getJobID());
            }
        }
    }
}
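The getJobID() usage worth noting in the example above is the equality check: TaskAttemptID.forName(...) parses a directory name back into an ID, and comparing its getJobID() against the context's filters out directories left behind by other jobs. A minimal sketch of that check, with hypothetical class and method names:

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;

public final class JobIdChecks {
    private JobIdChecks() {
    }

    // True when the attempt name (e.g. "attempt_200707121733_0001_m_000000_0")
    // belongs to the job this context describes.
    public static boolean belongsToJob(String taskAttemptName, JobContext jobContext) {
        TaskAttemptID attemptId = TaskAttemptID.forName(taskAttemptName);
        return attemptId.getJobID().equals(jobContext.getJobID());
    }
}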