Usage examples for org.apache.hadoop.mapreduce.OutputCommitter.commitJob
public void commitJob(JobContext jobContext) throws IOException
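Before the per-project examples below, here is a minimal sketch of the typical commitJob lifecycle, assuming a configured Job whose OutputFormat supplies the committer. The class name CommitJobSketch, the placeholder TaskAttemptID, and the "run tasks here" step are illustrative assumptions only, not taken from any of the source files listed on this page:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.util.ReflectionUtils;

public class CommitJobSketch {

    // Assumed flow: obtain the committer from the job's OutputFormat, set the job up,
    // commit once all tasks have succeeded, and abort on failure.
    static void runAndCommit(Job job) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = job.getConfiguration();
        OutputFormat<?, ?> outputFormat =
                ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);

        // The committer is obtained through a task attempt context; the attempt id here is a placeholder.
        TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        OutputCommitter committer = outputFormat.getOutputCommitter(attemptContext);

        committer.setupJob(job);
        boolean committed = false;
        try {
            // ... run the map and reduce tasks here ...
            committer.commitJob(job);   // finalize the job's output once all tasks succeed
            committed = true;
        } finally {
            if (!committed) {
                committer.abortJob(job, JobStatus.State.FAILED);   // clean up partial output on failure
            }
        }
    }
}

The examples that follow show the same pattern in real code: obtain or construct the committer, run the work, call commitJob on success, and abort or clean up otherwise.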
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java
License:Apache License
private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    assert job.getJobID() != null;
    TaskID taskId = newMapTaskId(job.getJobID(), 0);
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    OutputCommitter committer = output
            .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0)));
    boolean succeed = false;
    committer.setupJob(job);
    try {
        if (job.getNumReduceTasks() == 0) {
            runMap(job, null);
        } else {
            try (KeyValueSorter<?, ?> sorter = createSorter(job, job.getMapOutputKeyClass(),
                    job.getMapOutputValueClass())) {
                runMap(job, sorter);
                runReduce(job, sorter);
            }
        }
        committer.commitJob(job);
        succeed = true;
    } finally {
        if (succeed == false) {
            try {
                committer.abortJob(job, State.FAILED);
            } catch (IOException e) {
                LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})",
                        job.getJobID(), job.getJobName()), e);
            }
        }
    }
}
From source file:com.asakusafw.testdriver.file.FileOutputFormatDriver.java
License:Apache License
@Override
public void close() throws IOException {
    LOG.debug("Committing output results: {}", format.getClass().getName());
    try {
        writer.close(context);
        OutputCommitter comitter = format.getOutputCommitter(context);
        comitter.commitTask(context);
        comitter.commitJob(context);
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
public void testNoCommitAction() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    final String[] statusString = new String[1];
    try {
        Mockito.doAnswer(new Answer() {
            @Override
            public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
                statusString[0] = (String) invocationOnMock.getArguments()[0];
                return null;
            }
        }).when(taskAttemptContext).setStatus(Mockito.anyString());
    } catch (Throwable e) {
    }
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        committer.commitJob(jobContext);
        Assert.assertEquals(statusString[0], "Commit Successful");

        //Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertEquals(statusString[0], "Commit Successful");
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Commit failed");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
public void testValidationPass() {
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 100);
    Counters counters = new Counters();
    CounterGroup grp = counters.getGroup(CopyMapper.Counter.class.getName());
    grp.findCounter(CopyMapper.Counter.BYTES_COPIED.name()).increment(50);
    grp.findCounter(CopyMapper.Counter.BYTES_FAILED.name()).increment(20);
    grp.findCounter(CopyMapper.Counter.BYTES_SKIPPED.name()).increment(30);
    counterProvider.setCounters(counters);

    try {
        TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
        JobContext jobContext = Mockito.mock(JobContext.class);
        Mockito.when(jobContext.getConfiguration()).thenReturn(config);
        JobID jobID = new JobID();
        Mockito.when(jobContext.getJobID()).thenReturn(jobID);
        final String[] statusString = new String[1];
        try {
            Mockito.doAnswer(new Answer() {
                @Override
                public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
                    LOG.info("XXXX crap I am called now " + invocationOnMock.getArguments()[0]);
                    statusString[0] = (String) invocationOnMock.getArguments()[0];
                    return null;
                }
            }).when(taskAttemptContext).setStatus(Mockito.anyString());
        } catch (Throwable e) {
        }
        try {
            OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
            committer.commitJob(jobContext);
            Assert.assertEquals(statusString[0], "Commit Successful");
        } catch (IOException e) {
            LOG.error("Exception encountered ", e);
            Assert.fail("Commit failed");
        }
    } finally {
        config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0);
        counterProvider.setCounters(EMPTY_COUNTERS);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testPreserveStatus() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        FsPermission sourcePerm = new FsPermission((short) 511);
        FsPermission initialPerm = new FsPermission((short) 448);
        sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
        targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

        DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
        options.preserve(FileAttribute.PERMISSION);
        options.appendToConf(conf);

        CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
        Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
        listing.buildListing(listingFile, options);

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

        committer.commitJob(jobContext);
        if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
            Assert.fail("Permission don't match");
        }

        //Test for idempotent commit
        committer.commitJob(jobContext);
        if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
            Assert.fail("Permission don't match");
        }
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Preserve status failure");
    } finally {
        TestDistCpUtils.delete(fs, "/tmp1");
        conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testDeleteMissing() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
        targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
        String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
        fs.rename(new Path(targetBaseAdd), new Path(targetBase));

        DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
        options.setSyncFolder(true);
        options.setDeleteMissing(true);
        options.appendToConf(conf);

        CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
        Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
        listing.buildListing(listingFile, options);

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

        committer.commitJob(jobContext);
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
            Assert.fail("Source and target folders are not in sync");
        }
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
            Assert.fail("Source and target folders are not in sync");
        }

        //Test for idempotent commit
        committer.commitJob(jobContext);
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
            Assert.fail("Source and target folders are not in sync");
        }
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
            Assert.fail("Source and target folders are not in sync");
        }
    } catch (Throwable e) {
        LOG.error("Exception encountered while testing for delete missing", e);
        Assert.fail("Delete missing failure");
    } finally {
        TestDistCpUtils.delete(fs, "/tmp1");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testDeleteMissingFlatInterleavedFiles() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
        targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
        TestDistCpUtils.createFile(fs, sourceBase + "/1");
        TestDistCpUtils.createFile(fs, sourceBase + "/3");
        TestDistCpUtils.createFile(fs, sourceBase + "/4");
        TestDistCpUtils.createFile(fs, sourceBase + "/5");
        TestDistCpUtils.createFile(fs, sourceBase + "/7");
        TestDistCpUtils.createFile(fs, sourceBase + "/8");
        TestDistCpUtils.createFile(fs, sourceBase + "/9");
        TestDistCpUtils.createFile(fs, targetBase + "/2");
        TestDistCpUtils.createFile(fs, targetBase + "/4");
        TestDistCpUtils.createFile(fs, targetBase + "/5");
        TestDistCpUtils.createFile(fs, targetBase + "/7");
        TestDistCpUtils.createFile(fs, targetBase + "/9");
        TestDistCpUtils.createFile(fs, targetBase + "/A");

        DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
        options.setSyncFolder(true);
        options.setDeleteMissing(true);
        options.appendToConf(conf);

        CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
        Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
        listing.buildListing(listingFile, options);

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

        committer.commitJob(jobContext);
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
            Assert.fail("Source and target folders are not in sync");
        }
        Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);

        //Test for idempotent commit
        committer.commitJob(jobContext);
        if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
            Assert.fail("Source and target folders are not in sync");
        }
        Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for delete missing", e);
        Assert.fail("Delete missing failure");
    } finally {
        TestDistCpUtils.delete(fs, "/tmp1");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testAtomicCommitMissingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        fs.mkdirs(new Path(workPath));

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
        //XXX set label to false explicitly, conf is not mixed up
        conf.setBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false);

        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertFalse(fs.exists(new Path(finalPath)));
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));

        //Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Atomic commit failure");
    } finally {
        TestDistCpUtils.delete(fs, workPath);
        TestDistCpUtils.delete(fs, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testAtomicCommitExistingFinal() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        fs.mkdirs(new Path(workPath));
        fs.mkdirs(new Path(finalPath));

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
        conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
        //XXX set label to false explicitly, conf is not mixed up
        conf.setBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false);

        Assert.assertTrue(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));

        //Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertFalse(fs.exists(new Path(workPath)));
        Assert.assertTrue(fs.exists(new Path(finalPath)));
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Atomic commit failure");
    } finally {
        TestDistCpUtils.delete(fs, workPath);
        TestDistCpUtils.delete(fs, finalPath);
        conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java
License:Apache License
/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param job  the job to run
 * @param grid invocation grid to run the job
 */
@SuppressWarnings("unchecked")
void runJob(HServerJob job, InvocationGrid grid) throws IOException, InterruptedException, ClassNotFoundException {
    //Initialize user credential in advance
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);
    String hadoopVersion = VersionInfo.getVersion();

    try {
        //Check output specs before running the job
        OutputFormat outputFormat = ReflectionUtils.newInstance(job.getOutputFormatClass(),
                job.getConfiguration());
        outputFormat.checkOutputSpecs(job);
        org.apache.hadoop.mapreduce.OutputCommitter outputCommitter = createOutputCommitter(true,
                job.getJobID(), job.getConfiguration());

        //clear all temporary objects
        DataAccessor.clearObjects(job.getAppId());

        //Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition =
                com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping.getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        //Generating mapping of Hadoop partitions to SOSS Regions, so they are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math.max(
                grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        //Set the number of splits to the number of cores
        if (GridInputFormat.class.isAssignableFrom(job.getInputFormatClass())) {
            int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job.getConfiguration())
                    * numHosts * numberOfSlotsPerNode;
            GridInputFormat.setSuggestedNumberOfSplits(job,
                    Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
        }

        //Generating split to hostname map
        InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
        List<InputSplit> splitList = inputFormat.getSplits(job);
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

        //Choose the optimal number of reducers for GridOutputFormat
        if (GridOutputFormat.class.isAssignableFrom(job.getOutputFormatClass())) {
            job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
            job.setSortEnabled(false);
        }

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

        //Generating invocation parameters
        Class<? extends InputSplit> splitType = splitList.size() > 0 ? splitList.get(0).getClass() : null;

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job.getConfiguration(),
                job.getJobID(), false);
        HServerInvocationParameters parameters = new HServerInvocationParameters(hadoopParameters,
                job.getAppId(), partitionMapping, hostNameToPartition, numberOfSlotsPerNode, splitType,
                splitList, splitToHostAddress, false, job.getSortEnabled(), hadoopVersion,
                job.getJobParameter(), SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            //Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    job.getAppId(), TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(job.getAppId()); //clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(job);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }
}