List of usage examples for org.apache.hadoop.mapreduce.JobID#JobID()
public JobID()
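For reference, a minimal sketch of the pattern shared by most of the examples below (assuming Hadoop 2.x, where TaskAttemptContextImpl lives in org.apache.hadoop.mapreduce.task): the no-argument JobID constructor creates a placeholder job ID (empty jtIdentifier, id 0) that is then wrapped into a TaskAttemptID and TaskAttemptContext so an InputFormat or OutputFormat can be driven outside of a running MapReduce job. The class name JobIDExample is illustrative only and does not come from any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class JobIDExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Placeholder IDs: an empty job tracker identifier and job/task/attempt numbers of 0.
        JobID jobId = new JobID();
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(jobId, TaskType.MAP, 0), 0);
        // A synthetic task attempt context, usable for opening record readers/writers directly.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println(context.getTaskAttemptID()); // e.g. attempt__0000_m_000000_0
    }
}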
From source file:com.asakusafw.testdriver.file.FileDeployer.java
License:Apache License
/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());

    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));

    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}
From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java
License:Apache License
@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    LOG.info("Creating data model source for: {}", description);
    VariableTable variables = createVariables(context);
    checkType(definition, description);
    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String resolved = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(resolved));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    FileInputFormatDriver<V> result = new FileInputFormatDriver<>(definition, taskContext, format);
    return result;
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java
License:Apache License
/**
 * Closes all the opened outputs.
 *
 * This should be called from the cleanup method of the map/reduce task. If overridden, subclasses must invoke
 * <code>super.close()</code> at the end of their <code>close()</code>.
 */
public void close() throws IOException, InterruptedException {
    for (OutputContext outputContext : this.outputContexts.values()) {
        outputContext.recordWriter.close(outputContext.taskAttemptContext);
        outputContext.outputCommitter.commitTask(outputContext.taskAttemptContext);

        // This is a trick for Hadoop 2.0, where there is extra business logic in commitJob().
        JobContext jContext;
        try {
            jContext = JobContextFactory.get(outputContext.taskAttemptContext.getConfiguration(), new JobID());
        } catch (Exception e) {
            throw new IOException(e);
        }
        try {
            Class<?> cl = Class.forName(OutputCommitter.class.getName());
            Method method = cl.getMethod("commitJob", Class.forName(JobContext.class.getName()));
            if (method != null) {
                method.invoke(outputContext.outputCommitter, jContext);
            }
        } catch (Exception e) {
            // Hadoop 2.0: do nothing.
            // We need to call commitJob() as a trick, but the trick itself may throw an IOException.
            // That does not mean something went wrong; a real problem would have caused a failure earlier.
        }
        outputContext.outputCommitter.cleanupJob(outputContext.jobContext);
    }
}
From source file:com.facebook.hiveio.benchmark.InputBenchmark.java
License:Apache License
/**
 * Run benchmark
 *
 * @param args parsed args
 * @throws Exception
 */
public void run(InputBenchmarkCmd args) throws Exception {
    HadoopNative.requireHadoopNative();

    Timer allTime = Metrics.newTimer(InputBenchmark.class, "all-time", MILLISECONDS, MILLISECONDS);
    TimerContext allTimerContext = allTime.time();

    HiveInputDescription input = new HiveInputDescription();
    input.getTableDesc().setDatabaseName(args.tableOpts.database);
    input.getTableDesc().setTableName(args.tableOpts.table);
    input.setPartitionFilter(args.tableOpts.partitionFilter);
    input.getMetastoreDesc().setHost(args.metastoreOpts.host);
    input.getMetastoreDesc().setPort(args.metastoreOpts.port);

    HiveConf hiveConf = HiveUtils.newHiveConf(InputBenchmark.class);

    System.err.println("Initialize profile with input data");
    HiveApiInputFormat.setProfileInputDesc(hiveConf, input, DEFAULT_PROFILE_ID);

    HiveApiInputFormat defaultInputFormat = new HiveApiInputFormat();
    if (args.trackMetrics) {
        defaultInputFormat.setObserver(new MetricsObserver("default", args.recordPrintPeriod));
    }

    List<InputSplit> splits = defaultInputFormat.getSplits(new JobContext(hiveConf, new JobID()));
    System.err.println("getSplits returned " + splits.size() + " splits");

    long numRows = 0;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        TaskAttemptID taskID = new TaskAttemptID();
        TaskAttemptContext taskContext = new TaskAttemptContext(hiveConf, taskID);
        if (i % args.splitPrintPeriod == 0) {
            System.err.println("Handling split " + i + " of " + splits.size());
        }
        RecordReader<WritableComparable, HiveReadableRecord> reader =
                defaultInputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        numRows += readFully(reader);
    }

    System.err.println("Parsed " + numRows + " rows");

    allTimerContext.stop();

    new ConsoleReporter(System.err).run();
}
From source file:com.facebook.hiveio.input.HiveInput.java
License:Apache License
/**
 * Read a Hive table
 *
 * @param inputDesc Hive table description
 * @param conf Configuration
 * @return Iterable of Hive records
 * @throws IOException
 * @throws InterruptedException
 */
public static Iterable<HiveReadableRecord> readTable(HiveInputDescription inputDesc, final Configuration conf)
        throws IOException, InterruptedException {
    String profileID = Long.toString(System.currentTimeMillis());

    HiveApiInputFormat.setProfileInputDesc(conf, inputDesc, profileID);

    final HiveApiInputFormat inputFormat = new HiveApiInputFormat();
    inputFormat.setMyProfileId(profileID);

    JobContext jobContext = new JobContext(conf, new JobID());
    final List<InputSplit> splits = inputFormat.getSplits(jobContext);

    return new Iterable<HiveReadableRecord>() {
        @Override
        public Iterator<HiveReadableRecord> iterator() {
            return new RecordIterator(inputFormat, conf, splits.iterator());
        }
    };
}
From source file:com.facebook.hiveio.tailer.TailerCmd.java
License:Apache License
@Override
public void execute() throws Exception {
    HadoopNative.requireHadoopNative();

    args.process();
    chooseRecordPrinter();

    HostPort metastoreHostPort = getMetastoreHostPort();
    if (metastoreHostPort == null) {
        return;
    }

    LOG.info("Creating Hive client for Metastore at {}", metastoreHostPort);
    ThriftHiveMetastore.Iface client = HiveMetastores.create(metastoreHostPort.host, metastoreHostPort.port);

    HiveInputDescription inputDesc = initInput(metastoreHostPort);
    HiveStats hiveStats = HiveUtils.statsOf(client, inputDesc);
    LOG.info("{}", hiveStats);

    HiveConf hiveConf = HiveUtils.newHiveConf(TailerCmd.class);
    args.inputTable.process(hiveConf);

    LOG.info("Setting up input using {}", inputDesc);
    HiveApiInputFormat.setProfileInputDesc(hiveConf, inputDesc, DEFAULT_PROFILE_ID);

    HiveApiInputFormat hapi = new HiveApiInputFormat();
    hapi.setMyProfileId(DEFAULT_PROFILE_ID);

    List<InputSplit> splits = hapi.getSplits(new JobContext(hiveConf, new JobID()));
    LOG.info("Have {} splits to read", splits.size());

    HiveTableDesc hiveTableDesc = new HiveTableDesc(args.inputTable.database, args.inputTable.table);
    HiveTableSchema schema = HiveTableSchemas.lookup(client, hiveConf, hiveTableDesc);
    chooseRowParser(schema);

    Stats stats = Stats.create(hiveStats);
    Context context = new Context(hapi, hiveConf, schema, hiveStats, stats);

    long startNanos = System.nanoTime();

    if (args.multiThread.isSingleThreaded()) {
        context.splitsQueue = Queues.newArrayDeque(splits);
        readSplits(context);
    } else {
        context.splitsQueue = Queues.newConcurrentLinkedQueue(splits);
        multiThreaded(context, args.multiThread.threads);
    }

    long timeNanos = System.nanoTime() - startNanos;

    if (args.appendStatsTo != null) {
        OutputStream out = new FileOutputStream(args.appendStatsTo, true);
        try {
            stats.printEndBenchmark(context, args, timeNanos, out);
        } finally {
            out.close();
        }
    }

    System.err.println("Finished.");
    if (args.metricsOpts.stderrEnabled()) {
        args.metricsOpts.dumpMetricsToStderr();
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);

    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
public void testNoCommitAction() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    final String[] statusString = new String[1];
    try {
        Mockito.doAnswer(new Answer() {
            @Override
            public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
                statusString[0] = (String) invocationOnMock.getArguments()[0];
                return null; // To change body of implemented methods use File | Settings | File Templates.
            }
        }).when(taskAttemptContext).setStatus(Mockito.anyString());
    } catch (Throwable e) {
    }
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        committer.commitJob(jobContext);
        Assert.assertEquals(statusString[0], "Commit Successful");

        // Test for idempotent commit
        committer.commitJob(jobContext);
        Assert.assertEquals(statusString[0], "Commit Successful");
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Commit failed");
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
public void testValidationPass() {
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 100);
    Counters counters = new Counters();
    CounterGroup grp = counters.getGroup(CopyMapper.Counter.class.getName());
    grp.findCounter(CopyMapper.Counter.BYTES_COPIED.name()).increment(50);
    grp.findCounter(CopyMapper.Counter.BYTES_FAILED.name()).increment(20);
    grp.findCounter(CopyMapper.Counter.BYTES_SKIPPED.name()).increment(30);
    counterProvider.setCounters(counters);

    try {
        TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
        JobContext jobContext = Mockito.mock(JobContext.class);
        Mockito.when(jobContext.getConfiguration()).thenReturn(config);
        JobID jobID = new JobID();
        Mockito.when(jobContext.getJobID()).thenReturn(jobID);
        final String[] statusString = new String[1];
        try {
            Mockito.doAnswer(new Answer() {
                @Override
                public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
                    LOG.info("XXXX crap I am called now " + invocationOnMock.getArguments()[0]);
                    statusString[0] = (String) invocationOnMock.getArguments()[0];
                    return null; // To change body of implemented methods use File | Settings | File Templates.
                }
            }).when(taskAttemptContext).setStatus(Mockito.anyString());
        } catch (Throwable e) {
        }
        try {
            OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
            committer.commitJob(jobContext);
            Assert.assertEquals(statusString[0], "Commit Successful");
        } catch (IOException e) {
            LOG.error("Exception encountered ", e);
            Assert.fail("Commit failed");
        }
    } finally {
        config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0);
        counterProvider.setCounters(EMPTY_COUNTERS);
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyCommitter.java
License:Apache License
@Test
public void testPreserveStatus() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(config);
    JobID jobID = new JobID();
    Mockito.when(jobContext.getJobID()).thenReturn(jobID);
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
        OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
        fs = FileSystem.get(conf);
        FsPermission sourcePerm = new FsPermission((short) 511);
        FsPermission initialPerm = new FsPermission((short) 448);
        sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
        targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

        DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
        options.preserve(FileAttribute.PERMISSION);
        options.appendToConf(conf);

        CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
        Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
        listing.buildListing(listingFile, options);

        conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

        committer.commitJob(jobContext);
        if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
            Assert.fail("Permission don't match");
        }

        // Test for idempotent commit
        committer.commitJob(jobContext);
        if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
            Assert.fail("Permission don't match");
        }
    } catch (IOException e) {
        LOG.error("Exception encountered while testing for preserve status", e);
        Assert.fail("Preserve status failure");
    } finally {
        TestDistCpUtils.delete(fs, "/tmp1");
        conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    }
}