List of usage examples for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID
public TaskAttemptID(TaskID taskId, int id)
From source file:com.asakusafw.runtime.compatibility.hadoop1.JobCompatibilityHadoop1.java
License:Apache License
@Override public TaskAttemptID newTaskAttemptId(TaskID taskId, int id) { if (taskId == null) { throw new IllegalArgumentException("taskId must not be null"); //$NON-NLS-1$ }//w ww. j a v a2 s . com return new TaskAttemptID(taskId, id); }
From source file:com.asakusafw.testdriver.file.FileDeployer.java
License:Apache License
/**
 * Opens output for the specified {@link OutputFormat}.
 *
 * <p>Records are first written into a freshly created local temporary directory;
 * when the returned {@link ModelOutput} is closed, that directory is handed to
 * {@code deploy(destination, temporaryDir)}.
 *
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    // Validate explicitly: `assert` is disabled by default, so it cannot honor the
    // documented IllegalArgumentException contract.
    if (definition == null) {
        throw new IllegalArgumentException("definition must not be null");
    }
    if (destination == null) {
        throw new IllegalArgumentException("destination must not be null");
    }
    if (output == null) {
        throw new IllegalArgumentException("output must not be null");
    }
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());

    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());

    // Reserve a unique name as a temp file, then swap the file for a directory of the same name.
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));

    // A synthetic map task attempt (task 0, attempt 0) is enough to drive the output format.
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            // Deploy the staged files only after the driver has finished writing them.
            deploy(destination, temporaryDir);
        }
    };
    return result;
}
From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java
License:Apache License
/**
 * Creates a {@link DataModelSource} that reads back the files described by a file exporter.
 *
 * <p>The exporter's path prefix is resolved through the variable table built from
 * {@code context}, registered as the job's input path, and read using the input format
 * returned by {@code getOpposite} for the exporter's output format.
 *
 * @param <V> value type of the data model
 * @param definition the data model definition
 * @param description the exporter description to read from
 * @param context the current test context
 * @return a source over the exporter's files
 * @throws IOException if the source could not be prepared
 */
@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition,
        FileExporterDescription description, TestContext context) throws IOException {
    LOG.info("??????: {}", description);
    VariableTable variableTable = createVariables(context);
    checkType(definition, description);

    Configuration hadoopConf = configurations.newInstance();
    Job hadoopJob = Job.getInstance(hadoopConf);
    String inputLocation = variableTable.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(hadoopJob, new Path(inputLocation));

    // A synthetic map task attempt (task 0, attempt 0) stands in for a real task.
    TaskID mapTask = new TaskID(new JobID(), TaskType.MAP, 0);
    TaskAttemptContext attemptContext =
            new TaskAttemptContextImpl(hadoopJob.getConfiguration(), new TaskAttemptID(mapTask, 0));

    FileInputFormat<?, V> inputFormat = getOpposite(hadoopConf, description.getOutputFormat());
    return new FileInputFormatDriver<V>(definition, attemptContext, inputFormat);
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleInputOutputFormat.java
License:Apache License
public void testSplits(long maxSplitSize, int generatedRows) throws IOException, InterruptedException, IllegalArgumentException, SecurityException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException { logger.info("Testing maxSplitSize: " + maxSplitSize + " and generatedRows:" + generatedRows); FileSystem fS = FileSystem.get(getConf()); Random r = new Random(1); Schema schema = new Schema("schema", Fields.parse("i:int,s:string")); ITuple tuple = new Tuple(schema); Path outPath = new Path(OUT); TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(getConf()), getConf(), outPath, schema); for (int i = 0; i < generatedRows; i++) { tuple.set("i", r.nextInt()); tuple.set("s", r.nextLong() + ""); writer.append(tuple);//from www . jav a 2s . c o m } writer.close(); TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf()); Job job = new Job(getConf()); FileInputFormat.setInputPaths(job, outPath); logger.info("Using max input split size: " + maxSplitSize); FileInputFormat.setMaxInputSplitSize(job, maxSplitSize); job.setInputFormatClass(FileInputFormat.class); // Read all the splits and count. The number of read rows must // be the same than the written ones. int count = 0; for (InputSplit split : format.getSplits(job)) { TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1); TaskAttemptContext attemptContext = TaskAttemptContextFactory.get(getConf(), attemptId); logger.info("Sampling split: " + split); RecordReader<ITuple, NullWritable> reader = format.createRecordReader(split, attemptContext); reader.initialize(split, attemptContext); while (reader.nextKeyValue()) { tuple = reader.getCurrentKey(); count++; } reader.close(); } assertEquals(generatedRows, count); HadoopUtils.deleteIfExists(fS, outPath); }
From source file:com.google.appengine.tools.mapreduce.MapReduceServletTest.java
License:Apache License
/**
 * Verifies the controller callback when one shard is still active and one is done:
 * counters are aggregated, quota is refilled only for the active shard, and a follow-up
 * controller task is enqueued.
 *
 * @throws EntityNotFoundException if thrown by the state lookups used in this test
 */
public void testHandleController_withContinue() throws EntityNotFoundException {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest controllerRequest = createMockControllerRequest(0, jobId);
    replay(controllerRequest);

    Configuration conf = getSampleMapReduceConfiguration();
    persistMRState(jobId, conf);

    // Shard 1: initialized and persisted, but never marked done (still active).
    TaskAttemptID activeAttempt = new TaskAttemptID(new TaskID(jobId, true, 1), 1);
    ShardState activeShard = ShardState.generateInitializedShardState(ds, activeAttempt);
    Counters activeCounters = new Counters();
    activeCounters.findCounter("a", "z").increment(1);
    activeShard.setCounters(activeCounters);
    activeShard.setInputSplit(conf, new StubInputSplit(1));
    activeShard.setRecordReader(conf, new StubRecordReader());
    activeShard.persist();

    // Shard 2: same setup, but marked done before being persisted.
    TaskAttemptID doneAttempt = new TaskAttemptID(new TaskID(jobId, true, 2), 1);
    ShardState doneShard = ShardState.generateInitializedShardState(ds, doneAttempt);
    Counters doneCounters = new Counters();
    doneCounters.findCounter("a", "z").increment(1);
    doneShard.setCounters(doneCounters);
    doneShard.setInputSplit(conf, new StubInputSplit(2));
    doneShard.setRecordReader(conf, new StubRecordReader());
    doneShard.setDone();
    doneShard.persist();

    // doPost should call handleCallback(); the response argument is never used.
    servlet.doPost(controllerRequest, null);

    MapReduceState state = MapReduceState.getMapReduceStateFromJobID(ds, jobId);

    // aggregateState(): both shards contributed one increment each.
    assertEquals(2, state.getCounters().findCounter("a", "z").getValue());

    // refillQuota(): the active shard gets quota, the done shard does not.
    QuotaManager activeQuota = new QuotaManager(MemcacheServiceFactory.getMemcacheService());
    assertEquals(1000, activeQuota.get(String.valueOf(activeShard.getTaskAttemptID())));
    QuotaManager doneQuota = new QuotaManager(MemcacheServiceFactory.getMemcacheService());
    assertEquals(0, doneQuota.get(String.valueOf(doneShard.getTaskAttemptID())));

    // The next controller task must have been enqueued on the default queue.
    QueueStateInfo queue = getDefaultQueueInfo();
    assertEquals(1, queue.getCountTasks());
    TaskStateInfo enqueued = queue.getTaskInfo().get(0);
    assertEquals("/mapreduce/" + MapReduceServlet.CONTROLLER_PATH, enqueued.getUrl());
    assertTrue(enqueued.getBody(), enqueued.getBody().indexOf("jobID=job_foo_0001") >= 0);

    assertEquals(1, state.getActiveShardCount());
    assertEquals(2, state.getShardCount());
    verify(controllerRequest);
}
From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java
License:Apache License
private void mockTaskAttemptContext(String indexType) { TaskAttemptID fakeTaskId = new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2), 2);//from w ww .ja v a 2 s.com when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId); when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration()); }
From source file:com.marklogic.contentpump.LocalJobRunner.java
License:Apache License
/**
 * Run the job. Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by
 * one.
 *
 * <p>Outline of what this method does:
 * <ol>
 *   <li>Instantiates the job's input format, output format and mapper reflectively.</li>
 *   <li>Sorts the splits with {@code SplitLengthComparator}.</li>
 *   <li>Calls {@code checkOutputSpecs}; on any exception it logs the problem and returns
 *       without running any task (the exception is not rethrown).</li>
 *   <li>Starts a {@code Monitor} and creates one progress counter per split.</li>
 *   <li>For each split, either submits a {@code LocalMapTask} to {@code pool} or, when
 *       {@code pool} is {@code null}, runs the mapper inline on the calling thread.</li>
 *   <li>Waits for the pool to drain, stops the monitor, and logs counters and the
 *       elapsed time.</li>
 * </ol>
 *
 * @param <INKEY> key type produced by the input format
 * @param <INVALUE> value type produced by the input format
 * @param <OUTKEY> key type accepted by the output format
 * @param <OUTVALUE> value type accepted by the output format
 * @param <T> element type of the sorted split array
 * @throws Exception if any step other than the output specification check fails
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);

    // Validate the output specification up front; a failure is logged and aborts the run
    // silently (full stack trace only at debug level).
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    // NOTE(review): conf is re-read here, presumably because checkOutputSpecs may have
    // changed the job configuration -- confirm.
    conf = job.getConfiguration();

    // One progress counter per split, indexed by position in the original split list.
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    // Only futures of tasks submitted in the multithreaded-mapper branch are tracked here.
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                // Override the task's mapper only when the adjustment picked a different class.
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }
            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    // Blocks until another thread notifies on the pool's monitor.
                    // NOTE(review): presumably the submitted task does so -- confirm.
                    pool.wait();
                }
            } else {
                // This future is not recorded; completion is only awaited through
                // pool.shutdown()/awaitTermination() below.
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            // Wrap the reader together with this split's progress counter.
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);
            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            // A fresh mapper instance is created for each split from the context's configuration.
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        // Keep waiting, one day at a time, until the pool reports termination.
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    // Stop the monitor, waiting at most 1000 ms for it in join().
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    // Elapsed time since startTime, in whole seconds.
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
/**
 * Runs {@code numTasks} mocked reduce task attempts against the S3 committer and collects
 * the uploads each one reports.
 *
 * @param job job context whose configuration is copied for every task attempt
 * @param numTasks number of task attempts to run
 * @param numFiles number of files passed to {@code commitTask} for each attempt
 * @return the union of the uploads reported by all task committers
 * @throws IOException if committing a task fails
 */
private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> allUploads = Sets.newHashSet();
    int taskIndex = 0;
    while (taskIndex < numTasks) {
        // Spread attempt numbers deterministically across tasks.
        int attemptNumber = (taskIndex * 37) % numTasks;
        TaskID reduceTask = new TaskID(JOB_ID, TaskType.REDUCE, taskIndex);
        TaskAttemptID attemptId = new TaskAttemptID(reduceTask, attemptNumber);

        // Each attempt works on its own copy of the job configuration.
        Configuration attemptConf = new Configuration(job.getConfiguration());
        TaskAttemptContext attemptContext = new TaskAttemptContextImpl(attemptConf, attemptId);

        MockedS3Committer committer = new MockedS3Committer(S3_OUTPUT_PATH, attemptContext);
        commitTask(committer, attemptContext, numFiles);
        allUploads.addAll(committer.results.getUploads());

        taskIndex++;
    }
    return allUploads;
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.HadoopVersionSpecificCode_1x.java
License:Apache License
/**
 * Builds the ID of attempt number 0 for the task identified by the given job, task kind
 * and partition.
 *
 * @param jobID job the task belongs to
 * @param isMapper flag passed to the {@link TaskID} constructor to select the task kind
 * @param hadoopPartition partition number used as the task number
 * @return a new {@link TaskAttemptID} with attempt number 0
 */
@Override
public TaskAttemptID createTaskAttemptId(JobID jobID, boolean isMapper, int hadoopPartition) {
    TaskID task = new TaskID(jobID, isMapper, hadoopPartition);
    return new TaskAttemptID(task, 0);
}
From source file:com.splout.db.hadoop.SchemaSampler.java
License:Apache License
public static Schema sample(Configuration conf, Path input, InputFormat<ITuple, NullWritable> inputFormat) throws IOException, InterruptedException { Schema schema = null;// w w w . j av a 2s . com // sample schema from input path given the provided InputFormat @SuppressWarnings("deprecation") Job job = new Job(conf); FileInputFormat.setInputPaths(job, input); // get first inputSplit List<InputSplit> inputSplits = inputFormat.getSplits(job); if (inputSplits == null || inputSplits.size() == 0) { throw new IOException( "Given input format doesn't produce any input split. Can't sample first record. PATH: " + input); } InputSplit inputSplit = inputSplits.get(0); TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1); TaskAttemptContext attemptContext; try { attemptContext = TaskAttemptContextFactory.get(conf, attemptId); } catch (Exception e) { throw new IOException(e); } RecordReader<ITuple, NullWritable> rReader = inputFormat.createRecordReader(inputSplit, attemptContext); rReader.initialize(inputSplit, attemptContext); if (!rReader.nextKeyValue()) { throw new IOException( "Can't read first record of first input split of the given path [" + input + "]."); } // finally get the sample schema schema = rReader.getCurrentKey().getSchema(); log.info("Sampled schema from [" + input + "] : " + schema); rReader.close(); return schema; }