Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.TaskAttemptID constructor TaskAttemptID(TaskID, int).

Prototype

public TaskAttemptID(TaskID taskId, int id)

Source Link

Document

Constructs a TaskAttemptID object from the given TaskID and attempt number.

Usage

From source file:com.asakusafw.runtime.compatibility.hadoop1.JobCompatibilityHadoop1.java

License:Apache License

/**
 * Creates a new task attempt ID that belongs to the given task.
 * @param taskId the ID of the owning task
 * @param id the value passed to the {@link TaskAttemptID} constructor as its attempt ID
 * @return the created task attempt ID
 * @throws IllegalArgumentException if {@code taskId} is {@code null}
 */
@Override
public TaskAttemptID newTaskAttemptId(TaskID taskId, int id) {
    if (taskId != null) {
        return new TaskAttemptID(taskId, id);
    }
    throw new IllegalArgumentException("taskId must not be null"); //$NON-NLS-1$
}

From source file:com.asakusafw.testdriver.file.FileDeployer.java

License:Apache License

/**
 * Opens an output for the specified {@link FileOutputFormat}.
 * <p>
 * Records are first written into a freshly created temporary directory; when the returned
 * output is closed, {@code deploy(destination, temporaryDir)} is invoked after the underlying
 * driver has been closed.
 * </p>
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format used to write the records
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    // Explicit checks (rather than assert) so that the documented contract also holds
    // when assertions are disabled, which is the JVM default.
    if (definition == null) {
        throw new IllegalArgumentException("definition must not be null"); //$NON-NLS-1$
    }
    if (destination == null) {
        throw new IllegalArgumentException("destination must not be null"); //$NON-NLS-1$
    }
    if (output == null) {
        throw new IllegalArgumentException("output must not be null"); //$NON-NLS-1$
    }
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());

    // createTempFile only reserves a unique name as a regular file,
    // so replace that file with a directory of the same name.
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (!temporaryDir.delete() || !temporaryDir.mkdirs()) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    return new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            // finish writing into the staging directory before deploying it
            super.close();
            deploy(destination, temporaryDir);
        }
    };
}

From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java

License:Apache License

/**
 * Creates a {@link DataModelSource} that reads the files described by the exporter description.
 * <p>
 * The path prefix of the description is resolved with the variables created from the test
 * context and used as the input path; the input format is obtained via
 * {@code getOpposite(conf, description.getOutputFormat())}.
 * </p>
 * @param <V> value type
 * @param definition target model definition
 * @param description the exporter description
 * @param context the current test context
 * @return the created source
 * @throws IOException if failed to create the source
 */
@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    // NOTE(review): the message text looks mis-encoded ("??????"), presumably it was
    // originally non-ASCII; it is runtime text and therefore left untouched here.
    LOG.info("??????: {}", description);
    VariableTable variableTable = createVariables(context);
    checkType(definition, description);

    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    Path inputPath = new Path(variableTable.parse(description.getPathPrefix(), false));
    FileInputFormat.setInputPaths(job, inputPath);

    TaskID mapTask = new TaskID(new JobID(), TaskType.MAP, 0);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(mapTask, 0));
    FileInputFormat<?, V> inputFormat = getOpposite(conf, description.getOutputFormat());
    return new FileInputFormatDriver<>(definition, taskContext, inputFormat);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleInputOutputFormat.java

License:Apache License

/**
 * Writes {@code generatedRows} random tuples into a tuple file, reads them back split by split
 * through {@link TupleInputFormat} using the given maximum split size, and asserts that the
 * number of rows read equals the number of rows written.
 * <p>
 * The writer and every record reader are closed even when an exception occurs, and the
 * generated file is removed regardless of the test outcome.
 * </p>
 * @param maxSplitSize the maximum input split size to configure on the job
 * @param generatedRows the number of rows to write and expect back
 */
public void testSplits(long maxSplitSize, int generatedRows) throws IOException, InterruptedException,
        IllegalArgumentException, SecurityException, ClassNotFoundException, InstantiationException,
        IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    logger.info("Testing maxSplitSize: " + maxSplitSize + " and generatedRows:" + generatedRows);
    FileSystem fS = FileSystem.get(getConf());
    Random r = new Random(1); // fixed seed keeps the generated data reproducible
    Schema schema = new Schema("schema", Fields.parse("i:int,s:string"));
    ITuple tuple = new Tuple(schema);

    Path outPath = new Path(OUT);
    try {
        TupleFile.Writer writer = new TupleFile.Writer(fS, getConf(), outPath, schema);
        try {
            for (int i = 0; i < generatedRows; i++) {
                tuple.set("i", r.nextInt());
                tuple.set("s", r.nextLong() + "");
                writer.append(tuple);
            }
        } finally {
            writer.close();
        }

        TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf());
        Job job = new Job(getConf());
        FileInputFormat.setInputPaths(job, outPath);
        logger.info("Using max input split size: " + maxSplitSize);
        FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
        job.setInputFormatClass(FileInputFormat.class);

        // Read all the splits and count. The number of read rows must
        // be the same as the number of written ones.
        int count = 0;
        for (InputSplit split : format.getSplits(job)) {
            TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
            TaskAttemptContext attemptContext = TaskAttemptContextFactory.get(getConf(), attemptId);
            logger.info("Sampling split: " + split);
            RecordReader<ITuple, NullWritable> reader = format.createRecordReader(split, attemptContext);
            try {
                reader.initialize(split, attemptContext);
                while (reader.nextKeyValue()) {
                    // the key is fetched for every record, but only the row count is checked
                    tuple = reader.getCurrentKey();
                    count++;
                }
            } finally {
                reader.close();
            }
        }

        assertEquals(generatedRows, count);
    } finally {
        // always clean up the generated file, even if the assertion above fails
        HadoopUtils.deleteIfExists(fS, outPath);
    }
}

From source file:com.google.appengine.tools.mapreduce.MapReduceServletTest.java

License:Apache License

/**
 * Checks that the controller request is handled sensibly while one shard is
 * still active and another one is already done.
 *
 * @throws EntityNotFoundException if thrown by the code under test
 */
public void testHandleController_withContinue() throws EntityNotFoundException {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest request = createMockControllerRequest(0, jobId);
    replay(request);

    Configuration sampleConf = getSampleMapReduceConfiguration();
    persistMRState(jobId, sampleConf);

    // First shard: persisted without being marked as done.
    TaskAttemptID activeAttempt = new TaskAttemptID(new TaskID(jobId, true, 1), 1);
    ShardState activeShard = ShardState.generateInitializedShardState(ds, activeAttempt);
    Counters activeCounters = new Counters();
    activeCounters.findCounter("a", "z").increment(1);
    activeShard.setCounters(activeCounters);
    activeShard.setInputSplit(sampleConf, new StubInputSplit(1));
    activeShard.setRecordReader(sampleConf, new StubRecordReader());
    activeShard.persist();

    // Second shard: same setup, but marked as done before being persisted.
    TaskAttemptID doneAttempt = new TaskAttemptID(new TaskID(jobId, true, 2), 1);
    ShardState doneShard = ShardState.generateInitializedShardState(ds, doneAttempt);
    Counters doneCounters = new Counters();
    doneCounters.findCounter("a", "z").increment(1);
    doneShard.setCounters(doneCounters);
    doneShard.setInputSplit(sampleConf, new StubInputSplit(2));
    doneShard.setRecordReader(sampleConf, new StubRecordReader());
    doneShard.setDone();
    doneShard.persist();

    // The request goes through doPost; the response is never used, so null is passed.
    servlet.doPost(request, null);

    MapReduceState mrState = MapReduceState.getMapReduceStateFromJobID(ds, jobId);

    // Both shards contributed 1 to the same counter, so the aggregate must be 2.
    assertEquals(2, mrState.getCounters().findCounter("a", "z").getValue());

    // Quota must be refilled for the active shard only, not for the done one.
    String activeQuotaKey = "" + activeShard.getTaskAttemptID();
    String doneQuotaKey = "" + doneShard.getTaskAttemptID();
    assertEquals(1000, new QuotaManager(MemcacheServiceFactory.getMemcacheService())
            .get(activeQuotaKey));
    assertEquals(0, new QuotaManager(MemcacheServiceFactory.getMemcacheService())
            .get(doneQuotaKey));

    // Exactly one follow-up controller task must have been enqueued for this job.
    QueueStateInfo defaultQueue = getDefaultQueueInfo();
    assertEquals(1, defaultQueue.getCountTasks());
    TaskStateInfo enqueuedTask = defaultQueue.getTaskInfo().get(0);
    assertEquals("/mapreduce/" + MapReduceServlet.CONTROLLER_PATH, enqueuedTask.getUrl());
    String enqueuedBody = enqueuedTask.getBody();
    assertTrue(enqueuedBody, enqueuedBody.contains("jobID=job_foo_0001"));

    assertEquals(1, mrState.getActiveShardCount());
    assertEquals(2, mrState.getShardCount());

    verify(request);
}

From source file:com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java

License:Apache License

/**
 * Stubs the fake task attempt context so that it reports a fixed reduce task attempt ID,
 * whose job tracker identifier embeds {@code indexType}, and the configuration of the
 * current job.
 *
 * @param indexType suffix appended to the {@code "foo_task_"} identifier
 */
private void mockTaskAttemptContext(String indexType) {
    TaskID reduceTask = new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2);
    TaskAttemptID fakeTaskId = new TaskAttemptID(reduceTask, 2);
    when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
    when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}

From source file:com.marklogic.contentpump.LocalJobRunner.java

License:Apache License

/**
 * Runs the job. Gets the input splits, creates one map task per split and
 * submits it to the thread pool if there is one; otherwise, runs the tasks
 * one by one on the calling thread.
 * <p>
 * If checking the output specification fails, the error is logged and the
 * method returns without running any task. After all tasks have finished,
 * the collected counters and the total execution time are logged.
 * </p>
 *
 * @param <INKEY> input key type of the mapper
 * @param <INVALUE> input value type of the mapper
 * @param <OUTKEY> output key type of the mapper
 * @param <OUTVALUE> output value type of the mapper
 * @param <T> input split type used for the sortable split array
 * @throws Exception if any step of the job fails
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    // validate the output side before doing any work; a failure aborts the run
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        // the full stack trace is only logged at debug level
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    // re-read the configuration after the output check
    // NOTE(review): presumably because checkOutputSpecs may have modified it - confirm
    conf = job.getConfiguration();
    // one progress holder per split, indexed like the sorted split array
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    // futures of the tasks whose completion is explicitly awaited below
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                // only override the task's mapper class if it actually changed
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                // submit the task, then block on the pool's monitor until notified
                // NOTE(review): presumably the submitted task notifies on the pool once
                // it has claimed its threads - confirm against LocalMapTask
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                // NOTE(review): the returned Future is not added to taskList, so the
                // f.get() loop below does not observe the outcome of this task
                pool.submit(task);
            }
        } else { // single-threaded
            // build a synthetic map task attempt for this split (task index i, attempt 0)
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            // wrap the reader together with this split's progress holder
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            // a fresh mapper instance is created for every split
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        // keep waiting until the pool reports termination
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    // stop the monitor thread and give it up to one second to finish
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}

From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License:Apache License

/**
 * Commits {@code numTasks} reduce task attempts with a mocked committer and collects the
 * uploads reported by each of them.
 *
 * @param job the job context whose configuration is copied for every task attempt
 * @param numTasks the number of task attempts to run
 * @param numFiles passed through to {@code commitTask} for every attempt
 * @return the union of the uploads reported by all task committers
 * @throws IOException if committing a task fails
 */
private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> collectedUploads = Sets.newHashSet();

    for (int taskIndex = 0; taskIndex < numTasks; taskIndex++) {
        // attempt numbers are derived from the task index instead of always being 0
        int attemptNumber = (taskIndex * 37) % numTasks;
        TaskID reduceTask = new TaskID(JOB_ID, TaskType.REDUCE, taskIndex);
        TaskAttemptID attemptID = new TaskAttemptID(reduceTask, attemptNumber);

        // every attempt gets its own copy of the job configuration
        Configuration attemptConf = new Configuration(job.getConfiguration());
        TaskAttemptContext attempt = new TaskAttemptContextImpl(attemptConf, attemptID);

        MockedS3Committer taskCommitter = new MockedS3Committer(S3_OUTPUT_PATH, attempt);
        commitTask(taskCommitter, attempt, numFiles);
        collectedUploads.addAll(taskCommitter.results.getUploads());
    }

    return collectedUploads;
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.HadoopVersionSpecificCode_1x.java

License:Apache License

/**
 * Creates the ID of attempt 0 of the task identified by the given job, task kind and partition.
 *
 * @param jobID the ID of the owning job
 * @param isMapper whether the task is a map task, as passed to the {@link TaskID} constructor
 * @param hadoopPartition the value used as the task number
 * @return the created task attempt ID
 */
@Override
public TaskAttemptID createTaskAttemptId(JobID jobID, boolean isMapper, int hadoopPartition) {
    TaskID task = new TaskID(jobID, isMapper, hadoopPartition);
    return new TaskAttemptID(task, 0);
}

From source file:com.splout.db.hadoop.SchemaSampler.java

License:Apache License

/**
 * Samples the schema of the first record found in the first input split of the given path.
 * <p>
 * The record reader opened for sampling is always closed, including when reading fails or
 * the first split contains no record.
 * </p>
 *
 * @param conf the configuration used to create the sampling job
 * @param input the input path to sample from
 * @param inputFormat the input format used to compute the splits and read the records
 * @return the schema of the first record
 * @throws IOException if the input format produces no split, if the first split has no
 *         record, or if creating the task attempt context or reading fails
 * @throws InterruptedException if thrown by the input format or the record reader
 */
public static Schema sample(Configuration conf, Path input, InputFormat<ITuple, NullWritable> inputFormat)
        throws IOException, InterruptedException {
    // sample schema from input path given the provided InputFormat
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    FileInputFormat.setInputPaths(job, input);
    // get first inputSplit
    List<InputSplit> inputSplits = inputFormat.getSplits(job);
    if (inputSplits == null || inputSplits.isEmpty()) {
        throw new IOException(
                "Given input format doesn't produce any input split. Can't sample first record. PATH: "
                        + input);
    }
    InputSplit inputSplit = inputSplits.get(0);
    TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
    TaskAttemptContext attemptContext;
    try {
        attemptContext = TaskAttemptContextFactory.get(conf, attemptId);
    } catch (Exception e) {
        // keep the original failure as the cause
        throw new IOException(e);
    }

    RecordReader<ITuple, NullWritable> rReader = inputFormat.createRecordReader(inputSplit, attemptContext);
    try {
        rReader.initialize(inputSplit, attemptContext);

        if (!rReader.nextKeyValue()) {
            throw new IOException(
                    "Can't read first record of first input split of the given path [" + input + "].");
        }

        // finally get the sample schema
        Schema schema = rReader.getCurrentKey().getSchema();
        log.info("Sampled schema from [" + input + "] : " + schema);
        return schema;
    } finally {
        // release the reader on every path, not only on success
        rReader.close();
    }
}