Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce TaskAttemptID constructor TaskAttemptID(TaskID, int).

Prototype

public TaskAttemptID(TaskID taskId, int id) 

Document

Constructs a TaskAttemptID object from the given TaskID.
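
A minimal sketch of constructing a TaskAttemptID with this constructor, assuming the Hadoop 2.x (or later) API in which TaskID takes a TaskType; the job identifier "example_job" and the numeric IDs are illustrative values only:

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDExample {
    public static void main(String[] args) {
        // Build the identifier hierarchy: JobID -> TaskID -> TaskAttemptID.
        JobID jobId = new JobID("example_job", 1);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        // The second argument is the attempt number (0 for the first attempt).
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);
        // Prints something like: attempt_example_job_0001_m_000000_0
        System.out.println(attemptId);
    }
}

Several of the examples below pass the resulting TaskAttemptID to a TaskAttemptContextImpl in order to drive an InputFormat or OutputFormat outside of a running MapReduce job.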

Usage

From source file: com.asakusafw.runtime.compatibility.hadoop1.JobCompatibilityHadoop1.java

License: Apache License

@Override
public TaskAttemptID newTaskAttemptId(TaskID taskId, int id) {
    if (taskId == null) {
        throw new IllegalArgumentException("taskId must not be null"); //$NON-NLS-1$
    }
    return new TaskAttemptID(taskId, id);
}

From source file: com.asakusafw.testdriver.file.FileDeployer.java

License: Apache License

/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}

From source file: com.asakusafw.testdriver.file.FileExporterRetriever.java

License: Apache License

@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    LOG.info("??????: {}", description);
    VariableTable variables = createVariables(context);
    checkType(definition, description);
    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String resolved = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(resolved));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    FileInputFormatDriver<V> result = new FileInputFormatDriver<>(definition, taskContext, format);
    return result;
}

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleInputOutputFormat.java

License: Apache License

public void testSplits(long maxSplitSize, int generatedRows) throws IOException, InterruptedException,
        IllegalArgumentException, SecurityException, ClassNotFoundException, InstantiationException,
        IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    logger.info("Testing maxSplitSize: " + maxSplitSize + " and generatedRows:" + generatedRows);
    FileSystem fS = FileSystem.get(getConf());
    Random r = new Random(1);
    Schema schema = new Schema("schema", Fields.parse("i:int,s:string"));
    ITuple tuple = new Tuple(schema);

    Path outPath = new Path(OUT);
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(getConf()), getConf(), outPath, schema);
    for (int i = 0; i < generatedRows; i++) {
        tuple.set("i", r.nextInt());
        tuple.set("s", r.nextLong() + "");
        writer.append(tuple);
    }
    writer.close();

    TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf());
    Job job = new Job(getConf());
    FileInputFormat.setInputPaths(job, outPath);
    logger.info("Using max input split size: " + maxSplitSize);
    FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
    job.setInputFormatClass(FileInputFormat.class);

    // Read all the splits and count. The number of rows read must
    // be the same as the number written.
    int count = 0;
    for (InputSplit split : format.getSplits(job)) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
        TaskAttemptContext attemptContext = TaskAttemptContextFactory.get(getConf(), attemptId);
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = format.createRecordReader(split, attemptContext);
        reader.initialize(split, attemptContext);
        while (reader.nextKeyValue()) {
            tuple = reader.getCurrentKey();
            count++;
        }
        reader.close();
    }

    assertEquals(generatedRows, count);

    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file: com.google.appengine.tools.mapreduce.MapReduceServletTest.java

License: Apache License

/**
 * Test that handleController has reasonable behavior when there are still
 * active workers.
 *
 * @throws EntityNotFoundException
 */
public void testHandleController_withContinue() throws EntityNotFoundException {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest request = createMockControllerRequest(0, jobId);
    replay(request);

    Configuration sampleConf = getSampleMapReduceConfiguration();

    persistMRState(jobId, sampleConf);

    ShardState shardState1 = ShardState.generateInitializedShardState(ds,
            new TaskAttemptID(new TaskID(jobId, true, 1), 1));
    Counters counters1 = new Counters();
    counters1.findCounter("a", "z").increment(1);
    shardState1.setCounters(counters1);
    shardState1.setInputSplit(sampleConf, new StubInputSplit(1));
    shardState1.setRecordReader(sampleConf, new StubRecordReader());
    shardState1.persist();

    ShardState shardState2 = ShardState.generateInitializedShardState(ds,
            new TaskAttemptID(new TaskID(jobId, true, 2), 1));
    Counters counters2 = new Counters();
    counters2.findCounter("a", "z").increment(1);
    shardState2.setCounters(counters2);
    shardState2.setInputSplit(sampleConf, new StubInputSplit(2));
    shardState2.setRecordReader(sampleConf, new StubRecordReader());
    shardState2.setDone();
    shardState2.persist();

    // doPost should call handleCallback()
    // resp is never used
    servlet.doPost(request, null);

    MapReduceState mrState = MapReduceState.getMapReduceStateFromJobID(ds, jobId);

    // Check result of aggregateState()
    assertEquals(2, mrState.getCounters().findCounter("a", "z").getValue());

    // Check the result of refillQuota()
    // Should fill the active thread but not the done one.
    assertEquals(1000, new QuotaManager(MemcacheServiceFactory.getMemcacheService())
            .get("" + shardState1.getTaskAttemptID()));
    assertEquals(0, new QuotaManager(MemcacheServiceFactory.getMemcacheService())
            .get("" + shardState2.getTaskAttemptID()));

    // Check that the next controller task got enqueued.
    QueueStateInfo defaultQueue = getDefaultQueueInfo();
    assertEquals(1, defaultQueue.getCountTasks());
    TaskStateInfo firstTask = defaultQueue.getTaskInfo().get(0);
    assertEquals("/mapreduce/" + MapReduceServlet.CONTROLLER_PATH, firstTask.getUrl());
    assertTrue(firstTask.getBody(), firstTask.getBody().indexOf("jobID=job_foo_0001") != -1);

    assertEquals(1, mrState.getActiveShardCount());
    assertEquals(2, mrState.getShardCount());

    verify(request);
}

From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java

License: Apache License

private void mockTaskAttemptContext(String indexType) {
    TaskAttemptID fakeTaskId = new TaskAttemptID(new TaskID("foo_task_" + indexType, 123, TaskType.REDUCE, 2),
            2);
    when(fakeTaskAttemptContext.getTaskAttemptID()).thenReturn(fakeTaskId);
    when(fakeTaskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
}

From source file: com.marklogic.contentpump.LocalJobRunner.java

License: Apache License

/**
 * Run the job.  Get the input splits, create map tasks and submit them to
 * the thread pool if there is one; otherwise, run the tasks one by
 * one.
 * 
 * @param <INKEY>
 * @param <INVALUE>
 * @param <OUTKEY>
 * @param <OUTVALUE>
 * @throws Exception
 */
@SuppressWarnings("unchecked")
public <INKEY, INVALUE, OUTKEY, OUTVALUE, T extends org.apache.hadoop.mapreduce.InputSplit> void run()
        throws Exception {
    Configuration conf = job.getConfiguration();
    InputFormat<INKEY, INVALUE> inputFormat = (InputFormat<INKEY, INVALUE>) ReflectionUtils
            .newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = inputFormat.getSplits(job);
    T[] array = (T[]) splits.toArray(new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);

    // sort the splits into order based on size, so that the biggest
    // goes first
    Arrays.sort(array, new SplitLengthComparator());
    OutputFormat<OUTKEY, OUTVALUE> outputFormat = (OutputFormat<OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(job.getOutputFormatClass(), conf);
    Class<? extends Mapper<?, ?, ?, ?>> mapperClass = job.getMapperClass();
    Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(mapperClass, conf);
    try {
        outputFormat.checkOutputSpecs(job);
    } catch (Exception ex) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Error checking output specification: ", ex);
        } else {
            LOG.error("Error checking output specification: ");
            LOG.error(ex.getMessage());
        }
        return;
    }
    conf = job.getConfiguration();
    progress = new AtomicInteger[splits.size()];
    for (int i = 0; i < splits.size(); i++) {
        progress[i] = new AtomicInteger();
    }
    Monitor monitor = new Monitor();
    monitor.start();
    reporter = new ContentPumpReporter();
    List<Future<Object>> taskList = new ArrayList<Future<Object>>();
    for (int i = 0; i < array.length; i++) {
        InputSplit split = array[i];
        if (pool != null) {
            LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE> task = new LocalMapTask<INKEY, INVALUE, OUTKEY, OUTVALUE>(
                    inputFormat, outputFormat, conf, i, split, reporter, progress[i]);
            availableThreads = assignThreads(i, array.length);
            Class<? extends Mapper<?, ?, ?, ?>> runtimeMapperClass = job.getMapperClass();
            if (availableThreads > 1 && availableThreads != threadsPerSplit) {
                // possible runtime adjustment
                if (runtimeMapperClass != (Class) MultithreadedMapper.class) {
                    runtimeMapperClass = (Class<? extends Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>>) cmd
                            .getRuntimeMapperClass(job, mapperClass, threadsPerSplit, availableThreads);
                }
                if (runtimeMapperClass != mapperClass) {
                    task.setMapperClass(runtimeMapperClass);
                }
                if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                    task.setThreadCount(availableThreads);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Thread Count for Split#" + i + " : " + availableThreads);
                    }
                }
            }

            if (runtimeMapperClass == (Class) MultithreadedMapper.class) {
                synchronized (pool) {
                    taskList.add(pool.submit(task));
                    pool.wait();
                }
            } else {
                pool.submit(task);
            }
        } else { // single-threaded
            JobID jid = new JobID();
            TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
            TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
            TaskAttemptContext context = ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
            RecordReader<INKEY, INVALUE> reader = inputFormat.createRecordReader(split, context);
            RecordWriter<OUTKEY, OUTVALUE> writer = outputFormat.getRecordWriter(context);
            OutputCommitter committer = outputFormat.getOutputCommitter(context);
            TrackingRecordReader trackingReader = new TrackingRecordReader(reader, progress[i]);

            Mapper.Context mapperContext = ReflectionUtil.createMapperContext(mapper, conf, taskAttemptId,
                    trackingReader, writer, committer, reporter, split);

            trackingReader.initialize(split, mapperContext);

            // no thread pool (only 1 thread specified)
            Class<? extends Mapper<?, ?, ?, ?>> mapClass = job.getMapperClass();
            mapperContext.getConfiguration().setClass(CONF_MAPREDUCE_JOB_MAP_CLASS, mapClass, Mapper.class);
            mapper = (Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils.newInstance(mapClass,
                    mapperContext.getConfiguration());
            mapper.run(mapperContext);
            trackingReader.close();
            writer.close(mapperContext);
            committer.commitTask(context);
        }
    }
    // wait till all tasks are done
    if (pool != null) {
        for (Future<Object> f : taskList) {
            f.get();
        }
        pool.shutdown();
        while (!pool.awaitTermination(1, TimeUnit.DAYS))
            ;
        jobComplete.set(true);
    }
    monitor.interrupt();
    monitor.join(1000);

    // report counters
    Iterator<CounterGroup> groupIt = reporter.counters.iterator();
    while (groupIt.hasNext()) {
        CounterGroup group = groupIt.next();
        LOG.info(group.getDisplayName() + ": ");
        Iterator<Counter> counterIt = group.iterator();
        while (counterIt.hasNext()) {
            Counter counter = counterIt.next();
            LOG.info(counter.getDisplayName() + ": " + counter.getValue());
        }
    }
    LOG.info("Total execution time: " + (System.currentTimeMillis() - startTime) / 1000 + " sec");
}

From source file: com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java

License: Apache License

private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> uploads = Sets.newHashSet();

    for (int taskId = 0; taskId < numTasks; taskId += 1) {
        TaskAttemptID attemptID = new TaskAttemptID(new TaskID(JOB_ID, TaskType.REDUCE, taskId),
                (taskId * 37) % numTasks);
        TaskAttemptContext attempt = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()),
                attemptID);
        MockedS3Committer taskCommitter = new MockedS3Committer(S3_OUTPUT_PATH, attempt);
        commitTask(taskCommitter, attempt, numFiles);
        uploads.addAll(taskCommitter.results.getUploads());
    }

    return uploads;
}

From source file: com.scaleoutsoftware.soss.hserver.hadoop.HadoopVersionSpecificCode_1x.java

License: Apache License

@Override
public TaskAttemptID createTaskAttemptId(JobID jobID, boolean isMapper, int hadoopPartition) {
    return new TaskAttemptID(new TaskID(jobID, isMapper, hadoopPartition), 0);
}

From source file: com.splout.db.hadoop.SchemaSampler.java

License: Apache License

public static Schema sample(Configuration conf, Path input, InputFormat<ITuple, NullWritable> inputFormat)
        throws IOException, InterruptedException {
    Schema schema = null;

    // sample schema from input path given the provided InputFormat
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    FileInputFormat.setInputPaths(job, input);
    // get first inputSplit
    List<InputSplit> inputSplits = inputFormat.getSplits(job);
    if (inputSplits == null || inputSplits.size() == 0) {
        throw new IOException(
                "Given input format doesn't produce any input split. Can't sample first record. PATH: "
                        + input);
    }
    InputSplit inputSplit = inputSplits.get(0);
    TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
    TaskAttemptContext attemptContext;
    try {
        attemptContext = TaskAttemptContextFactory.get(conf, attemptId);
    } catch (Exception e) {
        throw new IOException(e);
    }

    RecordReader<ITuple, NullWritable> rReader = inputFormat.createRecordReader(inputSplit, attemptContext);
    rReader.initialize(inputSplit, attemptContext);

    if (!rReader.nextKeyValue()) {
        throw new IOException(
                "Can't read first record of first input split of the given path [" + input + "].");
    }

    // finally get the sample schema
    schema = rReader.getCurrentKey().getSchema();
    log.info("Sampled schema from [" + input + "] : " + schema);
    rReader.close();

    return schema;
}