List of usage examples for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
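Before the per-project snippets below, here is a minimal, self-contained sketch of how this constructor is typically called. The job identifier "demo", the job/task/attempt numbers, and the class name TaskAttemptContextExample are illustrative assumptions, not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Build a synthetic attempt id: job "demo"/1, reduce task 0, attempt 0 (placeholder values).
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(new JobID("demo", 1), TaskType.REDUCE, 0), 0);
        // Wrap the configuration and id in a context usable by InputFormats, OutputFormats and committers.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println(context.getTaskAttemptID() + " -> " + context.getConfiguration().size() + " config entries");
    }
}

Most of the examples that follow do exactly this: they fabricate a TaskAttemptID (or parse one with TaskAttemptID.forName) and pair it with a Configuration so that code written against the MapReduce task API can be driven outside of a real task.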
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
@Before
public void setupCommitter() throws Exception {
    getConfiguration().set("s3.multipart.committer.num-threads", String.valueOf(numThreads));
    getConfiguration().set(UPLOAD_UUID, UUID.randomUUID().toString());
    this.job = new JobContextImpl(getConfiguration(), JOB_ID);
    this.jobCommitter = new MockedS3Committer(S3_OUTPUT_PATH, job);
    jobCommitter.setupJob(job);
    this.uuid = job.getConfiguration().get(UPLOAD_UUID);
    this.tac = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()), AID);
    // get the task's configuration copy so modifications take effect
    this.conf = tac.getConfiguration();
    conf.set("mapred.local.dir", "/tmp/local-0,/tmp/local-1");
    conf.setInt(UPLOAD_SIZE, 100);
    this.committer = new MockedS3Committer(S3_OUTPUT_PATH, tac);
}
From source file:com.netflix.bdp.s3.TestS3MultipartOutputCommitter.java
License:Apache License
private static Set<String> runTasks(JobContext job, int numTasks, int numFiles) throws IOException {
    Set<String> uploads = Sets.newHashSet();
    for (int taskId = 0; taskId < numTasks; taskId += 1) {
        TaskAttemptID attemptID = new TaskAttemptID(new TaskID(JOB_ID, TaskType.REDUCE, taskId),
                (taskId * 37) % numTasks);
        TaskAttemptContext attempt = new TaskAttemptContextImpl(new Configuration(job.getConfiguration()),
                attemptID);
        MockedS3Committer taskCommitter = new MockedS3Committer(S3_OUTPUT_PATH, attempt);
        commitTask(taskCommitter, attempt, numFiles);
        uploads.addAll(taskCommitter.results.getUploads());
    }
    return uploads;
}
From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample keys.
 * By default reads 100,000 keys from 10 locations in the input, sorts them and
 * picks N-1 keys to generate N equally sized partitions.
 *
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.HadoopVersionSpecificCode_CDH4.java
License:Apache License
@Override
public TaskAttemptContext createTaskAttemptContext(Configuration configuration, TaskAttemptID id) {
    return new TaskAttemptContextImpl(configuration, id);
}
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
@Test
public void testGetsStreamForDirectory() throws Exception {
    /*
     * This test failed before changes to WholeTextInputFormat (hooray for test-driven development!),
     * so this constitutes an effective regression test for SPLICE-739. Of course, we'll be certain
     * about it by ALSO writing an IT, but this is a nice little Unit test of the same thing.
     */
    Configuration configuration = HConfiguration.unwrapDelegate();
    String dirPath = SpliceUnitTest.getResourceDirectory() + "multiLineDirectory";
    configuration.set("mapred.input.dir", dirPath);
    WholeTextInputFormat wtif = new WholeTextInputFormat();
    wtif.setConf(configuration);

    JobContext ctx = new JobContextImpl(configuration, new JobID("test", 1));
    List<InputSplit> splits = wtif.getSplits(ctx);

    int i = 0;
    Set<String> files = readFileNames(dirPath);
    Assert.assertEquals("We didn't get a split per file", files.size(), splits.size());

    Set<String> readFiles = new HashSet<>();
    long totalRecords = 0;
    for (InputSplit is : splits) {
        TaskAttemptContext tac = new TaskAttemptContextImpl(configuration,
                new TaskAttemptID("test", 1, true, i, 1));
        RecordReader<String, InputStream> recordReader = wtif.createRecordReader(is, tac);
        CombineFileSplit cfs = (CombineFileSplit) is;
        System.out.println(cfs);
        totalRecords += collectRecords(readFiles, recordReader);
        i++;
    }
    Assert.assertEquals("did not read all data!", 28, totalRecords);
    Assert.assertEquals("Did not read all files!", files.size(), readFiles.size());
    for (String expectedFile : files) {
        Assert.assertTrue("Did not read file <" + expectedFile + "> read =" + readFiles + " exp",
                readFiles.contains(expectedFile));
    }
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License:Apache License
private List<Map.Entry> previewTextBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    TextInputFormat textInputFormat = new TextInputFormat();
    InputSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), null);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf,
            TaskAttemptID.forName("attempt_1439420318532_0011_m_000000_0"));
    RecordReader<LongWritable, Text> recordReader = textInputFormat.createRecordReader(fileSplit,
            taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean hasNext = recordReader.nextKeyValue();
    List<Map.Entry> batch = new ArrayList<>();
    while (hasNext && batch.size() < batchSize) {
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + recordReader.getCurrentKey(),
                String.valueOf(recordReader.getCurrentValue())));
        hasNext = recordReader.nextKeyValue(); // not like iterator.hasNext, actually advances
    }
    return batch;
}
From source file:com.toshiba.mwcloud.gs.hadoop.mapred.GSRowRecordWriter.java
License:Apache License
/**
 * Constructor.
 *
 * @param conf Configuration object
 * @throws IOException an exception occurred in GridDB
 */
public GSRowRecordWriter(JobConf conf) throws IOException {
    TaskAttemptContext context = new TaskAttemptContextImpl(conf,
            TaskAttemptID.forName(conf.get("mapred.task.id")));
    writer_ = new GDRecordWriter(context);
}
From source file:cz.seznam.euphoria.hadoop.HadoopUtils.java
License:Apache License
public static TaskAttemptContext createTaskContext(Configuration conf, int taskNumber) {
    // TODO uses some default hard-coded values
    TaskAttemptID taskAttemptID = new TaskAttemptID(
            "0",             // job tracker ID
            0,               // job number
            TaskType.REDUCE, // task type
            taskNumber,      // task ID
            0);              // task attempt
    return new TaskAttemptContextImpl(conf, taskAttemptID);
}
From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java
License:Apache License
public TaskAttemptContext createContext(Configuration conf, TaskAttemptID tid) throws HyracksDataException {
    try {
        return new TaskAttemptContextImpl(conf, tid);
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}
From source file:edu.uci.ics.hyracks.hdfs.ContextFactory.java
License:Apache License
public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
    try {
        TaskAttemptID tid = new TaskAttemptID("", 0, TaskType.REDUCE, partition, 0);
        return new TaskAttemptContextImpl(conf, tid);
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}