Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.TaskAttemptID constructor TaskAttemptID(TaskID, int).

Prototype

public TaskAttemptID(TaskID taskId, int id)

Source Link

Document

Constructs a TaskAttemptID object from the given TaskID.

Usage

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each visited InputSplit
 * without using a Job.
 * <p>
 * The output is a SequenceFile whose keys are the partition keys of the sampled tuples (as {@link Text})
 * and whose values are {@link NullWritable}. The writer and every record reader opened here are closed
 * before this method returns, whether it completes normally or throws.
 *
 * @param sampleSize              total number of samples to take; must be positive
 * @param hadoopConf              configuration used to obtain the file system, the writer and the task contexts
 * @param outFile                 path of the SequenceFile to write
 * @param splits                  the splits to sample from; must not be empty
 * @param splitToTableSpec        per-split table spec handed to the partition-key computation
 * @param splitToFormat           per-split input format used to create the record reader
 * @param specificHadoopConf      optional per-split configuration entries applied to the task context
 * @param recordProcessorPerSplit per-split processor applied to every tuple; a {@code null} result filters
 *                                the tuple out
 * @param splitToJsEngine         per-split Javascript engine handed to the partition-key computation
 * @param maxSplitsToVisit        upper bound on the number of splits that are read; must be positive
 * @return The number of retrieved samples
 * @throws IllegalArgumentException if there are no splits, or if {@code sampleSize} or
 *                                  {@code maxSplitsToVisit} is not positive
 * @throws IOException              if reading a split or writing the output fails
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.isEmpty()) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }
    // Reject non-positive limits up front: they would otherwise lead to a division by zero
    // (or a silently empty sample) after the output file has already been created.
    if (sampleSize <= 0 || maxSplitsToVisit <= 0) {
        throw new IllegalArgumentException("Sample size (" + sampleSize + ") and max splits to visit ("
                + maxSplitsToVisit + ") must both be positive!");
    }

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());

    // Compare as long so that a sample size above Integer.MAX_VALUE cannot wrap around when narrowed.
    int blocks = (int) Math.min(sampleSize, (long) Math.min(maxSplitsToVisit, splits.size()));
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    // The record processors need a CounterInterface; every lookup simply returns a throwaway mock.
    CounterInterface counterInterface = new CounterInterface(null) {

        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    long records = 0;

    // Instantiate the writer we will write samples to
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);
    try {
        // Take N samples from different parts of the input
        for (int i = 0; i < blocks; ++i) {
            TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

            TaskAttemptContext attemptContext;
            try {
                attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }

            InputSplit split = splits.get(sampleStep * i);
            Map<String, String> splitConf = specificHadoopConf.get(split);
            if (splitConf != null) {
                for (Map.Entry<String, String> specificConf : splitConf.entrySet()) {
                    attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
                }
            }

            logger.info("Sampling split: " + split);
            RecordReader<ITuple, NullWritable> reader = null;
            try {
                reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
                reader.initialize(split, attemptContext);

                RecordProcessor processor = recordProcessorPerSplit.get(split);
                Text key = new Text();
                while (reader.nextKeyValue()) {
                    ITuple tuple = reader.getCurrentKey();

                    ITuple uTuple;
                    try {
                        uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                    } catch (Throwable e) {
                        throw new RuntimeException(e);
                    }
                    if (uTuple == null) {
                        // user may have filtered the record
                        continue;
                    }

                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    // Move on to the next split once this block's cumulative quota has been reached.
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so that callers further up can still observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    } finally {
        writer.close();
    }

    return records;
}

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

/**
 * Builds the fixture used by the tests: the application/task/attempt ids derived from {@code oldJobId},
 * the monitor configuration and caches, the {@link AppStatusChecker} under test and the mocked
 * {@link RestClient} singleton.
 */
public AppStatusCheckerTest() throws ConfigurationAccessException, RestException, SAXException, IOException,
        ParserConfigurationException, YarnException {
    appId = new MyApplicationId();
    appId.setId(oldJobId.getId());
    appId.setClusterTimestamp(Long.parseLong(oldJobId.getJtIdentifier()));

    taskId = new TaskID(oldJobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);

    vConf.setFloat(HadoopJobMonitorConfiguration.TASK_PROGRESS_THRESHOLD, 0.2f);
    // These two were getInt(...) calls whose return value was discarded, i.e. no-ops;
    // the cache limits are meant to be configured before the caches are initialised below.
    vConf.setInt(HadoopJobMonitorConfiguration.MAX_CACHED_TASK_PROGRESSES, 10);
    vConf.setInt(HadoopJobMonitorConfiguration.MAX_CACHED_APP_CONFS, 10);
    AppConfCache.init(vConf);
    ProgressCache.init(vConf);
    HadoopJobMonitorMetrics.initSingleton(vConf);
    taskProgressCache = ProgressCache.getTaskProgressCache();
    attemptProgressCache = ProgressCache.getAttemptProgressCache();

    when(clientCache.getClient(any(JobID.class))).thenReturn(clientService);
    appReport = mock(ApplicationReport.class);
    when(appReport.getApplicationId()).thenReturn(appId);
    appStatusChecker = new AppStatusChecker(vConf, appReport, clientCache, rm, new AppCheckerProgress() {
        @Override
        public void finished() {
            // nothing to do once the check has finished
        }
    });

    // RestClient.getInstance() is static, so the class itself is mocked to hand out our mock instance.
    mockStatic(RestClient.class);
    restClient = mock(RestClient.class);
    when(RestClient.getInstance()).thenReturn(restClient);
}

From source file:io.ssc.trackthetrackers.extraction.hadoop.util.Compaction.java

License:Open Source License

/**
 * Compacts all {@code .parquet} files of an input folder into a single Snappy-compressed parquet file.
 * <p>
 * Any existing output is deleted first. Every record of every input split is copied field by field into a
 * new {@code ParsedPage} and written to the output. Each record reader, and the record writer, is closed
 * even if processing fails part-way.
 *
 * @param args {@code <input folder> <output file>}
 * @throws IOException          if reading the input or writing the output fails
 * @throws InterruptedException if reading or writing is interrupted
 */
public static void main(String[] args) throws IOException, InterruptedException {

    if (args.length != 2) {
        System.out.println("Usage: <input folder> <output file>");
        System.exit(-1);
    }

    String inputPath = args[0];
    String outputFile = args[1];

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    FileStatus[] input = fs.listStatus(new Path(inputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toString().endsWith(".parquet");
        }
    });

    Path output = new Path(outputFile);

    fs.delete(output, true);

    ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder> inputFormat = new ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder>();
    inputFormat.setReadSupportClass(new JobConf(conf), ProtoReadSupport.class);

    Job job = new Job(conf);
    ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage> outputFormat = new ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage>(
            ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setProtobufClass(job, ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ProtoParquetOutputFormat.setEnableDictionary(job, true);

    RecordWriter<Void, ParsedPageProtos.ParsedPage> recordWriter = outputFormat.getRecordWriter(conf, output,
            CompressionCodecName.SNAPPY);
    try {
        List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();

        for (FileStatus fileStatus : input) {
            System.out.println(fileStatus.getPath().toString());
            splits.addAll(inputFormat.getSplits(conf, ParquetFileReader.readFooters(conf, fileStatus)));
        }

        int splitIndex = 0;
        for (ParquetInputSplit split : splits) {

            System.out.println("Processing split: " + split.getPath().toString() + "(" + splitIndex + " of "
                    + splits.size() + ")");

            TaskAttemptID taskAttemptID = new TaskAttemptID(
                    new TaskID("identifier", splitIndex, true, splitIndex), splitIndex);
            TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

            RecordReader<Void, ParsedPageProtos.ParsedPageOrBuilder> reader = inputFormat
                    .createRecordReader(split, ctx);
            try {
                reader.initialize(split, ctx);

                while (reader.nextKeyValue()) {

                    ParsedPageProtos.ParsedPageOrBuilder record = reader.getCurrentValue();

                    // Copy the record into a fresh message, field by field.
                    ParsedPageProtos.ParsedPage.Builder builder = ParsedPageProtos.ParsedPage.newBuilder();

                    builder.setUrl(record.getUrl());
                    builder.setArchiveTime(record.getArchiveTime());

                    builder.addAllScripts(record.getScriptsList());
                    builder.addAllIframes(record.getIframesList());
                    builder.addAllLinks(record.getLinksList());
                    builder.addAllImages(record.getImagesList());

                    recordWriter.write(null, builder.build());
                }
            } finally {
                // Release the reader even when initialisation or copying fails.
                reader.close();
            }

            splitIndex++;
        }
    } finally {
        // Closing the writer requires a task context; a fixed dummy attempt id is used for it.
        TaskAttemptID closeAttemptID = new TaskAttemptID(new TaskID("identifier", 1, true, 1), 1);
        TaskAttemptContext closeCtx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, closeAttemptID);
        recordWriter.close(closeCtx);
    }

}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test PipesMapRunner    test the transfer data from reader
 *
 * @throws Exception/*from   w w  w. ja v a2s  .  c o  m*/
 */
@Test
public void testRunner() throws Exception {
    // clean old password files
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(Submitter.IS_JAVA_RR, "true");
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        job.setInputFormatClass(DummyInputFormat.class);
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);

        InputSplit isplit = isplits.get(0);

        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);

        TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);
        // stub for client
        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(
                context);

        initStdOut(conf);
        mapper.run(context);
        String stdOut = readStdOut(conf);

        // test part of translated data. As common file for client and test -
        // clients stdOut
        // check version
        assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
        // check key and value classes
        assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
        assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
        // test have sent all data from reader
        assertTrue(stdOut.contains("value:0.0"));
        assertTrue(stdOut.contains("value:9.0"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.Application
 * test a internal functions: /*w  w  w. j  ava  2s.  c  o  m*/
 *     MessageType.REGISTER_COUNTER,  INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {

    System.err.println("testApplication");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
        //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, null, writer, null, reporter, null);

        System.err.println("ready to launch application");
        Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>(
                context, reader);
        System.err.println("done");

        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();

        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));

        assertEquals(0.0, context.getProgress(), 0.01);
        assertNotNull(context.getCounter("group", "name"));

        // test status MessageType.STATUS
        assertEquals(context.getStatus(), "PROGRESS");
        // check MessageType.PROGRESS
        assertEquals(0.55f, reader.getProgress(), 0.001);
        application.getDownlink().close();
        // test MessageType.OUTPUT
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        assertTrue(stdOut.contains("key:123"));
        assertTrue(stdOut.contains("value:value"));
        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // abort works ?
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.PipesReducer
 * test the transfer of data: key and value
 *
 * @throws Exception/*from   w ww . j a  v a  2  s  .  com*/
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }

}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipesNonJavaInputFormat.java

License:Apache License

/**
 *  test PipesNonJavaInputFormat/*from www . jav a 2  s. c o  m*/
  */

@Test
public void testFormat() throws IOException, InterruptedException {
    JobID jobId = new JobID("201408272347", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

    Job job = new Job(new Configuration());
    job.setJobID(jobId);
    Configuration conf = job.getConfiguration();

    TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

    PipesNonJavaInputFormat input_format = new PipesNonJavaInputFormat();

    DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(new FileSplit(), tcontext);
    assertEquals(0.0f, reader.getProgress(), 0.001);

    // input and output files
    File input1 = new File(workSpace + File.separator + "input1");
    if (!input1.getParentFile().exists()) {
        Assert.assertTrue(input1.getParentFile().mkdirs());
    }

    if (!input1.exists()) {
        Assert.assertTrue(input1.createNewFile());
    }

    File input2 = new File(workSpace + File.separator + "input2");
    if (!input2.exists()) {
        Assert.assertTrue(input2.createNewFile());
    }

    // THIS fill fail without hdfs support.
    // // set data for splits
    // conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
    //          StringUtils.escapeString(input1.getAbsolutePath()) + ","
    //          + StringUtils.escapeString(input2.getAbsolutePath()));
    // List<InputSplit> splits = input_format.getSplits(job);
    // assertTrue(splits.size() >= 2);

    PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader = new PipesNonJavaInputFormat.PipesDummyRecordReader(
            new FileSplit(), tcontext);
    // empty dummyRecordReader
    assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
    // test method next
    assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
    assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
    dummyRecordReader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Builds the {@link TaskAttemptContext} of the job-setup task for the given job.
 *
 * <p>The context belongs to attempt {@code 0} of task {@code 0} of type {@link TaskType#JOB_SETUP}.
 *
 * @param conf hadoop {@link Configuration} the context is created with
 * @param jobID id of the job the setup task belongs to
 * @return new setup {@link TaskAttemptContext}
 */
static TaskAttemptContext createSetupTaskContext(Configuration conf, JobID jobID) {
    final TaskAttemptID setupAttemptId = new TaskAttemptID(new TaskID(jobID, TaskType.JOB_SETUP, 0), 0);
    return createTaskAttemptContext(conf, setupAttemptId);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Builds a {@link TaskAttemptID} for one attempt of one task of a job.
 *
 * @param jobID id of the job the task belongs to
 * @param taskId number of the task, passed on to {@code createTaskID}
 * @param attemptId number of the attempt
 * @return new {@link TaskAttemptID}
 */
static TaskAttemptID createTaskAttemptID(JobID jobID, int taskId, int attemptId) {
    return new TaskAttemptID(createTaskID(jobID, taskId), attemptId);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Creates cleanup {@link TaskAttemptContext} for given {@link JobID}.
 *
 * @param conf hadoop configuration/* ww  w . j a  v a 2s. c  o m*/
 * @param jobID jobId of the created {@link TaskID}
 * @return new cleanup {@link TaskID} for given {@link JobID}
 */
static TaskAttemptContext createCleanupTaskContext(Configuration conf, JobID jobID) {
    final TaskID taskId = new TaskID(jobID, TaskType.JOB_CLEANUP, 0);
    return createTaskAttemptContext(conf, new TaskAttemptID(taskId, 0));
}