Example usage for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptID TaskAttemptID

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce TaskAttemptID constructor TaskAttemptID(TaskID, int).

Prototype

public TaskAttemptID(TaskID taskId, int id) 

Document

Constructs a TaskAttemptID object from a given TaskID.
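
For orientation, here is a minimal, self-contained sketch of the constructor in use (assuming the Hadoop 2.x mapreduce API; the class name and the job identifier below are arbitrary):

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIdSketch {
    public static void main(String[] args) {
        JobID jobId = new JobID("201408272347", 0);              // jobtracker identifier + job number
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);      // map task #0 of that job
        TaskAttemptID attemptId = new TaskAttemptID(taskId, 0);  // attempt #0 of that task
        System.out.println(attemptId);                           // e.g. attempt_201408272347_0000_m_000000_0
    }
}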

Usage

From source file:com.splout.db.hadoop.TupleSampler.java

License:Apache License

/**
 * Random sampling method a la TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);

    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }

    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {
        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                //
                ITuple tuple = reader.getCurrentKey();

                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }

                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }

    }

    writer.close();
    return records;
}
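
The sample above drives a RecordReader outside a running Job by fabricating a TaskAttemptID and a TaskAttemptContext for each split. A stripped-down sketch of just that pattern, using TaskAttemptContextImpl directly rather than the project's TaskAttemptContextFactory (TextInputFormat and the helper name countRecords are illustrative assumptions, not part of the original code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class SplitReaderSketch {

    /** Counts every record under inputDir without submitting a Job. */
    public static long countRecords(Configuration conf, String inputDir) throws Exception {
        Job job = Job.getInstance(conf);
        FileInputFormat.addInputPath(job, new Path(inputDir));
        TextInputFormat format = new TextInputFormat();

        long records = 0;
        for (InputSplit split : format.getSplits(job)) {
            // A synthetic attempt id is enough to satisfy the RecordReader API.
            TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 0);
            TaskAttemptContextImpl ctx = new TaskAttemptContextImpl(job.getConfiguration(), attemptId);

            RecordReader<LongWritable, Text> reader = format.createRecordReader(split, ctx);
            reader.initialize(split, ctx);
            while (reader.nextKeyValue()) {
                records++;
            }
            reader.close();
        }
        return records;
    }
}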

From source file:com.twitter.hraven.hadoopJobMonitor.AppStatusCheckerTest.java

License:Apache License

public AppStatusCheckerTest() throws ConfigurationAccessException, RestException, SAXException, IOException,
        ParserConfigurationException, YarnException {
    appId = new MyApplicationId();
    appId.setId(oldJobId.getId());
    appId.setClusterTimestamp(Long.parseLong(oldJobId.getJtIdentifier()));

    taskId = new TaskID(oldJobId, TaskType.MAP, 0);
    taskAttemptId = new TaskAttemptID(taskId, 0);

    vConf.setFloat(HadoopJobMonitorConfiguration.TASK_PROGRESS_THRESHOLD, 0.2f);
    vConf.getInt(HadoopJobMonitorConfiguration.MAX_CACHED_TASK_PROGRESSES, 10);
    vConf.getInt(HadoopJobMonitorConfiguration.MAX_CACHED_APP_CONFS, 10);
    AppConfCache.init(vConf);
    ProgressCache.init(vConf);
    HadoopJobMonitorMetrics.initSingleton(vConf);
    taskProgressCache = ProgressCache.getTaskProgressCache();
    attemptProgressCache = ProgressCache.getAttemptProgressCache();

    when(clientCache.getClient(any(JobID.class))).thenReturn(clientService);
    appReport = mock(ApplicationReport.class);
    when(appReport.getApplicationId()).thenReturn(appId);
    appStatusChecker = new AppStatusChecker(vConf, appReport, clientCache, rm, new AppCheckerProgress() {
        @Override
        public void finished() {
        }
    });

    mockStatic(RestClient.class);
    restClient = mock(RestClient.class);
    when(RestClient.getInstance()).thenReturn(restClient);
}

From source file:io.ssc.trackthetrackers.extraction.hadoop.util.Compaction.java

License:Open Source License

public static void main(String[] args) throws IOException, InterruptedException {

    if (args.length != 2) {
        System.out.println("Usage: <input folder> <output file>");
        System.exit(-1);
    }

    String inputPath = args[0];
    String outputFile = args[1];

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);

    FileStatus[] input = fs.listStatus(new Path(inputPath), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.toString().endsWith(".parquet");
        }
    });

    Path output = new Path(outputFile);

    fs.delete(output, true);

    ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder> inputFormat = new ProtoParquetInputFormat<ParsedPageProtos.ParsedPageOrBuilder>();
    inputFormat.setReadSupportClass(new JobConf(conf), ProtoReadSupport.class);

    Job job = new Job(conf);
    ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage> outputFormat = new ProtoParquetOutputFormat<ParsedPageProtos.ParsedPage>(
            ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setProtobufClass(job, ParsedPageProtos.ParsedPage.class);
    ProtoParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ProtoParquetOutputFormat.setEnableDictionary(job, true);

    RecordWriter<Void, ParsedPageProtos.ParsedPage> recordWriter = outputFormat.getRecordWriter(conf, output,
            CompressionCodecName.SNAPPY);

    List<ParquetInputSplit> splits = new ArrayList<ParquetInputSplit>();

    for (FileStatus fileStatus : input) {
        System.out.println(fileStatus.getPath().toString());
        splits.addAll(inputFormat.getSplits(conf, ParquetFileReader.readFooters(conf, fileStatus)));
    }

    int splitIndex = 0;
    for (ParquetInputSplit split : splits) {

        System.out.println("Processing split: " + split.getPath().toString() + "(" + splitIndex + " of "
                + splits.size() + ")");

        TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", splitIndex, true, splitIndex),
                splitIndex);
        TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

        RecordReader<Void, ParsedPageProtos.ParsedPageOrBuilder> reader = inputFormat.createRecordReader(split,
                ctx);
        reader.initialize(split, ctx);

        while (reader.nextKeyValue()) {

            ParsedPageProtos.ParsedPageOrBuilder record = reader.getCurrentValue();

            ParsedPageProtos.ParsedPage.Builder builder = ParsedPageProtos.ParsedPage.newBuilder();

            builder.setUrl(record.getUrl());
            builder.setArchiveTime(record.getArchiveTime());

            builder.addAllScripts(record.getScriptsList());
            builder.addAllIframes(record.getIframesList());
            builder.addAllLinks(record.getLinksList());
            builder.addAllImages(record.getImagesList());

            recordWriter.write(null, builder.build());
        }

        if (reader != null) {
            reader.close();
        }

        splitIndex++;
    }

    TaskAttemptID taskAttemptID = new TaskAttemptID(new TaskID("identifier", 1, true, 1), 1);
    TaskAttemptContext ctx = new org.apache.hadoop.mapreduce.TaskAttemptContext(conf, taskAttemptID);

    if (recordWriter != null) {
        recordWriter.close(ctx);
    }

}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test PipesMapRunner: test the data transfer from the reader
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {
    // clean old password files
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(Submitter.IS_JAVA_RR, "true");
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        job.setInputFormatClass(DummyInputFormat.class);
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);

        InputSplit isplit = isplits.get(0);

        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);

        TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);
        // stub for client
        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(
                context);

        initStdOut(conf);
        mapper.run(context);
        String stdOut = readStdOut(conf);

        // test part of translated data. As common file for client and test -
        // clients stdOut
        // check version
        assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
        // check key and value classes
        assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
        assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
        // test have sent all data from reader
        assertTrue(stdOut.contains("value:0.0"));
        assertTrue(stdOut.contains("value:9.0"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.Application
 * test internal functions:
 *     MessageType.REGISTER_COUNTER,  INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {

    System.err.println("testApplication");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
        //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, null, writer, null, reporter, null);

        System.err.println("ready to launch application");
        Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>(
                context, reader);
        System.err.println("done");

        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();

        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));

        assertEquals(0.0, context.getProgress(), 0.01);
        assertNotNull(context.getCounter("group", "name"));

        // test status MessageType.STATUS
        assertEquals(context.getStatus(), "PROGRESS");
        // check MessageType.PROGRESS
        assertEquals(0.55f, reader.getProgress(), 0.001);
        application.getDownlink().close();
        // test MessageType.OUTPUT
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        assertTrue(stdOut.contains("key:123"));
        assertTrue(stdOut.contains("value:value"));
        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // abort works ?
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License:Apache License

/**
 * test org.apache.hadoop.mapreduce.pipes.PipesReducer
 * test the transfer of data: key and value
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }

}

From source file:it.crs4.pydoop.mapreduce.pipes.TestPipesNonJavaInputFormat.java

License:Apache License

/**
 * test PipesNonJavaInputFormat
 */

@Test
public void testFormat() throws IOException, InterruptedException {
    JobID jobId = new JobID("201408272347", 0);
    TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
    TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

    Job job = new Job(new Configuration());
    job.setJobID(jobId);
    Configuration conf = job.getConfiguration();

    TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

    PipesNonJavaInputFormat input_format = new PipesNonJavaInputFormat();

    DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(new FileSplit(), tcontext);
    assertEquals(0.0f, reader.getProgress(), 0.001);

    // input and output files
    File input1 = new File(workSpace + File.separator + "input1");
    if (!input1.getParentFile().exists()) {
        Assert.assertTrue(input1.getParentFile().mkdirs());
    }

    if (!input1.exists()) {
        Assert.assertTrue(input1.createNewFile());
    }

    File input2 = new File(workSpace + File.separator + "input2");
    if (!input2.exists()) {
        Assert.assertTrue(input2.createNewFile());
    }

    // THIS will fail without HDFS support.
    // // set data for splits
    // conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
    //          StringUtils.escapeString(input1.getAbsolutePath()) + ","
    //          + StringUtils.escapeString(input2.getAbsolutePath()));
    // List<InputSplit> splits = input_format.getSplits(job);
    // assertTrue(splits.size() >= 2);

    PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader = new PipesNonJavaInputFormat.PipesDummyRecordReader(
            new FileSplit(), tcontext);
    // empty dummyRecordReader
    assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
    // test method next
    assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
    assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
    dummyRecordReader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Creates a new setup {@link TaskAttemptContext} from hadoop {@link Configuration} and {@link
 * JobID}.
 *
 * @param conf hadoop {@link Configuration}
 * @param jobID jobId of the created {@link TaskAttemptContext}
 * @return new setup {@link TaskAttemptContext}
 */
static TaskAttemptContext createSetupTaskContext(Configuration conf, JobID jobID) {
    final TaskID taskId = new TaskID(jobID, TaskType.JOB_SETUP, 0);
    return createTaskAttemptContext(conf, new TaskAttemptID(taskId, 0));
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Creates a new {@link TaskAttemptID}.
 *
 * @param jobID jobId
 * @param taskId taskId
 * @param attemptId attemptId
 * @return new {@link TaskAttemptID}
 */
static TaskAttemptID createTaskAttemptID(JobID jobID, int taskId, int attemptId) {
    final TaskID tId = createTaskID(jobID, taskId);
    return new TaskAttemptID(tId, attemptId);
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormats.java

License:Apache License

/**
 * Creates a cleanup {@link TaskAttemptContext} for the given {@link JobID}.
 *
 * @param conf hadoop configuration
 * @param jobID jobId of the created {@link TaskID}
 * @return new cleanup {@link TaskAttemptContext} for the given {@link JobID}
 */
static TaskAttemptContext createCleanupTaskContext(Configuration conf, JobID jobID) {
    final TaskID taskId = new TaskID(jobID, TaskType.JOB_CLEANUP, 0);
    return createTaskAttemptContext(conf, new TaskAttemptID(taskId, 0));
}