Example usage for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor TaskAttemptContextImpl(Configuration, TaskAttemptID)

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId).

Prototype

public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId) 
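
As a minimal sketch (not taken from the examples below), the constructor can be used to build a standalone context, for example to drive a RecordReader outside of a running job. The wrapper class name and the job-tracker identifier and task numbers are made-up illustrative values:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A synthetic attempt id ("local" job tracker id, job 1, map task 0, attempt 0)
        // is enough when only a context object is needed, e.g. to initialize a RecordReader.
        TaskAttemptID attemptId = new TaskAttemptID("local", 1, TaskType.MAP, 0, 0);
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println("Created context for " + context.getTaskAttemptID());
    }
}

The usage examples that follow show this same pattern inside input-format tests and sampling utilities.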

Source Link

Usage

From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                    // ignored: the sampler thread simply stops reading if interrupted
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
            // ignored: move on to joining the remaining sampler threads
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License

@Test
// TODO: make this generic. This should be extensible to test all the input
// formats we support. How do we do this?
public void testReadNation() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    try {
        RecordServiceConfig.setInputTable(config, null, "tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0), new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));

        int numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            ++numRows;

            if (numRows == 10) {
                assertEquals("INDONESIA", value.getColumnValue(1).toString());
            }
        }
        assertFalse(reader.nextKeyValue());
        assertFalse(reader.nextRecord());
        assertEquals(25, numRows);

        config.clear();
        RecordServiceConfig.setInputTable(config, "tpch", "nation", "n_comment");
        splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0), new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
        numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            if (numRows == 12) {
                assertEquals("ously. final, express gifts cajole a", value.getColumnValue(0).toString());
            }
            ++numRows;
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License

@Test
public void testReadAllTypes() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                if (((BooleanWritable) value.getColumnValue(0)).get()) {
                    assertEquals(0, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(1, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(2, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(3, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(4.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(5.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("hello", value.getColumnValue(7).toString());
                    assertEquals("vchar1", value.getColumnValue(8).toString());
                    assertEquals("char1", value.getColumnValue(9).toString());
                    assertEquals("2015-01-01", format
                            .format(((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("3.1415920000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                } else {
                    assertEquals(6, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(7, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(8, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(9, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(10.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(11.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("world", value.getColumnValue(7).toString());
                    assertEquals("vchar2", value.getColumnValue(8).toString());
                    assertEquals("char2", value.getColumnValue(9).toString());
                    assertEquals("2016-01-01", format
                            .format(((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("1234.5678900000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                }
                ++numRows;
            }
        }
        assertEquals(2, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License

@Test
public void testReadAllTypesNull() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes_null");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                for (int i = 0; i < value.getSchema().getNumColumns(); ++i) {
                    assertTrue(value.getColumnValue(i) == null);
                }
                ++numRows;
            }
        }
        assertEquals(1, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License

@Test
public void testCountStar() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    TextInputFormat.TextRecordReader reader = new TextInputFormat.TextRecordReader();

    try {
        RecordServiceConfig.setInputQuery(config, "select count(*) from tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                ++numRows;
            }
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.knewton.mapreduce.SSTableColumnRecordReaderTest.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext() {
    conf.set(PropertyConstants.COLUMN_COMPARATOR.txt, LongType.class.getName());
    conf.set(PropertyConstants.PARTITIONER.txt, RandomPartitioner.class.getName());
    return new TaskAttemptContextImpl(conf, attemptId);
}

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

private TaskAttemptContext getTaskAttemptContext(boolean setColumnComparator, boolean setPartitioner,
        boolean setSubComparator) throws Exception {

    if (setColumnComparator) {
        SSTableInputFormat.setComparatorClass(LongType.class.getName(), job);
    }
    if (setPartitioner) {
        SSTableInputFormat.setPartitionerClass(RandomPartitioner.class.getName(), job);
    }
    if (setSubComparator) {
        SSTableInputFormat.setSubComparatorClass(LongType.class.getName(), job);
    }

    return new TaskAttemptContextImpl(conf, attemptId);
}

From source file:com.marcolotz.lung.debug.InputTester.java

License:Creative Commons License

/***
 * Method used for local testing the record reader and the Input format. It
 * generates an input split from the local file system file.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
        // this local debug helper silently ignores read errors
    }
}

From source file:com.moz.fiji.mapreduce.platform.CDH5FijiMRBridge.java

License:Apache License

/** {@inheritDoc} */
@Override
public TaskAttemptContext newTaskAttemptContext(Configuration conf, TaskAttemptID id) {
    // In CDH4+, TaskAttemptContext and its implementation are separated.
    return new TaskAttemptContextImpl(conf, id);
}