Example usage for org.apache.hadoop.mapreduce.task TaskAttemptContextImpl TaskAttemptContextImpl

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor, TaskAttemptContextImpl(Configuration, TaskAttemptID).

Prototype

public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId) 

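Before the usage examples, here is a minimal, self-contained sketch of how the constructor is typically invoked. The class name TaskAttemptContextExample and the identifier values ("localJT", 1, 0, 0) are placeholders chosen for illustration; they are not taken from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        // Configuration holding whatever job settings the context should expose.
        Configuration conf = new Configuration();

        // Placeholder task attempt id: jtIdentifier "localJT", job 1, map task 0, attempt 0.
        TaskAttemptID taskAttemptId = new TaskAttemptID("localJT", 1, TaskType.MAP, 0, 0);

        // TaskAttemptContextImpl is the concrete implementation of the
        // TaskAttemptContext interface consumed by RecordReaders and RecordWriters.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskAttemptId);

        System.out.println(context.getTaskAttemptID());
    }
}

Most of the examples below follow the same pattern to build a context for exercising a RecordReader or RecordWriter in tests.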

Usage

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java

License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: org.kiji.mapreduce.platform.CDH4MR1KijiMRBridge.java

License: Apache License

/** {@inheritDoc} */
@Override
public TaskAttemptContext newTaskAttemptContext(Configuration conf, TaskAttemptID id) {
    // In CDH4, TaskAttemptContext and its implementation are separated.
    return new TaskAttemptContextImpl(conf, id);
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetInputFormat.java

License: Apache License

@Override
public IPentahoRecordReader createRecordReader(IPentahoInputSplit split) throws Exception {
    return inClassloader(() -> {
        PentahoInputSplitImpl pentahoInputSplit = (PentahoInputSplitImpl) split;
        InputSplit inputSplit = pentahoInputSplit.getInputSplit();

        ReadSupport<RowMetaAndData> readSupport = new PentahoParquetReadSupport();

        ParquetRecordReader<RowMetaAndData> nativeRecordReader = new ParquetRecordReader<RowMetaAndData>(
                readSupport, ParquetInputFormat.getFilter(job.getConfiguration()));
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        nativeRecordReader.initialize(inputSplit, task);

        return new PentahoParquetRecordReader(nativeRecordReader);
    });
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java

License: Apache License

@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if ((outputFields == null) || (outputFields.size() == 0)) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(outputFields));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {

            ParquetRecordWriter<RowMetaAndData> recordWriter = (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat
                    .getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordReaderTest.java

License: Apache License

private void initializeRecordReader() throws Exception {
    InputSplit inputSplit = nativeParquetInputFormat.getSplits(job).get(0);
    TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    nativeRecordReader.initialize(inputSplit, task);
}

From source file: org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java

License: Apache License

@Before
public void setUp() throws Exception {

    ConfigurationProxy conf = new ConfigurationProxy();
    conf.set("fs.defaultFS", "file:///");
    Job job = Job.getInstance(conf);

    tempFile = Files.createTempDirectory("parquet");

    org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);

    ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);

    task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}

From source file: org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java

License: Apache License

@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if (schema == null) {
        throw new RuntimeException("Schema is not defined");
    }

    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat = new FixedParquetOutputFormat(
                new PentahoParquetWriteSupport(schema));

        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {

            ParquetRecordWriter<RowMetaAndData> recordWriter = (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat
                    .getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}

From source file: org.seqdoop.hadoop_bam.TestVCFInputFormatStringency.java

License: Open Source License

public void checkReading(ValidationStringency validationStringency) throws Exception {
    String filename = "invalid_info_field.vcf";
    Configuration conf = new Configuration();
    String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
    conf.set("mapred.input.dir", "file://" + input_file);

    if (validationStringency != null) {
        VCFRecordReader.setValidationStringency(conf, validationStringency);
    }

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

    VCFInputFormat inputFormat = new VCFInputFormat(conf);
    List<InputSplit> splits = inputFormat.getSplits(ctx);
    assertEquals(1, splits.size());
    RecordReader<LongWritable, VariantContextWritable> reader = inputFormat.createRecordReader(splits.get(0),
            taskAttemptContext);
    int counter = 0;
    while (reader.nextKeyValue()) {
        VariantContextWritable writable = reader.getCurrentValue();
        assertNotNull(writable);
        VariantContext vc = writable.get();
        assertNotNull(vc);
        String value = vc.toString();
        assertNotNull(value);
        counter++;
    }
    assertEquals(4, counter);
}

From source file: org.warcbase.io.GenericArchiveRecordWritableTest.java

License: Apache License

@Test
public void testArcInputFormat() throws Exception {
    String arcFile = Resources.getResource("arc/example.arc.gz").getPath();

    Configuration conf = new Configuration(false);
    conf.set("fs.defaultFS", "file:///");

    File testFile = new File(arcFile);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat = ReflectionUtils
            .newInstance(WacGenericInputFormat.class, conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    RecordReader<LongWritable, GenericArchiveRecordWritable> reader = inputFormat.createRecordReader(split,
            context);

    reader.initialize(split, context);

    int cnt = 0;
    while (reader.nextKeyValue()) {
        GenericArchiveRecordWritable record = reader.getCurrentValue();
        cnt++;

        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(bytesOut);

        record.write(dataOut);

        GenericArchiveRecordWritable reconstructed = new GenericArchiveRecordWritable();

        reconstructed.setFormat(ArchiveFormat.ARC);
        reconstructed.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

        boolean isArc = (record.getFormat() == ArchiveFormat.ARC);
        assertEquals(isArc, true);
        if (isArc) {
            assertEquals(((ARCRecord) record.getRecord()).getMetaData().getUrl(),
                    ((ARCRecord) reconstructed.getRecord()).getMetaData().getUrl());
        }
    }

    assertEquals(300, cnt);
}