Example usage for org.apache.hadoop.mapreduce RecordWriter close

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.RecordWriter.close.

Prototype

public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;

Document

Close this RecordWriter to future operations.
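
The examples below obtain a RecordWriter from an OutputFormat, write their records, and then call close(TaskAttemptContext) exactly once. For orientation, here is a minimal sketch of a custom RecordWriter whose close releases an underlying stream; it is not taken from any of the source files below, and the class name TextLineRecordWriter and its constructor are illustrative assumptions rather than part of the Hadoop API.

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class TextLineRecordWriter extends RecordWriter<NullWritable, Text> {

    private final DataOutputStream out;

    public TextLineRecordWriter(DataOutputStream out) {
        this.out = out;
    }

    @Override
    public void write(NullWritable key, Text value) throws IOException {
        // Write the value followed by a newline; the key is ignored.
        out.write(value.getBytes(), 0, value.getLength());
        out.writeByte('\n');
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException {
        // After close() the writer must not be used again,
        // so flush and release the underlying stream here.
        out.close();
    }
}

In the test methods that follow, the same call appears as writer.close(m_tacontext) or writer.close(attemptContext) after the write loop, followed by closing any session or verifying the written file.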

Usage

From source file: org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java

License: Apache License

@Test(enabled = true)
public void testWriteBufferData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<DurableBuffer<?>> sess = new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext,
            null, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<DurableBuffer<?>> mdvalue = new MneDurableOutputValue<DurableBuffer<?>>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat = new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
    RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer = outputFormat
            .getRecordWriter(m_tacontext);
    DurableBuffer<?> dbuf = null;
    Checksum cs = new CRC32();
    cs.reset();
    for (int i = 0; i < m_reccnt; ++i) {
        dbuf = genupdDurableBuffer(sess, cs);
        Assert.assertNotNull(dbuf);
        writer.write(nada, mdvalue.of(dbuf));
    }
    m_checksum = cs.getValue();
    writer.close(m_tacontext);
    sess.close();
}

From source file: org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java

License: Apache License

@Test(enabled = true)
public void testWriteChunkData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<DurableChunk<?>> sess = new MneDurableOutputSession<DurableChunk<?>>(m_tacontext,
            null, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<DurableChunk<?>> mdvalue = new MneDurableOutputValue<DurableChunk<?>>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat = new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
    RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer = outputFormat
            .getRecordWriter(m_tacontext);
    DurableChunk<?> dchunk = null;
    Checksum cs = new CRC32();
    cs.reset();
    for (int i = 0; i < m_reccnt; ++i) {
        dchunk = genupdDurableChunk(sess, cs);
        Assert.assertNotNull(dchunk);
        writer.write(nada, mdvalue.of(dchunk));
    }
    m_checksum = cs.getValue();
    writer.close(m_tacontext);
    sess.close();
}

From source file: org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java

License: Apache License

@Test(enabled = true)
public void testWriteLongData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<Long> sess = new MneDurableOutputSession<Long>(m_tacontext, null,
            MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<Long> mdvalue = new MneDurableOutputValue<Long>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat = new MneOutputFormat<MneDurableOutputValue<Long>>();
    RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer = outputFormat.getRecordWriter(m_tacontext);
    Long val = null;
    for (int i = 0; i < m_reccnt; ++i) {
        val = m_rand.nextLong();
        m_sum += val;
        writer.write(nada, mdvalue.of(val));
    }
    writer.close(m_tacontext);
    sess.close();
}

From source file: org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java

License: Apache License

@Test(enabled = true)
public void testWritePersonData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<Person<Long>> sess = new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
            MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<Person<Long>> mdvalue = new MneDurableOutputValue<Person<Long>>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat = new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
    RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer = outputFormat
            .getRecordWriter(m_tacontext);
    Person<Long> person = null;
    for (int i = 0; i < m_reccnt; ++i) {
        person = sess.newDurableObjectRecord();
        person.setAge((short) m_rand.nextInt(50));
        person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
        m_sumage += person.getAge();
        writer.write(nada, mdvalue.of(person));
    }
    writer.close(m_tacontext);
    sess.close();
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

@Test
public void testPredicatePushdown() throws Exception {
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    final String typeStr = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 4000 rows with the integer and the binary string
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 4000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    OrcInputFormat.setSearchArgument(conf,
            SearchArgumentFactory.newBuilder()
                    .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999)).build(),
            new String[] { null, "i", "s" });
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // the sarg should cause the reader to skip every row group except the one covering rows 1000 to 1999
    for (int r = 1000; r < 2000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

@Test
public void testColumnSelection() throws Exception {
    String typeStr = "struct<i:int,j:int,k:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 3000 rows with three integer columns
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new IntWritable(r * 2));
        row.setFieldValue(2, new IntWritable(r * 3));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // with INCLUDE_COLUMNS set to "0,2", only columns i and k should be read back; column j should come back as null
    for (int r = 0; r < 3000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(null, row.getFieldValue(1));
        assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);

    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.parquet.hadoop.TestMemoryManager.java

License: Apache License

@Test
public void testMemoryManager() throws Exception {
    long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
    long rowGroupSize = poolSize / 2;
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);

    Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
    Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));

    Assert.assertEquals("Allocations should start out at 0", 0, getTotalAllocation());

    RecordWriter writer1 = createWriter(1);
    Assert.assertTrue("Allocations should never exceed pool size", getTotalAllocation() <= poolSize);
    Assert.assertEquals("First writer should be limited by row group size", rowGroupSize, getTotalAllocation());

    RecordWriter writer2 = createWriter(2);
    Assert.assertTrue("Allocations should never exceed pool size", getTotalAllocation() <= poolSize);
    Assert.assertEquals("Second writer should be limited by row group size", 2 * rowGroupSize,
            getTotalAllocation());

    RecordWriter writer3 = createWriter(3);
    Assert.assertTrue("Allocations should never exceed pool size", getTotalAllocation() <= poolSize);

    writer1.close(null);
    Assert.assertTrue("Allocations should never exceed pool size", getTotalAllocation() <= poolSize);
    Assert.assertEquals("Allocations should be increased to the row group size", 2 * rowGroupSize,
            getTotalAllocation());

    writer2.close(null);
    Assert.assertTrue("Allocations should never exceed pool size", getTotalAllocation() <= poolSize);
    Assert.assertEquals("Allocations should be increased to the row group size", rowGroupSize,
            getTotalAllocation());

    writer3.close(null);
    Assert.assertEquals("Allocations should be increased to the row group size", 0, getTotalAllocation());
}

From source file: org.apache.parquet.hadoop.TestMemoryManager.java

License: Apache License

@Test
public void testReallocationCallback() throws Exception {
    // validate assumptions
    long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
    long rowGroupSize = poolSize / 2;
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);

    Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
    Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));

    Runnable callback = new Runnable() {
        @Override
        public void run() {
            counter++;
        }
    };

    // first-time registration should succeed
    ParquetOutputFormat.getMemoryManager().registerScaleCallBack("increment-test-counter", callback);

    try {
        ParquetOutputFormat.getMemoryManager().registerScaleCallBack("increment-test-counter", callback);
        Assert.fail("Duplicated registering callback should throw duplicates exception.");
    } catch (IllegalArgumentException e) {
        // expected
    }

    // hit the limit once and clean up
    RecordWriter writer1 = createWriter(1);
    RecordWriter writer2 = createWriter(2);
    RecordWriter writer3 = createWriter(3);
    writer1.close(null);
    writer2.close(null);
    writer3.close(null);

    //Verify Callback mechanism
    Assert.assertEquals("Allocations should be adjusted once", 1, counter);
    Assert.assertEquals("Should not allow duplicate callbacks", 1,
            ParquetOutputFormat.getMemoryManager().getScaleCallBacks().size());
}