List of usage examples for org.apache.hadoop.mapreduce RecordWriter close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
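Before the usage examples below, here is a minimal sketch of the typical call pattern: a RecordWriter obtained from an OutputFormat is written to and then closed with the same TaskAttemptContext. The choice of TextOutputFormat, the key/value types, and the assumption that the context already has an output directory configured are illustrative only; the examples below use other OutputFormat implementations.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class RecordWriterCloseSketch {

  // Writes a few records, then closes the writer to future operations.
  // Assumes the context was configured with a valid output directory and is
  // the same TaskAttemptContext used to obtain the writer.
  static void writeAndClose(TaskAttemptContext context)
      throws IOException, InterruptedException {
    TextOutputFormat<LongWritable, Text> outputFormat = new TextOutputFormat<>();
    RecordWriter<LongWritable, Text> writer = outputFormat.getRecordWriter(context);
    try {
      for (long i = 0; i < 10; ++i) {
        writer.write(new LongWritable(i), new Text("record-" + i));
      }
    } finally {
      // close() flushes buffered output and releases the writer's resources
      writer.close(context);
    }
  }
}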
From source file:org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue = new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testPredicatePushdown() throws Exception {
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  final String typeStr = "struct<i:int,s:string>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
  RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

  // write 4000 rows with the integer and the binary string
  TypeDescription type = TypeDescription.fromString(typeStr);
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 4000; ++r) {
    row.setFieldValue(0, new IntWritable(r));
    row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
    writer.write(nada, row);
  }
  writer.close(attemptContext);

  OrcInputFormat.setSearchArgument(conf,
      SearchArgumentFactory.newBuilder()
          .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999))
          .build(),
      new String[]{null, "i", "s"});
  FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000,
      new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

  // the sarg should cause it to skip over the rows except 1000 to 2000
  for (int r = 1000; r < 2000; ++r) {
    assertEquals(true, reader.nextKeyValue());
    row = reader.getCurrentValue();
    assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
  }
  assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testColumnSelection() throws Exception {
  String typeStr = "struct<i:int,j:int,k:int>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
  RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

  // write 3000 rows with three integer columns
  TypeDescription type = TypeDescription.fromString(typeStr);
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 3000; ++r) {
    row.setFieldValue(0, new IntWritable(r));
    row.setFieldValue(1, new IntWritable(r * 2));
    row.setFieldValue(2, new IntWritable(r * 3));
    writer.write(nada, row);
  }
  writer.close(attemptContext);

  conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
  FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000,
      new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

  // every row should come back, but the deselected column (j) should read as null
  for (int r = 0; r < 3000; ++r) {
    assertEquals(true, reader.nextKeyValue());
    row = reader.getCurrentValue();
    assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(null, row.getFieldValue(1));
    assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
  }
  assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  String TYPE_STRING = "struct<i:int,s:string>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcKey key = new OrcKey(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(attemptContext);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 2000; ++r) {
    ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
    writer.write(nada, key);
  }
  writer.close(attemptContext);
  Path path = new Path(workDir, "part-m-00000.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(2000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  String TYPE_STRING = "struct<i:int>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcValue value = new OrcValue(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(attemptContext);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 3000; ++r) {
    ((OrcStruct) value.value).setAllFields(new IntWritable(r));
    writer.write(nada, value);
  }
  writer.close(attemptContext);
  Path path = new Path(workDir, "part-m-00000.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(3000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.parquet.hadoop.TestMemoryManager.java
License:Apache License
@Test
public void testMemoryManager() throws Exception {
  long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
  long rowGroupSize = poolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
  Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
  Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));
  Assert.assertEquals("Allocations should start out at 0", 0, getTotalAllocation());

  RecordWriter writer1 = createWriter(1);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("First writer should be limited by row group size",
      rowGroupSize, getTotalAllocation());

  RecordWriter writer2 = createWriter(2);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Second writer should be limited by row group size",
      2 * rowGroupSize, getTotalAllocation());

  RecordWriter writer3 = createWriter(3);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);

  writer1.close(null);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Allocations should be increased to the row group size",
      2 * rowGroupSize, getTotalAllocation());

  writer2.close(null);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Allocations should be increased to the row group size",
      rowGroupSize, getTotalAllocation());

  writer3.close(null);
  Assert.assertEquals("Allocations should be increased to the row group size",
      0, getTotalAllocation());
}
From source file:org.apache.parquet.hadoop.TestMemoryManager.java
License:Apache License
@Test
public void testReallocationCallback() throws Exception {
  // validate assumptions
  long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
  long rowGroupSize = poolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
  Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
  Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));

  Runnable callback = new Runnable() {
    @Override
    public void run() {
      counter++;
    }
  };

  // first-time registration should succeed
  ParquetOutputFormat.getMemoryManager()
      .registerScaleCallBack("increment-test-counter", callback);
  try {
    ParquetOutputFormat.getMemoryManager()
        .registerScaleCallBack("increment-test-counter", callback);
    Assert.fail("Duplicated registering callback should throw duplicates exception.");
  } catch (IllegalArgumentException e) {
    // expected
  }

  // hit the limit once and clean up
  RecordWriter writer1 = createWriter(1);
  RecordWriter writer2 = createWriter(2);
  RecordWriter writer3 = createWriter(3);
  writer1.close(null);
  writer2.close(null);
  writer3.close(null);

  // verify callback mechanism
  Assert.assertEquals("Allocations should be adjusted once", 1, counter);
  Assert.assertEquals("Should not allow duplicate callbacks", 1,
      ParquetOutputFormat.getMemoryManager().getScaleCallBacks().size());
}