List of usage examples for org.apache.hadoop.mapreduce RecordWriter close
public abstract void close(TaskAttemptContext context) throws IOException, InterruptedException;
Close this RecordWriter to future operations.
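Before the usage examples below, here is a minimal sketch of the typical call pattern: a RecordWriter obtained from an OutputFormat is written to and then closed with the same TaskAttemptContext. The choice of TextOutputFormat, the key/value types, and the assumption that the context already has an output directory configured are illustrative only; the examples below use other OutputFormat implementations.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class RecordWriterCloseSketch {

  // Writes a few records, then closes the writer to future operations.
  // Assumes the context was configured with a valid output directory and is
  // the same TaskAttemptContext used to obtain the writer.
  static void writeAndClose(TaskAttemptContext context)
      throws IOException, InterruptedException {
    TextOutputFormat<LongWritable, Text> outputFormat = new TextOutputFormat<>();
    RecordWriter<LongWritable, Text> writer = outputFormat.getRecordWriter(context);
    try {
      for (long i = 0; i < 10; ++i) {
        writer.write(new LongWritable(i), new Text("record-" + i));
      }
    } finally {
      // close() flushes buffered output and releases the writer's resources
      writer.close(context);
    }
  }
}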
From source file:org.apache.mnemonic.mapreduce.MneMapreduceBufferDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteBufferData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableBuffer<?>> sess =
      new MneDurableOutputSession<DurableBuffer<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableBuffer<?>> mdvalue =
      new MneDurableOutputValue<DurableBuffer<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableBuffer<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableBuffer<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableBuffer<?> dbuf = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dbuf = genupdDurableBuffer(sess, cs);
    Assert.assertNotNull(dbuf);
    writer.write(nada, mdvalue.of(dbuf));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteChunkData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<DurableChunk<?>> sess =
      new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<DurableChunk<?>> mdvalue =
      new MneDurableOutputValue<DurableChunk<?>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  DurableChunk<?> dchunk = null;
  Checksum cs = new CRC32();
  cs.reset();
  for (int i = 0; i < m_reccnt; ++i) {
    dchunk = genupdDurableChunk(sess, cs);
    Assert.assertNotNull(dchunk);
    writer.write(nada, mdvalue.of(dchunk));
  }
  m_checksum = cs.getValue();
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java
License:Apache License
@Test(enabled = true)
public void testWriteLongData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Long> sess =
      new MneDurableOutputSession<Long>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Long> mdvalue = new MneDurableOutputValue<Long>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Long>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Long>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Long>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Long val = null;
  for (int i = 0; i < m_reccnt; ++i) {
    val = m_rand.nextLong();
    m_sum += val;
    writer.write(nada, mdvalue.of(val));
  }
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@Test(enabled = true)
public void testWritePersonData() throws Exception {
  NullWritable nada = NullWritable.get();
  MneDurableOutputSession<Person<Long>> sess =
      new MneDurableOutputSession<Person<Long>>(m_tacontext, null,
          MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
  MneDurableOutputValue<Person<Long>> mdvalue =
      new MneDurableOutputValue<Person<Long>>(sess);
  OutputFormat<NullWritable, MneDurableOutputValue<Person<Long>>> outputFormat =
      new MneOutputFormat<MneDurableOutputValue<Person<Long>>>();
  RecordWriter<NullWritable, MneDurableOutputValue<Person<Long>>> writer =
      outputFormat.getRecordWriter(m_tacontext);
  Person<Long> person = null;
  for (int i = 0; i < m_reccnt; ++i) {
    person = sess.newDurableObjectRecord();
    person.setAge((short) m_rand.nextInt(50));
    person.setName(String.format("Name: [%s]", Utils.genRandomString()), true);
    m_sumage += person.getAge();
    writer.write(nada, mdvalue.of(person));
  }
  writer.close(m_tacontext);
  sess.close();
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testPredicatePushdown() throws Exception {
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  final String typeStr = "struct<i:int,s:string>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
  RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

  // write 4000 rows with the integer and the binary string
  TypeDescription type = TypeDescription.fromString(typeStr);
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 4000; ++r) {
    row.setFieldValue(0, new IntWritable(r));
    row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
    writer.write(nada, row);
  }
  writer.close(attemptContext);

  OrcInputFormat.setSearchArgument(conf,
      SearchArgumentFactory.newBuilder()
          .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999))
          .build(),
      new String[]{null, "i", "s"});
  FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000,
      new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

  // the sarg should cause it to skip over the rows except 1000 to 2000
  for (int r = 1000; r < 2000; ++r) {
    assertEquals(true, reader.nextKeyValue());
    row = reader.getCurrentValue();
    assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
  }
  assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testColumnSelection() throws Exception {
  String typeStr = "struct<i:int,j:int,k:int>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
  RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

  // write 3000 rows with three integer columns
  TypeDescription type = TypeDescription.fromString(typeStr);
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 3000; ++r) {
    row.setFieldValue(0, new IntWritable(r));
    row.setFieldValue(1, new IntWritable(r * 2));
    row.setFieldValue(2, new IntWritable(r * 3));
    writer.write(nada, row);
  }
  writer.close(attemptContext);

  conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
  FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000,
      new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

  // every row should come back, but the deselected column (j) should read as null
  for (int r = 0; r < 3000; ++r) {
    assertEquals(true, reader.nextKeyValue());
    row = reader.getCurrentValue();
    assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
    assertEquals(null, row.getFieldValue(1));
    assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
  }
  assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  String TYPE_STRING = "struct<i:int,s:string>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcKey key = new OrcKey(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(attemptContext);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 2000; ++r) {
    ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
    writer.write(nada, key);
  }
  writer.close(attemptContext);
  Path path = new Path(workDir, "part-m-00000.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(2000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  String TYPE_STRING = "struct<i:int>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
  TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
  TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcValue value = new OrcValue(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(attemptContext);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 3000; ++r) {
    ((OrcStruct) value.value).setAllFields(new IntWritable(r));
    writer.write(nada, value);
  }
  writer.close(attemptContext);
  Path path = new Path(workDir, "part-m-00000.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(3000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.parquet.hadoop.TestMemoryManager.java
License:Apache License
@Test
public void testMemoryManager() throws Exception {
  long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
  long rowGroupSize = poolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
  Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
  Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));
  Assert.assertEquals("Allocations should start out at 0", 0, getTotalAllocation());

  RecordWriter writer1 = createWriter(1);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("First writer should be limited by row group size",
      rowGroupSize, getTotalAllocation());

  RecordWriter writer2 = createWriter(2);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Second writer should be limited by row group size",
      2 * rowGroupSize, getTotalAllocation());

  RecordWriter writer3 = createWriter(3);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);

  writer1.close(null);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Allocations should be increased to the row group size",
      2 * rowGroupSize, getTotalAllocation());

  writer2.close(null);
  Assert.assertTrue("Allocations should never exceed pool size",
      getTotalAllocation() <= poolSize);
  Assert.assertEquals("Allocations should be increased to the row group size",
      rowGroupSize, getTotalAllocation());

  writer3.close(null);
  Assert.assertEquals("Allocations should be increased to the row group size",
      0, getTotalAllocation());
}
From source file:org.apache.parquet.hadoop.TestMemoryManager.java
License:Apache License
@Test
public void testReallocationCallback() throws Exception {
  // validate assumptions
  long poolSize = ParquetOutputFormat.getMemoryManager().getTotalMemoryPool();
  long rowGroupSize = poolSize / 2;
  conf.setLong(ParquetOutputFormat.BLOCK_SIZE, rowGroupSize);
  Assert.assertTrue("Pool should hold 2 full row groups", (2 * rowGroupSize) <= poolSize);
  Assert.assertTrue("Pool should not hold 3 full row groups", poolSize < (3 * rowGroupSize));

  Runnable callback = new Runnable() {
    @Override
    public void run() {
      counter++;
    }
  };

  // first-time registration should succeed
  ParquetOutputFormat.getMemoryManager()
      .registerScaleCallBack("increment-test-counter", callback);
  try {
    ParquetOutputFormat.getMemoryManager()
        .registerScaleCallBack("increment-test-counter", callback);
    Assert.fail("Duplicated registering callback should throw duplicates exception.");
  } catch (IllegalArgumentException e) {
    // expected
  }

  // hit the limit once and clean up
  RecordWriter writer1 = createWriter(1);
  RecordWriter writer2 = createWriter(2);
  RecordWriter writer3 = createWriter(3);
  writer1.close(null);
  writer2.close(null);
  writer3.close(null);

  // verify callback mechanism
  Assert.assertEquals("Allocations should be adjusted once", 1, counter);
  Assert.assertEquals("Should not allow duplicate callbacks", 1,
      ParquetOutputFormat.getMemoryManager().getScaleCallBacks().size());
}