Usage examples for org.apache.hadoop.io.ArrayWritable#get()
Signature: public Writable[] get()
From source file: parquet.hive.TestMapredParquetInputFormat.java
License: Apache License
private void readParquetHiveInputFormat(final String schemaRequested, final Integer[] arrCheckIndexValues) throws Exception { final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath())); final MessageType schema = readFooter.getFileMetaData().getSchema(); long size = 0; final List<BlockMetaData> blocks = readFooter.getBlocks(); for (final BlockMetaData block : blocks) { size += block.getTotalByteSize(); }//from w w w .jav a 2 s. com final FileInputFormat<Void, ArrayWritable> format = new MapredParquetInputFormat(); final String[] locations = new String[] { "localhost" }; final String schemaToString = schema.toString(); System.out.println(schemaToString); final String specificSchema = schemaRequested == null ? schemaToString : schemaRequested; // Set the configuration parameters final String columnsStr = "message customer {\n" + " optional int32 c_custkey;\n" + " optional binary c_name;\n" + " optional binary c_address;\n" + " optional int32 c_nationkey;\n" + " optional binary c_phone;\n" + " optional double c_acctbal;\n" + " optional binary c_mktsegment;\n" + " optional binary c_comment;\n" + " optional group c_map (MAP_KEY_VALUE) {\n" + " repeated group map {\n" + " required binary key;\n" + " optional binary value;\n" + " }\n" + " }\n" + " optional group c_list (LIST) {\n" + " repeated group bag {\n" + " optional int32 array_element;\n" + " }\n" + " }\n" + " optional int32 unknown;\n" + "}"; final Map<String, String> readSupportMetaData = new HashMap<String, String>(); readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr); final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size, locations, blocks, schemaToString, specificSchema, readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData); final MapredParquetInputFormat.InputSplitWrapper splitWrapper = new InputSplitWrapper(realSplit); // construct the record reader final 
RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter); // create key/value final Void key = reader.createKey(); final ArrayWritable value = reader.createValue(); int count = 0; final int sizeExpected = mapData.size(); while (reader.next(key, value)) { assertTrue(count < sizeExpected); assertTrue(key == null); final Writable[] arrValue = value.get(); final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get()); final Writable[] arrExpected = expected.get(); assertEquals(arrValue.length, arrExpected.length); final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected, arrCheckIndexValues); assertTrue(deepEquals); count++; } System.out.println("nb lines " + count); reader.close(); assertEquals("Number of lines found and data written don't match", count, sizeExpected); }
From source file: parquet.hive.TestMapredParquetOuputFormat.java
License: Apache License
private void checkWrite() throws IOException, InterruptedException { final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath())); final MessageType schema = readFooter.getFileMetaData().getSchema(); long size = 0; final List<BlockMetaData> blocks = readFooter.getBlocks(); for (final BlockMetaData block : blocks) { size += block.getTotalByteSize(); }/*from w w w .j a va 2s . c o m*/ final FileInputFormat<Void, ArrayWritable> format = new MapredParquetInputFormat(); final String[] locations = new String[] { "localhost" }; final String schemaToString = schema.toString(); final String columnsStr = "message customer {\n" + " optional int32 c_custkey;\n" + " optional binary c_name;\n" + " optional binary c_address;\n" + " optional int32 c_nationkey;\n" + " optional binary c_phone;\n" + " optional double c_acctbal;\n" + " optional binary c_mktsegment;\n" + " optional binary c_comment;\n" + " optional group c_map (MAP_KEY_VALUE) {\n" + " repeated group map {\n" + " required binary key;\n" + " optional binary value;\n" + " }\n" + " }\n" + " optional group c_list (LIST) {\n" + " repeated group bag {\n" + " optional int32 array_element;\n" + " }\n" + " }\n" + "}"; final Map<String, String> readSupportMetaData = new HashMap<String, String>(); readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr); final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size, locations, blocks, schemaToString, schemaToString, readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData); final MapredParquetInputFormat.InputSplitWrapper splitWrapper = new MapredParquetInputFormat.InputSplitWrapper( realSplit); // construct the record reader final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter); // create key/value final Void key = reader.createKey(); final ArrayWritable value = reader.createValue(); int count = 0; while 
(reader.next(key, value)) { assertTrue(count < mapData.size()); assertTrue(key == null); final Writable[] arrValue = value.get(); final Writable[] writableArr = arrValue; final ArrayWritable expected = mapData.get(((IntWritable) writableArr[0]).get()); final Writable[] arrExpected = expected.get(); assertEquals(arrValue.length, 10); final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected, new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); assertTrue(deepEquals); count++; } reader.close(); assertEquals("Number of lines found and data written don't match", count, mapData.size()); }
From source file: parquet.hive.TestParquetSerDe.java
License: Apache License
private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException { // Get the row structure final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector(); // Deserialize final Object row = serDe.deserialize(t); assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class); assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length); assertEquals("deserialization gives the wrong object", t, row); // Serialize/*from w ww . j a v a 2 s .c o m*/ final ArrayWritable serializedArr = (ArrayWritable) serDe.serialize(row, oi); assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), serializedArr.get().length); assertTrue("serialized object should be equal to starting object", UtilitiesTestMethods.arrayWritableEquals(t, serializedArr)); }
From source file: parquet.hive.write.DataWritableWriter.java
License: Apache License
private void writeData(final ArrayWritable arr, final GroupType type) { if (arr == null) { return;//from w w w . j av a 2 s .c o m } final int fieldCount = type.getFieldCount(); Writable[] values = arr.get(); for (int field = 0; field < fieldCount; ++field) { final Type fieldType = type.getType(field); final String fieldName = fieldType.getName(); final Writable value = values[field]; if (value == null) { continue; } recordConsumer.startField(fieldName, field); if (fieldType.isPrimitive()) { writePrimitive(value); } else { recordConsumer.startGroup(); if (value instanceof ArrayWritable) { if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) { writeArray((ArrayWritable) value, fieldType.asGroupType()); } else { writeData((ArrayWritable) value, fieldType.asGroupType()); } } else if (value != null) { throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value); } recordConsumer.endGroup(); } recordConsumer.endField(fieldName, field); } }
From source file: parquet.hive.write.DataWritableWriter.java
License: Apache License
private void writeArray(final ArrayWritable array, final GroupType type) { if (array == null) { return;/*from ww w . jav a2 s . c o m*/ } final Writable[] subValues = array.get(); final int fieldCount = type.getFieldCount(); for (int field = 0; field < fieldCount; ++field) { final Type subType = type.getType(field); recordConsumer.startField(subType.getName(), field); for (int i = 0; i < subValues.length; ++i) { final Writable subValue = subValues[i]; if (subValue != null) { if (subType.isPrimitive()) { if (subValue instanceof ArrayWritable) { writePrimitive(((ArrayWritable) subValue).get()[field]);// 0 ? } else { writePrimitive(subValue); } } else { if (!(subValue instanceof ArrayWritable)) { throw new RuntimeException("This should be a ArrayWritable: " + subValue); } else { recordConsumer.startGroup(); writeData((ArrayWritable) subValue, subType.asGroupType()); recordConsumer.endGroup(); } } } } recordConsumer.endField(subType.getName(), field); } }