Example usage for org.apache.hadoop.io ArrayWritable get

Introduction

This page shows example usages of the org.apache.hadoop.io.ArrayWritable.get() method, drawn from open-source projects.

Prototype

public Writable[] get() 
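
Before the project examples below, here is a minimal, self-contained sketch (the class name is illustrative, not taken from the projects listed under Usage) showing that get() returns the backing Writable[] of an ArrayWritable:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ArrayWritableGetExample {
    public static void main(final String[] args) {
        // Wrap a few Writables; using Writable.class as the value class allows mixed element types,
        // which is the same pattern the Hive/Parquet code below relies on.
        final ArrayWritable row = new ArrayWritable(Writable.class,
                new Writable[] { new IntWritable(42), new Text("hello") });

        // get() returns the underlying Writable[] array.
        final Writable[] values = row.get();
        for (final Writable value : values) {
            System.out.println(value.getClass().getSimpleName() + ": " + value);
        }
    }
}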

Usage

From source file: parquet.hive.TestMapredParquetInputFormat.java

License: Apache License

private void readParquetHiveInputFormat(final String schemaRequested, final Integer[] arrCheckIndexValues)
        throws Exception {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();

    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }

    final FileInputFormat<Void, ArrayWritable> format = new MapredParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    System.out.println(schemaToString);

    final String specificSchema = schemaRequested == null ? schemaToString : schemaRequested;

    // Set the configuration parameters
    final String columnsStr = "message customer {\n" + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n" + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n" + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n" + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n" + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n" + "      required binary key;\n" + "      optional binary value;\n"
            + "    }\n" + "  }\n" + "  optional group c_list (LIST) {\n" + "    repeated group bag {\n"
            + "      optional int32 array_element;\n" + "    }\n" + "  }\n" + "  optional int32 unknown;\n"
            + "}";

    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, specificSchema,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);

    final MapredParquetInputFormat.InputSplitWrapper splitWrapper = new InputSplitWrapper(realSplit);

    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);

    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();

    int count = 0;
    final int sizeExpected = mapData.size();
    while (reader.next(key, value)) {
        assertTrue(count < sizeExpected);
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, arrExpected.length);

        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                arrCheckIndexValues);

        assertTrue(deepEquals);
        count++;
    }
    System.out.println("nb lines " + count);
    reader.close();

    assertEquals("Number of lines found and data written don't match", count, sizeExpected);
}

From source file: parquet.hive.TestMapredParquetOuputFormat.java

License: Apache License

private void checkWrite() throws IOException, InterruptedException {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();

    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }

    final FileInputFormat<Void, ArrayWritable> format = new MapredParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    final String columnsStr = "message customer {\n" + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n" + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n" + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n" + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n" + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n" + "      required binary key;\n" + "      optional binary value;\n"
            + "    }\n" + "  }\n" + "  optional group c_list (LIST) {\n" + "    repeated group bag {\n"
            + "      optional int32 array_element;\n" + "    }\n" + "  }\n" + "}";

    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, schemaToString,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);

    final MapredParquetInputFormat.InputSplitWrapper splitWrapper = new MapredParquetInputFormat.InputSplitWrapper(
            realSplit);

    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);

    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();

    int count = 0;
    while (reader.next(key, value)) {
        assertTrue(count < mapData.size());
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, 10);

        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });

        assertTrue(deepEquals);
        count++;
    }
    reader.close();

    assertEquals("Number of lines found and data written don't match", count, mapData.size());

}

From source file: parquet.hive.TestParquetSerDe.java

License: Apache License

private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t)
        throws SerDeException {

    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();

    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
    assertEquals("deserialization gives the wrong object", t, row);

    // Serialize
    final ArrayWritable serializedArr = (ArrayWritable) serDe.serialize(row, oi);
    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(),
            serializedArr.get().length);
    assertTrue("serialized object should be equal to starting object",
            UtilitiesTestMethods.arrayWritableEquals(t, serializedArr));
}

From source file: parquet.hive.write.DataWritableWriter.java

License: Apache License

private void writeData(final ArrayWritable arr, final GroupType type) {

    if (arr == null) {
        return;
    }

    final int fieldCount = type.getFieldCount();
    // get() exposes one Writable per field declared in the Parquet group type.
    Writable[] values = arr.get();
    for (int field = 0; field < fieldCount; ++field) {
        final Type fieldType = type.getType(field);
        final String fieldName = fieldType.getName();
        final Writable value = values[field];
        if (value == null) {
            continue;
        }
        recordConsumer.startField(fieldName, field);

        if (fieldType.isPrimitive()) {
            writePrimitive(value);
        } else {
            recordConsumer.startGroup();
            if (value instanceof ArrayWritable) {
                if (fieldType.asGroupType().getRepetition().equals(Type.Repetition.REPEATED)) {
                    writeArray((ArrayWritable) value, fieldType.asGroupType());
                } else {
                    writeData((ArrayWritable) value, fieldType.asGroupType());
                }
            } else if (value != null) {
                throw new ParquetEncodingException("This should be an ArrayWritable or MapWritable: " + value);
            }

            recordConsumer.endGroup();
        }

        recordConsumer.endField(fieldName, field);
    }
}

From source file: parquet.hive.write.DataWritableWriter.java

License: Apache License

private void writeArray(final ArrayWritable array, final GroupType type) {
    if (array == null) {
        return;
    }

    // get() returns the array elements to be written under the repeated field(s) of this group.
    final Writable[] subValues = array.get();

    final int fieldCount = type.getFieldCount();
    for (int field = 0; field < fieldCount; ++field) {
        final Type subType = type.getType(field);
        recordConsumer.startField(subType.getName(), field);
        for (int i = 0; i < subValues.length; ++i) {
            final Writable subValue = subValues[i];
            if (subValue != null) {
                if (subType.isPrimitive()) {
                    if (subValue instanceof ArrayWritable) {
                        writePrimitive(((ArrayWritable) subValue).get()[field]);// 0 ?
                    } else {
                        writePrimitive(subValue);
                    }
                } else {
                    if (!(subValue instanceof ArrayWritable)) {
                        throw new RuntimeException("This should be a ArrayWritable: " + subValue);
                    } else {
                        recordConsumer.startGroup();
                        writeData((ArrayWritable) subValue, subType.asGroupType());
                        recordConsumer.endGroup();
                    }
                }
            }
        }
        recordConsumer.endField(subType.getName(), field);
    }

}
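
As a complement to the two writer methods above, here is a small illustrative sketch (the ArrayWritableDumper class and dump method are hypothetical, not part of parquet.hive) that uses get() to walk a nested ArrayWritable recursively, the same traversal pattern writeData and writeArray apply when emitting records:

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

// Illustrative utility only: prints a nested ArrayWritable structure via get().
public class ArrayWritableDumper {
    public static void dump(final ArrayWritable arr, final int depth) {
        if (arr == null) {
            return;
        }
        final StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            indent.append("  ");
        }
        for (final Writable value : arr.get()) {
            if (value instanceof ArrayWritable) {
                System.out.println(indent + "group:");
                dump((ArrayWritable) value, depth + 1); // descend into the nested group
            } else {
                System.out.println(indent + String.valueOf(value)); // primitive Writable or null
            }
        }
    }

    public static void main(final String[] args) {
        final ArrayWritable nested = new ArrayWritable(Writable.class,
                new Writable[] { new IntWritable(1),
                        new ArrayWritable(Writable.class, new Writable[] { new IntWritable(2) }) });
        dump(nested, 0);
    }
}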