List of usage examples for org.apache.hadoop.io ArrayWritable get
public Writable[] get()
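A minimal, self-contained sketch of the call itself before the real-world examples (illustrative only; the class name ArrayWritableGetSketch and the values are not taken from any source file below): populate an ArrayWritable via set(), then read the backing array back with get().

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class ArrayWritableGetSketch {
    public static void main(String[] args) {
        // The one-arg constructor declares the element class; set() supplies the values.
        ArrayWritable row = new ArrayWritable(IntWritable.class);
        row.set(new Writable[] { new IntWritable(1), new IntWritable(2), new IntWritable(3) });

        // get() hands back the underlying Writable[]; callers cast each element
        // to the declared value class before use.
        for (Writable w : row.get()) {
            System.out.println(((IntWritable) w).get());
        }
    }
}

Note that get() returns the elements as Writable[], so callers cast each entry to the concrete value class, exactly as the SpreadSheetCellDAO and parquet-hive examples below do.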
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test/*from w w w.j a va 2 s. c o m*/ public void writeExcelOutputFormatExcel2013SingleSheetOneLinkedWorkbook() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xlsx]Sheet1!B1", "B1", "Sheet1"); // should be test2 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); 
jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test4\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test/*from w ww . j a va 2 s . co m*/ public void writeExcelOutputFormatExcel2013SingleSheetTwoLinkedWorkbooks() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write linkedworkbook2 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1"); // write job = Job.getInstance(); conf = job.getConfiguration(); String linkedWB2FileName = "excel2013linkedwb2"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB2FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); 
context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.commitTask(context); committer.commitJob(jContext); outputFormat = new ExcelFileOutputFormat(); writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb2a1); writer.write(null, wb2b1); writer.write(null, wb2c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix; linkedWB2FileName = linkedWB2FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]:[" + tmpDir + File.separator + linkedWB2FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB1FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test2 in the end SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB2FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test5 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.write(null, c1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format 
returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test7\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test5\""); }
From source file:parquet.hive.serde.AbstractParquetMapInspector.java
License:Apache License
@Override
public Map<?, ?> getMap(final Object data) {
    if (data == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        final Map<Writable, Writable> map = new HashMap<Writable, Writable>();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            map.put(arr[0], arr[1]);
        }
        return map;
    }
    if (data instanceof Map) {
        return (Map) data;
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
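The chained get() calls above reflect how the deprecated parquet-hive serde lays out a Hive map: an outer single-element ArrayWritable container whose first element is an ArrayWritable of two-element (key, value) ArrayWritable entries. A minimal sketch of building that layout by hand, assuming Text keys and values (the class and variable names are illustrative, not from the parquet-hive sources):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ParquetMapLayoutSketch {
    public static void main(String[] args) {
        // one (key, value) pair per inner ArrayWritable
        ArrayWritable entry1 = new ArrayWritable(Text.class, new Writable[] { new Text("k1"), new Text("v1") });
        ArrayWritable entry2 = new ArrayWritable(Text.class, new Writable[] { new Text("k2"), new Text("v2") });
        // the list of entries...
        ArrayWritable mapArray = new ArrayWritable(ArrayWritable.class, new Writable[] { entry1, entry2 });
        // ...wrapped in a single-element container, matching mapContainer[0] in getMap() above
        ArrayWritable container = new ArrayWritable(ArrayWritable.class, new Writable[] { mapArray });

        // walk it the same way getMap() does
        for (Writable obj : ((ArrayWritable) container.get()[0]).get()) {
            Writable[] arr = ((ArrayWritable) obj).get();
            System.out.println(arr[0] + " -> " + arr[1]);
        }
    }
}

The same container/entry-list shape is assumed by every map inspector in the examples that follow; they differ only in how the lookup key is compared.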
From source file:parquet.hive.serde.ArrayWritableObjectInspector.java
License:Apache License
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
    if (data == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final ArrayWritable arr = (ArrayWritable) data;
        return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.serde.DeepParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(arr[0]))
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(arr[0]))) {
                return arr[1];
            }
        }
        return null;
    }
    if (data instanceof Map) {
        final Map<?, ?> map = (Map<?, ?>) data;
        if (map.containsKey(key)) {
            return map.get(key);
        }
        for (final Map.Entry<?, ?> entry : map.entrySet()) {
            if (key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(entry.getKey()))
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(entry.getKey()))) {
                return entry.getValue();
            }
        }
        return null;
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.serde.ParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null) {
        return null;
    }
    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        // note: compares by reference identity, not equals(), so this only
        // matches when the caller passes the very same key instance
        if (arr[0] == key) {
            return arr[1];
        }
    }
    return null;
}
From source file:parquet.hive.serde.ParquetHiveMapInspector.java
License:Apache License
@Override
public Map<?, ?> getMap(final Object data) {
    if (data == null) {
        return null;
    }
    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();
    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        map.put(arr[0], arr[1]);
    }
    return map;
}
From source file:parquet.hive.serde.StandardParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])) {
                return arr[1];
            }
        }
        return null;
    }
    if (data instanceof Map) {
        return ((Map) data).get(key);
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.TestDeprecatedParquetInputFormat.java
License:Apache License
private void readParquetHiveInputFormat(final String schemaRequested, final Integer[] arrCheckIndexValues)
        throws Exception {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();
    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }
    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    System.out.println(schemaToString);
    final String specificSchema = schemaRequested == null ? schemaToString : schemaRequested;
    // Set the configuration parameters
    final String columnsStr = "message customer {\n"
            + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n"
            + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n"
            + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n"
            + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n"
            + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n"
            + "      required binary key;\n"
            + "      optional binary value;\n"
            + "    }\n"
            + "  }\n"
            + "  optional group c_list (LIST) {\n"
            + "    repeated group bag {\n"
            + "      optional int32 array_element;\n"
            + "    }\n"
            + "  }\n"
            + "  optional int32 unknown;\n"
            + "}";
    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, specificSchema,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);
    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper = new InputSplitWrapper(realSplit);
    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);
    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();
    int count = 0;
    final int sizeExpected = mapData.size();
    while (reader.next(key, value)) {
        assertTrue(count < sizeExpected);
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, arrExpected.length);
        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected, arrCheckIndexValues);
        assertTrue(deepEquals);
        count++;
    }
    System.out.println("nb lines " + count);
    reader.close();
    assertEquals("Number of lines found and data written don't match", count, sizeExpected);
}
From source file:parquet.hive.TestDeprecatedParquetOuputFormat.java
License:Apache License
private void checkWrite() throws IOException, InterruptedException {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();
    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }
    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    final String columnsStr = "message customer {\n"
            + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n"
            + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n"
            + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n"
            + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n"
            + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n"
            + "      required binary key;\n"
            + "      optional binary value;\n"
            + "    }\n"
            + "  }\n"
            + "  optional group c_list (LIST) {\n"
            + "    repeated group bag {\n"
            + "      optional int32 array_element;\n"
            + "    }\n"
            + "  }\n"
            + "}";
    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, schemaToString,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);
    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper =
            new DeprecatedParquetInputFormat.InputSplitWrapper(realSplit);
    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);
    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();
    int count = 0;
    while (reader.next(key, value)) {
        assertTrue(count < mapData.size());
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, 10);
        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
        assertTrue(deepEquals);
        count++;
    }
    reader.close();
    assertEquals("Number of lines found and data written don't match", count, mapData.size());
}