Example usage for org.apache.hadoop.io ArrayWritable get

List of usage examples for org.apache.hadoop.io ArrayWritable get

Introduction

On this page you can find usage examples for the get() method of org.apache.hadoop.io.ArrayWritable.

Prototype

public Writable[] get() 
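
Before the examples, here is a minimal, self-contained sketch (not taken from any of the sources below; the class name ArrayWritableGetDemo and the values are illustrative): wrap values of a concrete Writable type in an ArrayWritable and read back the underlying array with get(), casting each element to its value class.

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Illustrative demo class, not part of the examples below.
public class ArrayWritableGetDemo {
    public static void main(String[] args) {
        // Wrap two Text values in an ArrayWritable keyed to the Text value class.
        Writable[] values = new Writable[] { new Text("alpha"), new Text("beta") };
        ArrayWritable aw = new ArrayWritable(Text.class, values);

        // get() returns the backing Writable[]; cast elements to the value class.
        for (Writable w : aw.get()) {
            System.out.println(((Text) w).toString());
        }
    }
}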

Usage

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java

License:Apache License

@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184")
@Test
public void writeExcelOutputFormatExcel2013SingleSheetOneLinkedWorkbook()
        throws IOException, InterruptedException {
    // write linkedworkbook1
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String linkedWB1FileName = "excel2013linkedwb1";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB1FileName);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb1a1);
    writer.write(null, wb1b1);
    writer.write(null, wb1c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write mainworkbook
    linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix;
    String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]";
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xlsx]Sheet1!B1", "B1",
            "Sheet1"); // should be test2 in the end
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();

    String mainWBfileName = "excel2013singlesheetlinkedwbtestout";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", mainWBfileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context);
    assertNotNull(writerMain, "Format returned  null RecordWriter");
    writerMain.write(null, a1);
    writerMain.write(null, b1);
    writerMain.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // try to read it again
    job = Job.getInstance(conf);
    mainWBfileName = mainWBfileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // enable option to read linked workbooks
    conf.setBoolean("hadoopoffice.read.linkedworkbooks", true);
    conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false);
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\"");
    assertEquals(2, spreadSheetValue.get().length,
            "Input Split for Excel file contains row 1 with 2 columns for Sheet1");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test4\"");
    // this comes from the external workbook
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java

License:Apache License

@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184")
@Test
public void writeExcelOutputFormatExcel2013SingleSheetTwoLinkedWorkbooks()
        throws IOException, InterruptedException {
    // write linkedworkbook1
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String linkedWB1FileName = "excel2013linkedwb1";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB1FileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb1a1);
    writer.write(null, wb1b1);
    writer.write(null, wb1c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write linkedworkbook2
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address,String
    // sheetName)
    SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1");
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();
    String linkedWB2FileName = "excel2013linkedwb2";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", linkedWB2FileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    outputFormat = new ExcelFileOutputFormat();
    writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned  null RecordWriter");
    writer.write(null, wb2a1);
    writer.write(null, wb2b1);
    writer.write(null, wb2c1);
    writer.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // write mainworkbook
    linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix;
    linkedWB2FileName = linkedWB2FileName + this.outputbaseAppendix;
    String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]:[" + tmpDir
            + File.separator + linkedWB2FileName + ".xlsx]";
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB1FileName + ".xlsx]Sheet1'!B1", "B1",
            "Sheet1"); // should be test2 in the end
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB2FileName + ".xlsx]Sheet1'!B1", "C1",
            "Sheet1"); // should be test5 in the end
    // write
    job = Job.getInstance();
    conf = job.getConfiguration();
    String mainWBfileName = "excel2013singlesheetlinkedwbtestout";
    outputPath = new Path(tmpDir);
    FileOutputFormat.setOutputPath(job, outputPath);
    conf.set("mapreduce.output.basename", mainWBfileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new
    // Excel
    // format,
    // anyway
    // default,
    // but
    // here
    // for
    // illustrative
    // purposes
    conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    jContext = new JobContextImpl(conf, taskID.getJobID());

    context = new TaskAttemptContextImpl(conf, taskID);
    committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);

    outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context);
    assertNotNull(writerMain, "Format returned  null RecordWriter");
    writerMain.write(null, a1);
    writerMain.write(null, b1);
    writerMain.write(null, c1);
    writerMain.close(context);
    committer.commitTask(context);
    committer.commitJob(jContext);
    // try to read it again
    job = Job.getInstance(conf);
    mainWBfileName = mainWBfileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // enable option to read linked workbooks
    conf.setBoolean("hadoopoffice.read.linkedworkbooks", true);
    conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false);
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length,
            "Input Split for Excel file contains row 1 with 3 columns for Sheet1");
    assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test7\"");
    // this comes from the external workbook
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test5\"");
}

From source file:parquet.hive.serde.AbstractParquetMapInspector.java

License:Apache License

@Override
public Map<?, ?> getMap(final Object data) {
    if (data == null) {
        return null;
    }

    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();

        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }

        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        final Map<Writable, Writable> map = new HashMap<Writable, Writable>();

        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            map.put(arr[0], arr[1]);
        }

        return map;
    }

    if (data instanceof Map) {
        return (Map) data;
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
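
For orientation, the nested ArrayWritable layout that the getMap() implementation above unwraps can be reproduced with plain Hadoop writables. The following is a sketch with made-up keys and values (the class name NestedMapLayoutSketch and the data are illustrative, not taken from parquet-hive); it mirrors the container-then-entries unpacking done via get().

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

// Illustrative sketch of the nested layout expected by the map inspectors above.
public class NestedMapLayoutSketch {
    public static void main(String[] args) {
        // Each map entry is itself an ArrayWritable of [key, value].
        ArrayWritable pair1 = new ArrayWritable(Writable.class,
                new Writable[] { new Text("k1"), new IntWritable(1) });
        ArrayWritable pair2 = new ArrayWritable(Writable.class,
                new Writable[] { new Text("k2"), new IntWritable(2) });

        // The repeated entries sit in one ArrayWritable, which is wrapped once
        // more in the outer container that getMap() unwraps first.
        ArrayWritable entries = new ArrayWritable(ArrayWritable.class,
                new Writable[] { pair1, pair2 });
        ArrayWritable container = new ArrayWritable(ArrayWritable.class,
                new Writable[] { entries });

        // Unpack it the same way the inspector does.
        Writable[] mapArray = ((ArrayWritable) container.get()[0]).get();
        Map<Writable, Writable> map = new HashMap<Writable, Writable>();
        for (Writable obj : mapArray) {
            Writable[] kv = ((ArrayWritable) obj).get();
            map.put(kv[0], kv[1]);
        }
        System.out.println(map); // two entries, e.g. {k1=1, k2=2}
    }
}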

From source file:parquet.hive.serde.ArrayWritableObjectInspector.java

License:Apache License

@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
    if (data == null) {
        return null;
    }

    if (data instanceof ArrayWritable) {
        final ArrayWritable arr = (ArrayWritable) data;
        return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}

From source file:parquet.hive.serde.DeepParquetHiveMapInspector.java

License:Apache License

@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }

    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();

        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }

        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();

        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(arr[0]))
                    || key.equals(
                            ((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(arr[0]))) {
                return arr[1];
            }
        }

        return null;
    }

    if (data instanceof Map) {
        final Map<?, ?> map = (Map<?, ?>) data;

        if (map.containsKey(key)) {
            return map.get(key);
        }

        for (final Map.Entry<?, ?> entry : map.entrySet()) {
            if (key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(entry.getKey()))
                    || key.equals(((PrimitiveObjectInspector) keyInspector)
                            .getPrimitiveWritableObject(entry.getKey()))) {
                return entry.getValue();
            }
        }

        return null;
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}

From source file:parquet.hive.serde.ParquetHiveMapInspector.java

License:Apache License

@Override
public Object getMapValueElement(final Object data, final Object key) {

    if (data == null) {
        return null;
    }

    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();

    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        if (arr[0] == key) {
            return arr[1];
        }
    }

    return null;
}

From source file:parquet.hive.serde.ParquetHiveMapInspector.java

License:Apache License

@Override
public Map<?, ?> getMap(final Object data) {

    if (data == null) {
        return null;
    }

    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();

    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        map.put(arr[0], arr[1]);
    }

    return map;
}

From source file:parquet.hive.serde.StandardParquetHiveMapInspector.java

License:Apache License

@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }

    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();

        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }

        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();

        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])) {
                return arr[1];
            }
        }

        return null;
    }

    if (data instanceof Map) {
        return ((Map) data).get(key);
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}

From source file:parquet.hive.TestDeprecatedParquetInputFormat.java

License:Apache License

private void readParquetHiveInputFormat(final String schemaRequested, final Integer[] arrCheckIndexValues)
        throws Exception {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();

    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }

    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    System.out.println(schemaToString);

    final String specificSchema = schemaRequested == null ? schemaToString : schemaRequested;

    // Set the configuration parameters
    final String columnsStr = "message customer {\n" + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n" + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n" + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n" + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n" + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n" + "      required binary key;\n" + "      optional binary value;\n"
            + "    }\n" + "  }\n" + "  optional group c_list (LIST) {\n" + "    repeated group bag {\n"
            + "      optional int32 array_element;\n" + "    }\n" + "  }\n" + "  optional int32 unknown;\n"
            + "}";

    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, specificSchema,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);

    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper = new InputSplitWrapper(realSplit);

    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);

    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();

    int count = 0;
    final int sizeExpected = mapData.size();
    while (reader.next(key, value)) {
        assertTrue(count < sizeExpected);
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, arrExpected.length);

        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                arrCheckIndexValues);

        assertTrue(deepEquals);
        count++;
    }
    System.out.println("nb lines " + count);
    reader.close();

    assertEquals("Number of lines found and data written don't match", count, sizeExpected);
}

From source file:parquet.hive.TestDeprecatedParquetOuputFormat.java

License:Apache License

private void checkWrite() throws IOException, InterruptedException {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();

    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }

    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    final String columnsStr = "message customer {\n" + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n" + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n" + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n" + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n" + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n" + "      required binary key;\n" + "      optional binary value;\n"
            + "    }\n" + "  }\n" + "  optional group c_list (LIST) {\n" + "    repeated group bag {\n"
            + "      optional int32 array_element;\n" + "    }\n" + "  }\n" + "}";

    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, schemaToString,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);

    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper = new DeprecatedParquetInputFormat.InputSplitWrapper(
            realSplit);

    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);

    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();

    int count = 0;
    while (reader.next(key, value)) {
        assertTrue(count < mapData.size());
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final Writable[] writableArr = arrValue;
        final ArrayWritable expected = mapData.get(((IntWritable) writableArr[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, 10);

        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });

        assertTrue(deepEquals);
        count++;
    }
    reader.close();

    assertEquals("Number of lines found and data written don't match", count, mapData.size());

}