List of usage examples for org.apache.hadoop.io ArrayWritable get
public Writable[] get()
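A minimal, self-contained sketch of the call itself before the real-world examples (illustrative only; the class name ArrayWritableGetSketch and the values are not taken from any source file below): populate an ArrayWritable via set(), then read the backing array back with get().

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class ArrayWritableGetSketch {
    public static void main(String[] args) {
        // The one-arg constructor declares the element class; set() supplies the values.
        ArrayWritable row = new ArrayWritable(IntWritable.class);
        row.set(new Writable[] { new IntWritable(1), new IntWritable(2), new IntWritable(3) });

        // get() hands back the underlying Writable[]; callers cast each element
        // to the declared value class before use.
        for (Writable w : row.get()) {
            System.out.println(((IntWritable) w).get());
        }
    }
}

Note that get() returns the elements as Writable[], so callers cast each entry to the concrete value class, exactly as the SpreadSheetCellDAO and parquet-hive examples below do.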
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test/*from w w w.j a va 2 s. c o m*/ public void writeExcelOutputFormatExcel2013SingleSheetOneLinkedWorkbook() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xlsx]Sheet1!B1", "B1", "Sheet1"); // should be test2 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); 
jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test4\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test/*from w ww . j a va 2 s . co m*/ public void writeExcelOutputFormatExcel2013SingleSheetTwoLinkedWorkbooks() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write linkedworkbook2 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1"); // write job = Job.getInstance(); conf = job.getConfiguration(); String linkedWB2FileName = "excel2013linkedwb2"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB2FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); 
context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.commitTask(context); committer.commitJob(jContext); outputFormat = new ExcelFileOutputFormat(); writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb2a1); writer.write(null, wb2b1); writer.write(null, wb2c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix; linkedWB2FileName = linkedWB2FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xlsx]:[" + tmpDir + File.separator + linkedWB2FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB1FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test2 in the end SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB2FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test5 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.write(null, c1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format 
returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test7\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test5\""); }
From source file:parquet.hive.serde.AbstractParquetMapInspector.java
License:Apache License
@Override
public Map<?, ?> getMap(final Object data) {
    if (data == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        final Map<Writable, Writable> map = new HashMap<Writable, Writable>();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            map.put(arr[0], arr[1]);
        }
        return map;
    }
    if (data instanceof Map) {
        return (Map) data;
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
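The chained get() calls above reflect how the deprecated parquet-hive serde lays out a Hive map: an outer single-element ArrayWritable container whose first element is an ArrayWritable of two-element (key, value) ArrayWritable entries. A minimal sketch of building that layout by hand, assuming Text keys and values (the class and variable names are illustrative, not from the parquet-hive sources):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ParquetMapLayoutSketch {
    public static void main(String[] args) {
        // one (key, value) pair per inner ArrayWritable
        ArrayWritable entry1 = new ArrayWritable(Text.class, new Writable[] { new Text("k1"), new Text("v1") });
        ArrayWritable entry2 = new ArrayWritable(Text.class, new Writable[] { new Text("k2"), new Text("v2") });
        // the list of entries...
        ArrayWritable mapArray = new ArrayWritable(ArrayWritable.class, new Writable[] { entry1, entry2 });
        // ...wrapped in a single-element container, matching mapContainer[0] in getMap() above
        ArrayWritable container = new ArrayWritable(ArrayWritable.class, new Writable[] { mapArray });

        // walk it the same way getMap() does
        for (Writable obj : ((ArrayWritable) container.get()[0]).get()) {
            Writable[] arr = ((ArrayWritable) obj).get();
            System.out.println(arr[0] + " -> " + arr[1]);
        }
    }
}

The same container/entry-list shape is assumed by every map inspector in the examples that follow; they differ only in how the lookup key is compared.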
From source file:parquet.hive.serde.ArrayWritableObjectInspector.java
License:Apache License
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
    if (data == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final ArrayWritable arr = (ArrayWritable) data;
        return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.serde.DeepParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(arr[0]))
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(arr[0]))) {
                return arr[1];
            }
        }
        return null;
    }
    if (data instanceof Map) {
        final Map<?, ?> map = (Map<?, ?>) data;
        if (map.containsKey(key)) {
            return map.get(key);
        }
        for (final Map.Entry<?, ?> entry : map.entrySet()) {
            if (key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveJavaObject(entry.getKey()))
                    || key.equals(((PrimitiveObjectInspector) keyInspector).getPrimitiveWritableObject(entry.getKey()))) {
                return entry.getValue();
            }
        }
        return null;
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.serde.ParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null) {
        return null;
    }
    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        // note: compares by reference identity, not equals(), so this only
        // matches when the caller passes the very same key instance
        if (arr[0] == key) {
            return arr[1];
        }
    }
    return null;
}
From source file:parquet.hive.serde.ParquetHiveMapInspector.java
License:Apache License
@Override
public Map<?, ?> getMap(final Object data) {
    if (data == null) {
        return null;
    }
    final Writable[] mapContainer = ((ArrayWritable) data).get();
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();
    for (final Writable obj : mapArray) {
        final ArrayWritable mapObj = (ArrayWritable) obj;
        final Writable[] arr = mapObj.get();
        map.put(arr[0], arr[1]);
    }
    return map;
}
From source file:parquet.hive.serde.StandardParquetHiveMapInspector.java
License:Apache License
@Override
public Object getMapValueElement(final Object data, final Object key) {
    if (data == null || key == null) {
        return null;
    }
    if (data instanceof ArrayWritable) {
        final Writable[] mapContainer = ((ArrayWritable) data).get();
        if (mapContainer == null || mapContainer.length == 0) {
            return null;
        }
        final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
        for (final Writable obj : mapArray) {
            final ArrayWritable mapObj = (ArrayWritable) obj;
            final Writable[] arr = mapObj.get();
            if (key.equals(arr[0])) {
                return arr[1];
            }
        }
        return null;
    }
    if (data instanceof Map) {
        return ((Map) data).get(key);
    }
    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
From source file:parquet.hive.TestDeprecatedParquetInputFormat.java
License:Apache License
private void readParquetHiveInputFormat(final String schemaRequested, final Integer[] arrCheckIndexValues)
        throws Exception {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();
    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }
    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    System.out.println(schemaToString);
    final String specificSchema = schemaRequested == null ? schemaToString : schemaRequested;
    // Set the configuration parameters
    final String columnsStr = "message customer {\n"
            + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n"
            + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n"
            + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n"
            + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n"
            + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n"
            + "      required binary key;\n"
            + "      optional binary value;\n"
            + "    }\n"
            + "  }\n"
            + "  optional group c_list (LIST) {\n"
            + "    repeated group bag {\n"
            + "      optional int32 array_element;\n"
            + "    }\n"
            + "  }\n"
            + "  optional int32 unknown;\n"
            + "}";
    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, specificSchema,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);
    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper = new InputSplitWrapper(realSplit);
    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);
    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();
    int count = 0;
    final int sizeExpected = mapData.size();
    while (reader.next(key, value)) {
        assertTrue(count < sizeExpected);
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, arrExpected.length);
        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected, arrCheckIndexValues);
        assertTrue(deepEquals);
        count++;
    }
    System.out.println("nb lines " + count);
    reader.close();
    assertEquals("Number of lines found and data written don't match", count, sizeExpected);
}
From source file:parquet.hive.TestDeprecatedParquetOuputFormat.java
License:Apache License
private void checkWrite() throws IOException, InterruptedException {
    final ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, new Path(testFile.getAbsolutePath()));
    final MessageType schema = readFooter.getFileMetaData().getSchema();
    long size = 0;
    final List<BlockMetaData> blocks = readFooter.getBlocks();
    for (final BlockMetaData block : blocks) {
        size += block.getTotalByteSize();
    }
    final FileInputFormat<Void, ArrayWritable> format = new DeprecatedParquetInputFormat();
    final String[] locations = new String[] { "localhost" };
    final String schemaToString = schema.toString();
    final String columnsStr = "message customer {\n"
            + "  optional int32 c_custkey;\n"
            + "  optional binary c_name;\n"
            + "  optional binary c_address;\n"
            + "  optional int32 c_nationkey;\n"
            + "  optional binary c_phone;\n"
            + "  optional double c_acctbal;\n"
            + "  optional binary c_mktsegment;\n"
            + "  optional binary c_comment;\n"
            + "  optional group c_map (MAP_KEY_VALUE) {\n"
            + "    repeated group map {\n"
            + "      required binary key;\n"
            + "      optional binary value;\n"
            + "    }\n"
            + "  }\n"
            + "  optional group c_list (LIST) {\n"
            + "    repeated group bag {\n"
            + "      optional int32 array_element;\n"
            + "    }\n"
            + "  }\n"
            + "}";
    final Map<String, String> readSupportMetaData = new HashMap<String, String>();
    readSupportMetaData.put(DataWritableReadSupport.HIVE_SCHEMA_KEY, columnsStr);
    final ParquetInputSplit realSplit = new ParquetInputSplit(new Path(testFile.getAbsolutePath()), 0, size,
            locations, blocks, schemaToString, schemaToString,
            readFooter.getFileMetaData().getKeyValueMetaData(), readSupportMetaData);
    final DeprecatedParquetInputFormat.InputSplitWrapper splitWrapper =
            new DeprecatedParquetInputFormat.InputSplitWrapper(realSplit);
    // construct the record reader
    final RecordReader<Void, ArrayWritable> reader = format.getRecordReader(splitWrapper, job, reporter);
    // create key/value
    final Void key = reader.createKey();
    final ArrayWritable value = reader.createValue();
    int count = 0;
    while (reader.next(key, value)) {
        assertTrue(count < mapData.size());
        assertTrue(key == null);
        final Writable[] arrValue = value.get();
        final ArrayWritable expected = mapData.get(((IntWritable) arrValue[0]).get());
        final Writable[] arrExpected = expected.get();
        assertEquals(arrValue.length, 10);
        final boolean deepEquals = UtilitiesTestMethods.smartCheckArray(arrValue, arrExpected,
                new Integer[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
        assertTrue(deepEquals);
        count++;
    }
    reader.close();
    assertEquals("Number of lines found and data written don't match", count, mapData.size());
}