List of usage examples for org.apache.hadoop.io ArrayWritable get
public Writable[] get()
From source file:org.zuinnote.hadoop.office.format.mapred.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositiveSignedPositive() throws IOException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write//from w w w .jav a 2 s .c o m JobConf job = new JobConf(defaultConf); String fileName = "excel2013singlesheettestoutencryptedpositivesignedpositive"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // excel // format // security // for the new Excel format you need to decide on your own which algorithms are // secure job.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile"); job.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256"); job.set("hadoopoffice.write.security.crypt.chain.mode", "cbc"); job.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512"); job.set("hadoopoffice.write.security.crypt.password", "test"); /// signature String pkFileName = "testsigning.pfx"; // private key ClassLoader classLoader = getClass().getClassLoader(); String fileNameKeyStore = classLoader.getResource(pkFileName).getFile(); job.set("hadoopoffice.write.security.sign.keystore.file", fileNameKeyStore); job.set("hadoopoffice.write.security.sign.keystore.type", "PKCS12"); job.set("hadoopoffice.write.security.sign.keystore.password", "changeit"); job.set("hadoopoffice.write.security.sign.keystore.alias", "testalias"); job.set("hadoopoffice.write.security.sign.hash.algorithm", "sha512"); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, fileName, null); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(reporter); // try to read it again job = new JobConf(defaultConf); Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); // you just need to provide the password to read encrypted data job.set("hadoopoffice.read.security.crypt.password", "test"); job.set("hadoopoffice.read.security.sign.verifysignature", "true"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); inputFormat.configure(job); InputSplit[] inputSplits = inputFormat.getSplits(job, 1); assertEquals(1, inputSplits.length, "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter); assertNotNull(reader, "Format returned null RecordReader"); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 1"); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 2"); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 3"); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 4"); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapred.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test/* ww w . ja va 2s .c o m*/ public void writeExcelOutputFormatExcel2013SingleSheetOneLinkedWorkbook() throws IOException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write JobConf job = new JobConf(defaultConf); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, linkedWB1FileName, null); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(reporter); // write mainworkbook String linkedWorkbookFilename = "[" + tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + linkedWB1FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xlsx]Sheet1!B1", "B1", "Sheet1"); // should be test2 in the end // write job = new JobConf(defaultConf); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes job.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(null, job, mainWBfileName, null); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.close(reporter); // try to read it again job = new JobConf(defaultConf); Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks job.setBoolean("hadoopoffice.read.linkedworkbooks", true); job.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); inputFormat.configure(job); InputSplit[] inputSplits = inputFormat.getSplits(job, 1); assertEquals(1, inputSplits.length, "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter); assertNotNull(reader, "Format returned null RecordReader"); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 1 Sheet1"); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test4\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); }
From source file:org.zuinnote.hadoop.office.format.mapred.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Disabled("This does not work yet due to a bug in Apache POI that prevents writing correct workbooks containing external references: https://bz.apache.org/bugzilla/show_bug.cgi?id=57184") @Test// w w w . j a v a2s .c om public void writeExcelOutputFormatExcel2013SingleSheetTwoLinkedWorkbooks() throws IOException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write JobConf job = new JobConf(defaultConf); String linkedWB1FileName = "excel2013linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(null, job, linkedWB1FileName, null); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(reporter); // write linkedworkbook2 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1"); // write job = new JobConf(defaultConf); String linkedWB2FileName = "excel2013linkedwb2"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes outputFormat = new ExcelFileOutputFormat(); writer = outputFormat.getRecordWriter(null, job, linkedWB2FileName, null); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb2a1); writer.write(null, wb2b1); writer.write(null, wb2c1); writer.close(reporter); // write mainworkbook String linkedWorkbookFilename = "[" + tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + linkedWB1FileName + ".xlsx]:[" + tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + linkedWB2FileName + ".xlsx]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB1FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test2 in the end SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "'[" + linkedWB2FileName + ".xlsx]Sheet1'!B1", "B1", "Sheet1"); // should be test5 in the end // write job = new JobConf(defaultConf); String mainWBfileName = "excel2013singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); // set generic outputformat settings job.set(JobContext.TASK_ATTEMPT_ID, attempt); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); job.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes job.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(null, job, mainWBfileName, null); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.write(null, c1); writerMain.close(reporter); // try to read it again job = new JobConf(defaultConf); Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + "_temporary" + File.separator + attempt + File.separator + mainWBfileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data job.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks job.setBoolean("hadoopoffice.read.linkedworkbooks", true); job.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); inputFormat.configure(job); InputSplit[] inputSplits = inputFormat.getSplits(job, 1); assertEquals(1, inputSplits.length, "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.getRecordReader(inputSplits[0], job, reporter); assertNotNull(reader, "Format returned null RecordReader"); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 1 Sheet1"); assertEquals("[" + mainWBfileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xlsx]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test7\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test5\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.AbstractSpreadSheetDocumentRecordWriter.java
License:Apache License
/** * * Write SpreadSheetDAO into a table document. Note this does not necessarily mean it is already written in the OutputStream, but usually the in-memory representation. * @param key is ignored//w ww . ja va2s . co m * @param value is a SpreadSheet Cell to be inserted into the table document * */ @Override public synchronized void write(NullWritable key, K value) throws IOException { try { if (value == null) { return; } if (value instanceof ArrayWritable) { ArrayWritable row = (ArrayWritable) value; Writable[] rowCellDAO = row.get(); for (int i = 0; i < rowCellDAO.length; i++) { this.officeWriter.write(rowCellDAO[i]); } } else { this.officeWriter.write(value); } } catch (OfficeWriterException e) { LOG.error(e); } }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2013MultiSheetHeaderLowFootPrint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "multisheetheader.xlsx"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.locale.bcp47", "us"); conf.set("hadoopoffice.read.header.read", "true"); conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true"); conf.set("hadoopoffice.read.lowFootprint", "true"); conf.set("hadoopoffice.read.lowFootprint.parser", "sax"); Job job = Job.getInstance(conf);// w ww.j ava 2s . c om FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); assertEquals("column1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0], " header column 1 correctly read"); assertEquals("column2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1], " header column 2 correctly read"); assertEquals("column3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2], " header column 3 correctly read"); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); // First Sheet assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A2 = \"1\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B2 = \"test1\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C2 = \"10\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A3 = \"2\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B3 = \"test3\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C3 = \"15\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A4 = \"10\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B4 = \"test2\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("20", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C4 = \"20\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C4", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (Second Sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("50", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A2 = \"50\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B2 = \"test1\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("80", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C2 = \"80\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (Second Sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("60", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A3 = \"60\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B3 = \"test3\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("90", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C3 = \"90\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (Second Sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("70", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A4 = \"70\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula"); assertEquals("A4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet"); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "B4 = \"test2\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula"); assertEquals("B4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet"); assertEquals("10000", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C4 = \"10000\""); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment"); assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula"); assertEquals("C4", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address"); assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet"); assertFalse(reader.nextKeyValue(), "Input Split for Excel file contains no further row"); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2003SingleSheetLowFootPrint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "excel2003test.xls"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.read.lowFootprint", "true"); Job job = Job.getInstance(conf);/*from w ww .j av a 2s . co m*/ FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[excel2003test.xls]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2003test.xls]Sheet1!A1\""); assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\""); assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 4 == \"test4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column"); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns"); assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\""); assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"5\""); assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null"); assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null"); assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 5 == \"null\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 4 with cell 1 == \"1\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns"); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 1 == \"2\""); assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 2== \"6\""); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 3== \"10\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 1 == \"3\""); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 2== \"4\""); assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 3== \"15\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2003MultiSheetAllLowFootPrint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "excel2003testmultisheet.xls"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.read.lowFootprint", "true"); Job job = Job.getInstance(conf);// w w w . jav a2 s . c o m FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[excel2003testmultisheet.xls]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2003testmultisheet.xls]Sheet1!A1\""); assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\""); assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 4 == \"test4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column"); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns"); assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\""); assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"5\""); assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null"); assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null"); assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 5 == \"null\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 4 with cell 1 == \"1\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns"); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 1 == \"2\""); assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 2== \"6\""); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 3== \"10\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 1 == \"3\""); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 2== \"4\""); assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 3== \"15\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 7 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 7 with cell 1 == \"8\""); assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 7 with cell 2 == \"99\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 7 with 2 columns"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 8 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 8 with 1 column"); assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 8 with cell 1 == \"test\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 9 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 9 with 3 columns"); assertNull(spreadSheetValue.get()[0], "Input Split for Excel file contains row 9 with cell 1 == null"); assertNull(spreadSheetValue.get()[1], "Input Split for Excel file contains row 9 with cell 2 == null"); assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 9 with cell 3 == \"seven\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2013SingleSheetLowFootPrint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "excel2013test.xlsx"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.read.lowFootprint", "true"); conf.set("hadoopoffice.read.lowFootprint.parser", "sax"); Job job = Job.getInstance(conf);//from w w w .j av a 2 s .c om FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[excel2013test.xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2013test.xlsx]Sheet1!A1\""); assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\""); assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 4 == \"test4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column"); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns"); assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\""); assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"5\""); assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null"); assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null"); assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 5 == \"null\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 4 with cell 1 == \"1\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns"); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 1 == \"2\""); assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 2== \"6\""); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 3== \"10\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 1 == \"3\""); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 2== \"4\""); assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 3== \"15\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2013MultiSheetAllLowFootPrint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "excel2013testmultisheet.xlsx"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.read.lowFootprint", "true"); conf.set("hadoopoffice.read.lowFootprint.parser", "sax"); Job job = Job.getInstance(conf);/* w ww .j av a 2 s.co m*/ FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[excel2013testmultisheet.xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2013testmultisheet.xlsx]Sheet1!A1\""); assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\""); assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 4 == \"test4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column"); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"4\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns"); assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\""); assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"5\""); assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null"); assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null"); assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 5 == \"null\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 4 with cell 1 == \"1\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns"); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 1 == \"2\""); assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 2== \"6\""); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 5 with cell 3== \"10\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6 (first sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 1 == \"3\""); assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 2== \"4\""); assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 6 with cell 3== \"15\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 7 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 7 with cell 1 == \"8\""); assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 7 with cell 2 == \"99\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 7 with 2 columns"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 8 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 8 with 1 column"); assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 8 with cell 1 == \"test\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 9 (second sheet)"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 9 with 3 columns"); assertNull(spreadSheetValue.get()[0], "Input Split for Excel file contains row 9 with cell 1 == null"); assertNull(spreadSheetValue.get()[1], "Input Split for Excel file contains row 9 with cell 2 == null"); assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 9 with cell 3 == \"seven\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void readExcelInputFormatExcel2003SingleSheetEncryptedPositiveLowFootprint() throws IOException, InterruptedException { Configuration conf = new Configuration(defaultConf); ClassLoader classLoader = getClass().getClassLoader(); String fileName = "excel2003encrypt.xls"; String fileNameSpreadSheet = classLoader.getResource(fileName).getFile(); Path file = new Path(fileNameSpreadSheet); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.read.lowFootprint", "true"); // for decryption simply set the password conf.set("hadoopoffice.read.security.crypt.password", "test"); Job job = Job.getInstance(conf);/*ww w . ja v a 2 s . c o m*/ FileInputFormat.setInputPaths(job, file); TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); ExcelFileInputFormat format = new ExcelFileInputFormat(); List<InputSplit> splits = format.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[excel2003encrypt.xls]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[excel2003encrypt.xls]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\""); assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Input Split for Excel file contains row 1 with cell 1 address == \"A1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); }