List of usage examples for org.apache.hadoop.mapreduce MRJobConfig TASK_ATTEMPT_ID
String TASK_ATTEMPT_ID
To view the source code for org.apache.hadoop.mapreduce MRJobConfig TASK_ATTEMPT_ID, click the Source Link.
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetMetaDataMatchOnceNegative() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // the idea here is to have some content although we only evaluate metadata SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write/* w w w .j a v a 2 s . c o m*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheetmetadatanativeoncetestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes // set all the meta data including to custom properties conf.set("hadoopoffice.write.metadata.category", "dummycategory"); conf.set("hadoopoffice.write.metadata.contentstatus", "dummycontentstatus"); conf.set("hadoopoffice.write.metadata.contenttype", "dummycontenttype"); conf.set("hadoopoffice.write.metadata.created", "12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.creator", "dummycreator"); conf.set("hadoopoffice.write.metadata.description", "dummydescription"); conf.set("hadoopoffice.write.metadata.identifier", "dummyidentifier"); conf.set("hadoopoffice.write.metadata.keywords", "dummykeywords"); conf.set("hadoopoffice.write.metadata.lastmodifiedbyuser", "dummylastmodifiedbyuser"); conf.set("hadoopoffice.write.metadata.lastprinted", "12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.modified", 
"12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.lastmodifiedbyuser", "dummylastmodifiedbyuser"); conf.set("hadoopoffice.write.metadata.revision", "2"); conf.set("hadoopoffice.write.metadata.subject", "dummysubject"); conf.set("hadoopoffice.write.metadata.title", "dummytitle"); conf.set("hadoopoffice.write.metadata.custom.mycustomproperty1", "dummymycustomproperty1"); conf.set("hadoopoffice.write.metadata.custom.mycustomproperty2", "dummymycustomproperty2"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // set metadata to match all conf.set("hadoopoffice.read.filter.metadata.matchAll", "false"); // following filter conf.set("hadoopoffice.read.filter.metadata.category", "dummycategory2"); 
conf.set("hadoopoffice.read.filter.metadata.contentstatus", "dummycontentstatus2"); conf.set("hadoopoffice.read.filter.metadata.contenttype", "dummycontenttype2"); conf.set("hadoopoffice.read.filter.metadata.created", "12:00:00 01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.creator", "dummycreator2"); conf.set("hadoopoffice.read.filter.metadata.description", "dummydescription2"); conf.set("hadoopoffice.read.filter.metadata.identifier", "dummyidentifier2"); conf.set("hadoopoffice.read.filter.metadata.keywords", "dummykeywords2"); conf.set("hadoopoffice.read.filter.metadata.lastmodifiedbyuser", "dummylastmodifiedbyuser2"); conf.set("hadoopoffice.read.filter.metadata.lastprinted", "12:00:00 01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.modified", "12:00:00 01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.lastmodifiedbyuser", "dummylastmodifiedbyuser2"); conf.set("hadoopoffice.read.filter.metadata.revision", "3"); conf.set("hadoopoffice.read.filter.metadata.subject", "dummysubject2"); conf.set("hadoopoffice.read.filter.metadata.title", "dummytitle2"); conf.set("hadoopoffice.read.filter.metadata.custom.mycustomproperty1", "dummymycustomproperty12"); conf.set("hhadoopoffice.read.filter.metadata.custom.mycustomproperty2", "dummymycustomproperty22"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); // if following assertion is true that means the document has (wrongly) NOT been // filtered out assertFalse(reader.nextKeyValue(), 
"Input Split for Excel file contains row 1"); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2003SingleSheetMetaDataMatchOnceNegative() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // the idea here is to have some content although we only evaluate metadata SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write//from w ww .j a va2s .c om Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2003singlesheetmetadatanegativeoncetestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); // old Excel format // set all the meta data conf.set("hadoopoffice.write.metadata.applicationname", "dummyapplicationname"); conf.set("hadoopoffice.write.metadata.author", "dummyauthor"); conf.set("hadoopoffice.write.metadata.charcount", "1"); conf.set("hadoopoffice.write.metadata.comments", "dummycomments"); conf.set("hadoopoffice.write.metadata.createdatetime", "12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.edittime", "0"); conf.set("hadoopoffice.write.metadata.keywords", "dummykeywords"); conf.set("hadoopoffice.write.metadata.lastauthor", "dummylastauthor"); conf.set("hadoopoffice.write.metadata.lastprinted", "12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.lastsavedatetime", "12:00:00 01.01.2016"); conf.set("hadoopoffice.write.metadata.pagecount", "1"); conf.set("hadoopoffice.write.metadata.revnumber", "1"); conf.set("hadoopoffice.write.metadata.security", "0"); conf.set("hadoopoffice.write.metadata.subject", "dummysubject"); 
conf.set("hadoopoffice.write.metadata.template", "dummytemplate"); conf.set("hadoopoffice.write.metadata.title", "dummytitle"); conf.set("hadoopoffice.write.metadata.wordcount", "1"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xls"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // set metadata to match all conf.set("hadoopoffice.read.filter.metadata.matchAll", "false"); // following filter conf.set("hadoopoffice.read.filter.metadata.applicationname", "dummyapplicationname2"); conf.set("hadoopoffice.read.filter.metadata.metadata.author", "dummyautho2r"); conf.set("hadoopoffice.read.filter.metadata.metadata.charcount", "2"); conf.set("hadoopoffice.read.filter.metadata.metadata.comments", "dummycomments2"); conf.set("hadoopoffice.read.filter.metadata.metadata.createdatetime", "12:00:00 
01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.metadata.edittime", "1"); conf.set("hadoopoffice.read.filter.metadata.metadata.keywords", "dummykeywords2"); conf.set("hadoopoffice.read.filter.metadata.metadata.lastauthor", "dummylastauthor2"); conf.set("hadoopoffice.read.filter.metadata.metadata.lastprinted", "12:00:00 01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.metadata.lastsavedatetime", "12:00:00 01.01.2017"); conf.set("hadoopoffice.read.filter.metadata.metadata.pagecount", "2"); conf.set("hadoopoffice.read.filter.metadata.metadata.revnumber", "2"); conf.set("hadoopoffice.read.filter.metadata.metadata.security", "1"); conf.set("hadoopoffice.read.filter.metadata.metadata.subject", "dummysubject2"); conf.set("hadoopoffice.read.filter.metadata.metadata.template", "dummytemplate2"); conf.set("hadoopoffice.read.filter.metadata.metadata.title", "dummytitle2"); conf.set("hadoopoffice.read.filter.metadata.metadata.wordcount", "2"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); // if following assertion is true that means the document has (wrongly) NOT been // filtered out assertFalse(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetGZipCompressed() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write// ww w . jav a 2 s . c o m Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheetcompressedtestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); conf.setBoolean("mapreduce.output.fileoutputformat.compress", true); conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.GzipCodec"); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new 
JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx.gz"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx.gz]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname 
== \"[" + fileName + ".xlsx.gz]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) 
spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetComment() throws IOException, InterruptedException { // 2nd cell with a comment // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "This is a test", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write/* w ww.ja va 2 s . com*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheetcommenttestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); 
writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("This is a test", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Input Split for Excel file contains row 1 with cell 2 comment == \"This is a 
test\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013MultiSheet() throws IOException, InterruptedException { // one sheet "Sheet1" // one row string and three columns ("test1","test2","test3") SpreadSheetCellDAO sheet1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO sheet1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO sheet1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // one sheet "Sheet2" // one row string and three columns ("test4","test5","test6") SpreadSheetCellDAO sheet2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet2"); SpreadSheetCellDAO sheet2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet2"); SpreadSheetCellDAO sheet2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet2"); // write// w ww . j av a2s . co m Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013multisheettestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); 
RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, sheet1a1); writer.write(null, sheet1b1); writer.write(null, sheet1c1); writer.write(null, sheet2a1); writer.write(null, sheet2b1); writer.write(null, sheet2c1); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns for Sheet1"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == 
\"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet2!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet2!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns for Sheet1"); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test4\""); assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test5\""); assertEquals("test6", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test6\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2003SingleSheetOneLinkedWorkbook() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write/*from w ww .jav a 2 s. co m*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2003linkedwb1"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = 
linkedWB1FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xls]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xls]Sheet1!B1", "B1", "Sheet1"); // should be test2 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2003singlesheetlinkedwbtestout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xls"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data 
conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xls]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xls]Sheet1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns for Sheet1"); assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test4\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2003SingleSheetTwoLinkedWorkbooks() throws IOException, InterruptedException { // write linkedworkbook1 // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb1a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb1b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb1c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // write//from w w w.j a v a 2 s .c o m Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String linkedWB1FileName = "excel2003linkedwb1b"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB1FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb1a1); writer.write(null, wb1b1); writer.write(null, wb1c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write linkedworkbook2 // one row 
string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO wb2a1 = new SpreadSheetCellDAO("test4", "", "", "A1", "Sheet1"); SpreadSheetCellDAO wb2b1 = new SpreadSheetCellDAO("test5", "", "", "B1", "Sheet1"); SpreadSheetCellDAO wb2c1 = new SpreadSheetCellDAO("test6", "", "", "C1", "Sheet1"); // write job = Job.getInstance(); conf = job.getConfiguration(); String linkedWB2FileName = "excel2003linkedwb2b"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", linkedWB2FileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, wb2a1); writer.write(null, wb2b1); writer.write(null, wb2c1); writer.close(context); committer.commitTask(context); committer.commitJob(jContext); // write mainworkbook linkedWB1FileName = linkedWB1FileName + this.outputbaseAppendix; linkedWB2FileName = linkedWB2FileName + this.outputbaseAppendix; String linkedWorkbookFilename = "[" + tmpDir + File.separator + linkedWB1FileName + ".xls]:[" + tmpDir + File.separator + linkedWB2FileName + ".xls]"; SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test7", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = 
new SpreadSheetCellDAO("", "", "[" + linkedWB1FileName + ".xls]Sheet1!B1", "B1", "Sheet1"); // should be test2 in the end SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("", "", "[" + linkedWB2FileName + ".xls]Sheet1!B1", "C1", "Sheet1"); // should be test5 in the end // write job = Job.getInstance(); conf = job.getConfiguration(); String mainWBfileName = "excel2003singlesheetlinkedwb2testout"; outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", mainWBfileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set("hadoopoffice.write.linkedworkbooks", linkedWorkbookFilename); conf.set("hadoopoffice.write.mimeType", "application/vnd.ms-excel"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); jContext = new JobContextImpl(conf, taskID.getJobID()); context = new TaskAttemptContextImpl(conf, taskID); committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writerMain = outputFormat.getRecordWriter(context); assertNotNull(writerMain, "Format returned null RecordWriter"); writerMain.write(null, a1); writerMain.write(null, b1); writerMain.write(null, c1); writerMain.close(context); committer.commitTask(context); committer.commitJob(jContext); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); mainWBfileName = mainWBfileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + mainWBfileName + ".xls"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data 
conf.set("hadoopoffice.read.locale.bcp47", "de"); // enable option to read linked workbooks conf.setBoolean("hadoopoffice.read.linkedworkbooks", true); conf.setBoolean("hadoopoffice.read.ignoremissinglinkedworkbooks", false); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 Sheet1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + mainWBfileName + ".xls]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + mainWBfileName + ".xls]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns for Sheet1"); assertEquals("test7", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test7\""); // this comes from the external workbook assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test5\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
/**
 * Round-trip test for writing an Excel 2013 (.xlsx) workbook based on a template.
 *
 * <p>The template {@code templatetest1.xlsx} is loaded from the test classpath. The test
 * writes two cells into sheet "Table1" (A4 with the value "Test5", B4 with the formula
 * "60"), then reads the result back and asserts four rows of two columns each:
 * Test/10, Test2/50, Test3/20 and Test5/60.
 *
 * <p>Only the task is committed here (no {@code commitJob}), so the file is read from the
 * {@code _temporary/0/<taskAttempt>} directory below the output path.
 *
 * @throws IOException if writing or reading the workbook fails
 * @throws InterruptedException if the record writer or record reader is interrupted
 */
@Test
public void writeExcelOutputFormatExcel2013TemplateSingleSheet() throws IOException, InterruptedException {
    // constructor arguments: formattedValue, comment, formula, address, sheetName
    // change the cell A4 from Test4 to Test5 from the template
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("Test5", "", "", "A4", "Table1");
    // change b4 from 10 to 60
    SpreadSheetCellDAO b4 = new SpreadSheetCellDAO("", "", "60", "B4", "Table1");

    // ---- write ----
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String fileName = "excel2013basedontemplate";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // new Excel format
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    // template
    ClassLoader classLoader = getClass().getClassLoader();
    String fileNameTemplate = classLoader.getResource("templatetest1.xlsx").getFile();
    conf.set("hadoopoffice.write.template.file", fileNameTemplate);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned null RecordWriter");
    writer.write(null, a4);
    writer.write(null, b4);
    writer.close(context);
    committer.commitTask(context);

    // ---- try to read it again ----
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    // try-with-resources so the reader is released even when an assertion fails
    try (RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context)) {
        assertNotNull(reader, "Format returned null RecordReader");
        reader.initialize(splits.get(0), context);

        // row 1
        assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
        Text spreadSheetKey = reader.getCurrentKey();
        ArrayWritable spreadSheetValue = reader.getCurrentValue();
        assertEquals("[" + fileName + ".xlsx]Table1!A1", spreadSheetKey.toString(),
                "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Table1!A1\"");
        assertEquals(2, spreadSheetValue.get().length,
                "Input Split for Excel file contains row 1 with 2 columns");
        assertEquals("Test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
                "Input Split for Excel file contains row 1 with cell 1 == \"Test\"");
        assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
                "Input Split for Excel file contains row 1 with cell 2 == \"10\"");

        // row 2
        assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
        spreadSheetKey = reader.getCurrentKey();
        spreadSheetValue = reader.getCurrentValue();
        assertEquals(2, spreadSheetValue.get().length,
                "Input Split for Excel file contains row 2 with 2 columns");
        assertEquals("Test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
                "Input Split for Excel file contains row 2 with cell 1 == \"Test2\"");
        assertEquals("50", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
                "Input Split for Excel file contains row 2 with cell 2 == \"50\"");

        // row 3
        assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
        spreadSheetKey = reader.getCurrentKey();
        spreadSheetValue = reader.getCurrentValue();
        assertEquals(2, spreadSheetValue.get().length,
                "Input Split for Excel file contain row 3 with 2 columns");
        assertEquals("Test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
                "Input Split for Excel file contains row 3 with cell 1 == \"Test3\"");
        assertEquals("20", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
                "Input Split for Excel file contains row 3 with cell 2 == \"20\"");

        // row 4: the row modified through the two written cells
        assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
        spreadSheetKey = reader.getCurrentKey();
        spreadSheetValue = reader.getCurrentValue();
        assertEquals(2, spreadSheetValue.get().length,
                "Input Split for Excel file contain row 4 with 2 columns");
        assertEquals("Test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
                "Input Split for Excel file contains row 4 with cell 1 == \"Test5\"");
        assertEquals("60", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
                "Input Split for Excel file contains row 4 with cell 2 == \"60\"");
    }
}
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013TemplateEncryptedSingleSheetPositive() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // change the cell A4 from Test4 to Test5 from the template SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("Test5", "", "", "A4", "Table1"); // change b4 from 10 to 60 SpreadSheetCellDAO b4 = new SpreadSheetCellDAO("", "", "60", "B4", "Table1"); // write/*w ww .j av a 2 s. c om*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013basedontemplateencrypted"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // excel // format // template ClassLoader classLoader = getClass().getClassLoader(); String fileNameTemplate = classLoader.getResource("templatetest1encrypt.xlsx").getFile(); conf.set("hadoopoffice.write.template.file", fileNameTemplate); conf.set("hadoopoffice.write.template.password", "test"); // conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null 
RecordWriter"); writer.write(null, a4); writer.write(null, b4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Table1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Table1!A1\""); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 2 columns"); assertEquals("Test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"Test\""); assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"10\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 
2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 2 columns"); assertEquals("Test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 1 == \"Test2\""); assertEquals("50", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 2 with cell 2 == \"50\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 2 columns"); assertEquals("Test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"Test3\""); assertEquals("20", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"20\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 2 columns"); assertEquals("Test5", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"Test5\""); assertEquals("60", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"60\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelNormalTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013TemplateEncryptedSingleSheetNegative() throws IOException { // one row string and three columns ("test1","test2","test3") // change the cell A4 from Test4 to Test5 from the template SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("Test5", "", "", "A4", "Table1"); // change b4 from 10 to 60 SpreadSheetCellDAO b4 = new SpreadSheetCellDAO("", "", "60", "B4", "Table1"); // write//from w w w . j ava 2 s . co m Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013basedontemplateencrypted"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // excel // format // template ClassLoader classLoader = getClass().getClassLoader(); String fileNameTemplate = classLoader.getResource("templatetest1encrypt.xlsx").getFile(); conf.set("hadoopoffice.write.template.file", fileNameTemplate); conf.set("hadoopoffice.write.template.password", "test2"); // conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNull(writer, "Format returned null RecordWriter"); }