List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter commitTask
@Override public void commitTask(TaskAttemptContext context) throws IOException
From source file:mlbench.bayes.test.BayesTest.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);//from ww w . j av a 2 s . c o m HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, config); classifier = new StandardNaiveBayesClassifier(model); MPI_D.COMM_BIPARTITE_O.Barrier(); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); Text key = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(key, value)) { Vector result = classifier.classifyFull(value.get()); MPI_D.Send(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result)); } } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDir); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null; VectorWritable point = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { key = (Text) vals[0]; point = (VectorWritable) vals[1]; if (key != null && point != null) { vector = point.get(); outrw.write(key, new VectorWritable(vector)); } vals = MPI_D.Recv(); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { // load the labels Map<Integer, String> labelMap = BayesUtils.readLabelIndex(config, labPath); // loop over the results and create the confusion matrix SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>( output, PathType.LIST, PathFilters.partFilter(), config); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer); } } MPI_D.Finalize(); }
From source file:mlbench.bayes.train.IndexInstances.java
License:Apache License
@SuppressWarnings({ "deprecation" }) public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);//from w w w .ja v a 2 s.c o m HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { System.out.println(IndexInstances.class.getSimpleName() + " O start."); createLabelIndex(labPath); } HadoopUtil.cacheFiles(labPath, config); MPI_D.COMM_BIPARTITE_O.Barrier(); OpenObjectIntHashMap<String> labelIndex = BayesUtils.readIndexFromCache(config); if (MPI_D.COMM_BIPARTITE_O != null) { // O communicator int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); Text labelText = kvrr.createKey(); VectorWritable instance = kvrr.createValue(); while (kvrr.next(labelText, instance)) { String label = SLASH.split(labelText.toString())[1]; if (labelIndex.containsKey(label)) { MPI_D.Send(new IntWritable(labelIndex.get(label)), instance); } } } } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDir); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(IntWritable.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<IntWritable, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<IntWritable, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } IntWritable key = null, newKey = null; VectorWritable point = null, newPoint = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (IntWritable) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { } else if (!key.equals(newKey)) { outrw.write(key, new VectorWritable(vector)); vector = null; } if (vector == null) { vector = newPoint.get(); } else { vector.assign(newPoint.get(), Functions.PLUS); } key = newKey; point = newPoint; vals = MPI_D.Recv(); } if (newKey != null && newPoint != null) { outrw.write(key, new VectorWritable(vector)); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } } MPI_D.Finalize(); }
From source file:mlbench.bayes.train.WeightSummer.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);/*from w w w . j av a 2s . c o m*/ HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); Vector weightsPerFeature = null; Vector weightsPerLabel = new DenseVector(labNum); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); IntWritable index = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(index, value)) { Vector instance = value.get(); if (weightsPerFeature == null) { weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements()); } int label = index.get(); weightsPerFeature.assign(instance, Functions.PLUS); weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum()); } } if (weightsPerFeature != null) { MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature)); MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel)); } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDirW); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null, newKey = null; VectorWritable point = null, newPoint = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (Text) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { } else if (!key.equals(newKey)) { outrw.write(key, new VectorWritable(vector)); vector = null; } if (vector == null) { vector = newPoint.get(); } else { vector.assign(newPoint.get(), Functions.PLUS); } key = newKey; point = newPoint; vals = MPI_D.Recv(); } if (newKey != null && newPoint != null) { outrw.write(key, new VectorWritable(vector)); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { Path resOut = new Path(outDir); NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config); naiveBayesModel.serialize(resOut, config); } } MPI_D.Finalize(); }
From source file:org.goldenorb.OrbPartition.java
License:Apache License
private void dumpData() { Configuration conf = new Configuration(); Job job = null;/*from www. j ava 2 s. c om*/ JobContext jobContext = null; TaskAttemptContext tao = null; RecordWriter rw; VertexWriter vw; FileOutputFormat outputFormat; boolean tryAgain = true; int count = 0; while (tryAgain && count < 15) try { count++; tryAgain = false; if (job == null) { job = new Job(conf); job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(new String(getOrbConf().getNameNode() + getOrbConf().getFileOutputPath()))); } if (jobContext == null) { jobContext = new JobContext(job.getConfiguration(), new JobID()); } System.out.println(jobContext.getConfiguration().get("mapred.output.dir")); tao = new TaskAttemptContext(jobContext.getConfiguration(), new TaskAttemptID(new TaskID(jobContext.getJobID(), true, getPartitionID()), 0)); outputFormat = (FileOutputFormat) tao.getOutputFormatClass().newInstance(); rw = outputFormat.getRecordWriter(tao); vw = (VertexWriter) getOrbConf().getVertexOutputFormatClass().newInstance(); for (Vertex v : vertices.values()) { OrbContext oc = vw.vertexWrite(v); rw.write(oc.getKey(), oc.getValue()); // orbLogger.info("Partition: " + Integer.toString(partitionId) + "writing: " + // oc.getKey().toString() + ", " + oc.getValue().toString()); } rw.close(tao); FileOutputCommitter cm = (FileOutputCommitter) outputFormat.getOutputCommitter(tao); if (cm.needsTaskCommit(tao)) { cm.commitTask(tao); cm.cleanupJob(jobContext); } else { cm.cleanupJob(jobContext); tryAgain = true; } } catch (IOException e) { tryAgain = true; e.printStackTrace(); } catch (InstantiationException e) { tryAgain = true; e.printStackTrace(); } catch (IllegalAccessException e) { tryAgain = true; e.printStackTrace(); } catch (ClassNotFoundException e) { tryAgain = true; e.printStackTrace(); } catch (InterruptedException e) { tryAgain = true; e.printStackTrace(); } if (tryAgain) { synchronized (this) { try { wait(1000); } catch (InterruptedException e) { e.printStackTrace(); } } } }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetLowFootprint() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write/* w w w. ja v a2 s .co m*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestout"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.write.lowFootprint", "true"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositiveLowFootprint() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write/* w ww . jav a2s . c om*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestoutencryptedpositivelowfootprint"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set generic outputformat settings // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.write.lowFootprint", "true"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // excel // format // security // for the new Excel format you need to decide on your own which algorithms are // secure conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile"); conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256"); conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc"); conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512"); conf.set("hadoopoffice.write.security.crypt.password", "test"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // you just need to provide the password to read encrypted data conf.set("hadoopoffice.read.security.crypt.password", "test"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write/*from ww w. ja v a 2 s .c o m*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestoutencryptednegativelowfootprint"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); FileOutputFormat.setOutputPath(job, outputPath); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.write.lowFootprint", "true"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // excel // format // security // for the new Excel format you need to decide on your own which algorithms are // secure conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile"); conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256"); conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc"); conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512"); conf.set("hadoopoffice.write.security.crypt.password", "test"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // you just need to provide the password to read encrypted data conf.set("hadoopoffice.read.security.crypt.password", "test2"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); TaskAttemptContext context2 = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context2); InterruptedException ex = assertThrows(InterruptedException.class, () -> reader.initialize(splits.get(0), context2), "Exception is thrown in case of wrong password"); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetSignedPositive() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write/* w ww . java 2 s.c om*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestoutsignedpositive"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes /// signature String pkFileName = "testsigning.pfx"; // private key ClassLoader classLoader = getClass().getClassLoader(); String fileNameKeyStore = classLoader.getResource(pkFileName).getFile(); conf.set("hadoopoffice.write.security.sign.keystore.file", fileNameKeyStore); conf.set("hadoopoffice.write.security.sign.keystore.type", "PKCS12"); conf.set("hadoopoffice.write.security.sign.keystore.password", "changeit"); conf.set("hadoopoffice.write.security.sign.keystore.alias", "testalias"); conf.set("hadoopoffice.write.security.sign.hash.algorithm", "sha512"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.read.security.sign.verifysignature", "true"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetLowFootprintSignedPositive() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write//from w w w . j a v a2 s . c o m Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestoutsignedpositive"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.write.lowFootprint", "true"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes /// signature String pkFileName = "testsigning.pfx"; // private key ClassLoader classLoader = getClass().getClassLoader(); String fileNameKeyStore = classLoader.getResource(pkFileName).getFile(); conf.set("hadoopoffice.write.security.sign.keystore.file", fileNameKeyStore); conf.set("hadoopoffice.write.security.sign.keystore.type", "PKCS12"); conf.set("hadoopoffice.write.security.sign.keystore.password", "changeit"); conf.set("hadoopoffice.write.security.sign.keystore.alias", "testalias"); conf.set("hadoopoffice.write.security.sign.hash.algorithm", "sha512"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.read.security.sign.verifysignature", "true"); ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context); assertNotNull(reader, "Format returned null RecordReader"); reader.initialize(splits.get(0), context); Text spreadSheetKey = new Text(); ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(), "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\""); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns"); assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 1 == \"test1\""); assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 2 == \"test2\""); assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 1 with cell 3 == \"test3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contain row 2 and is empty"); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contain row 3 with 3 columns"); assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"1\""); assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 2 == \"2\""); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 3 == \"3\""); assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4"); spreadSheetKey = reader.getCurrentKey(); spreadSheetValue = reader.getCurrentValue(); assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contain row 4 with 1 column"); assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "Input Split for Excel file contains row 3 with cell 1 == \"3\""); }
From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License:Apache License
@Test public void writeExcelOutputFormatExcel2013SingleSheetLowFootprintSignedNegative() throws IOException, InterruptedException { // one row string and three columns ("test1","test2","test3") // (String formattedValue, String comment, String formula, String address,String // sheetName) SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1"); SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1"); SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1"); // empty row => nothing todo // one row numbers (1,2,3) SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1"); SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1"); SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1"); // one row formulas (=A3+B3) SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1"); // write/*from www . j a va 2 s. c o m*/ Job job = Job.getInstance(); Configuration conf = job.getConfiguration(); String fileName = "excel2013singlesheettestoutsignednegative"; String tmpDir = tmpPath.toString(); Path outputPath = new Path(tmpDir); conf.set("mapreduce.output.basename", fileName); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); // low footprint conf.set("hadoopoffice.write.lowFootprint", "true"); conf.set("hadoopoffice.write.mimeType", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); // new // Excel // format, // anyway // default, // but // here // for // illustrative // purposes conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt); conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0); conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1); FileOutputFormat.setOutputPath(job, outputPath); JobContext jContext = new JobContextImpl(conf, taskID.getJobID()); TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID); FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); // setup committer.setupJob(jContext); committer.setupTask(context); // set generic outputformat settings ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat(); RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context); assertNotNull(writer, "Format returned null RecordWriter"); writer.write(null, a1); writer.write(null, b1); writer.write(null, c1); writer.write(null, a3); writer.write(null, b3); writer.write(null, c3); writer.write(null, a4); writer.close(context); committer.commitTask(context); // try to read it again conf = new Configuration(defaultConf); job = Job.getInstance(conf); fileName = fileName + this.outputbaseAppendix; Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator + taskAttempt + File.separator + fileName + ".xlsx"); FileInputFormat.setInputPaths(job, inputFile); // set locale to the one of the test data conf.set("hadoopoffice.read.locale.bcp47", "de"); conf.set("hadoopoffice.read.security.sign.verifysignature", "true"); // will fail no signature present ExcelFileInputFormat inputFormat = new ExcelFileInputFormat(); FileInputFormat.setInputPaths(job, inputFile); TaskAttemptContext context2 = new TaskAttemptContextImpl(conf, new TaskAttemptID()); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size(), "Only one split generated for Excel file"); RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context2); InterruptedException ex = assertThrows(InterruptedException.class, () -> reader.initialize(splits.get(0), context2), "Exception is thrown in case signature cannot be verified"); }