List of usage examples for org.apache.hadoop.mapreduce.task.JobContextImpl
public JobContextImpl(Configuration conf, JobID jobId)
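Before the project examples below, a minimal, self-contained sketch of direct construction. The "example" job-tracker identifier and the job number passed to JobID are illustrative placeholders, not values taken from any of the examples:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;

public class JobContextImplExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration(); // real callers usually pass the job's configuration
        JobID jobId = new JobID("example", 1); // placeholder identifier and job number
        JobContext context = new JobContextImpl(conf, jobId);
        System.out.println(context.getJobID()); // e.g. job_example_0001
    }
}

The examples below follow the same pattern: obtain a Configuration (often from a Job), construct a JobContextImpl, and pass it to InputFormat.getSplits(...) or to a FileOutputCommitter.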
From source file: org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims.java
License: Apache License
public static JobContext createJobContext(Configuration conf, JobID jobId) {
    if (conf instanceof JobConf) {
        return new JobContextImpl(new JobConf(conf), jobId);
    } else {
        return new JobContextImpl(conf, jobId);
    }
}
From source file: org.apache.rya.accumulo.mr.GraphXEdgeInputFormatTest.java
License: Apache License
@SuppressWarnings("rawtypes") @Test/*from ww w . j a va2s. c o m*/ public void testInputFormat() throws Exception { RyaStatement input = RyaStatement.builder().setSubject(new RyaURI("http://www.google.com")) .setPredicate(new RyaURI("http://some_other_uri")).setObject(new RyaURI("http://www.yahoo.com")) .setColumnVisibility(new byte[0]).setValue(new byte[0]).build(); apiImpl.add(input); Job jobConf = Job.getInstance(); GraphXEdgeInputFormat.setMockInstance(jobConf, instance.getInstanceName()); GraphXEdgeInputFormat.setConnectorInfo(jobConf, username, password); GraphXEdgeInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setInputTableName(jobConf, table); GraphXEdgeInputFormat.setScanIsolation(jobConf, false); GraphXEdgeInputFormat.setLocalIterators(jobConf, false); GraphXEdgeInputFormat.setOfflineTableScan(jobConf, false); GraphXEdgeInputFormat inputFormat = new GraphXEdgeInputFormat(); JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID()); List<InputSplit> splits = inputFormat.getSplits(context); Assert.assertEquals(1, splits.size()); TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1)); RecordReader reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext); RecordReader ryaStatementRecordReader = (RecordReader) reader; ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext); List<Edge> results = new ArrayList<Edge>(); while (ryaStatementRecordReader.nextKeyValue()) { Edge writable = (Edge) ryaStatementRecordReader.getCurrentValue(); long srcId = writable.srcId(); long destId = writable.dstId(); RyaTypeWritable rtw = null; Object text = ryaStatementRecordReader.getCurrentKey(); Edge<RyaTypeWritable> edge = new Edge<RyaTypeWritable>(srcId, destId, rtw); results.add(edge); System.out.println(text); } System.out.println(results.size()); System.out.println(results); Assert.assertTrue(results.size() == 2); }
From source file: org.apache.rya.accumulo.mr.GraphXInputFormatTest.java
License: Apache License
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
            .setSubject(new RyaURI("http://www.google.com"))
            .setPredicate(new RyaURI("http://some_other_uri"))
            .setObject(new RyaURI("http://www.yahoo.com"))
            .setColumnVisibility(new byte[0])
            .setValue(new byte[0])
            .build();
    apiImpl.add(input);

    Job jobConf = Job.getInstance();
    GraphXInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXInputFormat.setInputTableName(jobConf, table);
    GraphXInputFormat.setScanIsolation(jobConf, false);
    GraphXInputFormat.setLocalIterators(jobConf, false);
    GraphXInputFormat.setOfflineTableScan(jobConf, false);

    GraphXInputFormat inputFormat = new GraphXInputFormat();
    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(),
            new TaskAttemptID(new TaskID(), 1));
    RecordReader<Object, RyaTypeWritable> reader = inputFormat.createRecordReader(splits.get(0),
            taskAttemptContext);
    RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<RyaType> results = new ArrayList<RyaType>();
    System.out.println("before while");
    while (ryaStatementRecordReader.nextKeyValue()) {
        System.out.println("in while");
        RyaTypeWritable writable = ryaStatementRecordReader.getCurrentValue();
        RyaType value = writable.getRyaType();
        Object text = ryaStatementRecordReader.getCurrentKey();
        RyaType type = new RyaType();
        type.setData(value.getData());
        type.setDataType(value.getDataType());
        results.add(type);
        System.out.println(value.getData());
        System.out.println(value.getDataType());
        System.out.println(results);
        System.out.println(type);
        System.out.println(text);
        System.out.println(value);
    }
    System.out.println("after while");
    System.out.println(results.size());
    System.out.println(results);
    // Assert.assertTrue(results.size() == 2);
    // Assert.assertTrue(results.contains(input));
}
From source file: org.apache.rya.accumulo.mr.RyaInputFormatTest.java
License: Apache License
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
            .setSubject(new RyaURI("http://www.google.com"))
            .setPredicate(new RyaURI("http://some_other_uri"))
            .setObject(new RyaURI("http://www.yahoo.com"))
            .setColumnVisibility(new byte[0])
            .setValue(new byte[0])
            .build();
    apiImpl.add(input);

    Job jobConf = Job.getInstance();
    RyaInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    RyaInputFormat.setConnectorInfo(jobConf, username, password);
    RyaInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    AccumuloInputFormat.setInputTableName(jobConf, table);
    AccumuloInputFormat.setScanIsolation(jobConf, false);
    AccumuloInputFormat.setLocalIterators(jobConf, false);
    AccumuloInputFormat.setOfflineTableScan(jobConf, false);

    RyaInputFormat inputFormat = new RyaInputFormat();
    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(),
            new TaskAttemptID(new TaskID(), 1));
    RecordReader<Text, RyaStatementWritable> reader = inputFormat.createRecordReader(splits.get(0),
            taskAttemptContext);
    RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);

    List<RyaStatement> results = new ArrayList<RyaStatement>();
    while (ryaStatementRecordReader.nextKeyValue()) {
        RyaStatementWritable writable = ryaStatementRecordReader.getCurrentValue();
        RyaStatement value = writable.getRyaStatement();
        Text text = ryaStatementRecordReader.getCurrentKey();
        RyaStatement stmt = RyaStatement.builder()
                .setSubject(value.getSubject())
                .setPredicate(value.getPredicate())
                .setObject(value.getObject())
                .setContext(value.getContext())
                .setQualifier(value.getQualifer())
                .setColumnVisibility(value.getColumnVisibility())
                .setValue(value.getValue())
                .build();
        results.add(stmt);
        System.out.println(text);
        System.out.println(value);
    }
    Assert.assertEquals(2, results.size());
    Assert.assertTrue(results.contains(input));
}
From source file: org.apache.tajo.storage.hbase.HBaseStorageManager.java
License: Apache License
@Override
public Path commitOutputData(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }
    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseStorageManager.getHBaseConfiguration(queryContext.getConf(),
            tableDesc.getMeta());
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    if (tableDesc.getName() == null && tableDesc.getPath() != null) {
        // insert into location
        return super.commitOutputData(queryContext, finalEbId, plan, schema, tableDesc, false);
    } else {
        // insert into table
        String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
        HTable htable = new HTable(hbaseConf, tableName);
        try {
            LoadIncrementalHFiles loadIncrementalHFiles = null;
            try {
                loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
            } catch (Exception e) {
                LOG.error(e.getMessage(), e);
                throw new IOException(e.getMessage(), e);
            }
            loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
            return stagingResultDir;
        } finally {
            htable.close();
        }
    }
}
From source file: org.apache.tajo.storage.hbase.HBaseTablespace.java
License: Apache License
@Override
public Path commitTable(OverridableConf queryContext, ExecutionBlockId finalEbId, LogicalPlan plan,
        Schema schema, TableDesc tableDesc) throws IOException {
    if (tableDesc == null) {
        throw new IOException("TableDesc is null while calling loadIncrementalHFiles: " + finalEbId);
    }
    Path stagingDir = new Path(queryContext.get(QueryVars.STAGING_DIR));
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);

    Configuration hbaseConf = HBaseConfiguration.create(this.hbaseConf);
    hbaseConf.set("hbase.loadincremental.threads.max", "2");

    JobContextImpl jobContext = new JobContextImpl(hbaseConf,
            new JobID(finalEbId.getQueryId().toString(), finalEbId.getId()));

    FileOutputCommitter committer = new FileOutputCommitter(stagingResultDir, jobContext);
    Path jobAttemptPath = committer.getJobAttemptPath(jobContext);
    FileSystem fs = jobAttemptPath.getFileSystem(queryContext.getConf());
    if (!fs.exists(jobAttemptPath) || fs.listStatus(jobAttemptPath) == null) {
        LOG.warn("No query attempt file in " + jobAttemptPath);
        return stagingResultDir;
    }
    committer.commitJob(jobContext);

    // insert into table
    String tableName = tableDesc.getMeta().getOption(HBaseStorageConstants.META_TABLE_KEY);
    HTable htable = new HTable(hbaseConf, tableName);
    try {
        LoadIncrementalHFiles loadIncrementalHFiles = null;
        try {
            loadIncrementalHFiles = new LoadIncrementalHFiles(hbaseConf);
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            throw new IOException(e.getMessage(), e);
        }
        loadIncrementalHFiles.doBulkLoad(stagingResultDir, htable);
        return stagingResultDir;
    } finally {
        htable.close();
    }
}
From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java
License: Apache License
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil
                .getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage)
                    .isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR, Constants
                    .getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat
                .getSplits(new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file: org.seqdoop.hadoop_bam.TestVCFInputFormatStringency.java
License: Open Source License
public void checkReading(ValidationStringency validationStringency) throws Exception {
    String filename = "invalid_info_field.vcf";
    Configuration conf = new Configuration();
    String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
    conf.set("mapred.input.dir", "file://" + input_file);
    if (validationStringency != null) {
        VCFRecordReader.setValidationStringency(conf, validationStringency);
    }
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

    VCFInputFormat inputFormat = new VCFInputFormat(conf);
    List<InputSplit> splits = inputFormat.getSplits(ctx);
    assertEquals(1, splits.size());
    RecordReader<LongWritable, VariantContextWritable> reader = inputFormat.createRecordReader(splits.get(0),
            taskAttemptContext);
    int counter = 0;
    while (reader.nextKeyValue()) {
        VariantContextWritable writable = reader.getCurrentValue();
        assertNotNull(writable);
        VariantContext vc = writable.get();
        assertNotNull(vc);
        String value = vc.toString();
        assertNotNull(value);
        counter++;
    }
    assertEquals(4, counter);
}
From source file: org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License: Apache License
@Test
public void writeExcelOutputFormatExcel2013SingleSheetLowFootprint() throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address, String sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String fileName = "excel2013singlesheettestout";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    // new Excel format; the default anyway, but set here for illustrative purposes
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    // set generic outputformat settings
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    FileInputFormat.setInputPaths(job, inputFile);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 and it is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"3\"");
}
From source file: org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java
License: Apache License
@Test
public void writeExcelOutputFormatExcel2013SingleSheetEncryptedPositiveLowFootprint()
        throws IOException, InterruptedException {
    // one row string and three columns ("test1","test2","test3")
    // (String formattedValue, String comment, String formula, String address, String sheetName)
    SpreadSheetCellDAO a1 = new SpreadSheetCellDAO("test1", "", "", "A1", "Sheet1");
    SpreadSheetCellDAO b1 = new SpreadSheetCellDAO("test2", "", "", "B1", "Sheet1");
    SpreadSheetCellDAO c1 = new SpreadSheetCellDAO("test3", "", "", "C1", "Sheet1");
    // empty row => nothing to do
    // one row numbers (1,2,3)
    SpreadSheetCellDAO a3 = new SpreadSheetCellDAO("", "", "1", "A3", "Sheet1");
    SpreadSheetCellDAO b3 = new SpreadSheetCellDAO("", "", "2", "B3", "Sheet1");
    SpreadSheetCellDAO c3 = new SpreadSheetCellDAO("", "", "3", "C3", "Sheet1");
    // one row formulas (=A3+B3)
    SpreadSheetCellDAO a4 = new SpreadSheetCellDAO("", "", "A3+B3", "A4", "Sheet1");
    // write
    Job job = Job.getInstance();
    Configuration conf = job.getConfiguration();
    String fileName = "excel2013singlesheettestoutencryptedpositivelowfootprint";
    String tmpDir = tmpPath.toString();
    Path outputPath = new Path(tmpDir);
    conf.set("mapreduce.output.basename", fileName);
    // set generic outputformat settings
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // low footprint
    conf.set("hadoopoffice.write.lowFootprint", "true");
    // new Excel format
    conf.set("hadoopoffice.write.mimeType",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    // security: for the new Excel format you need to decide on your own which algorithms are secure
    conf.set("hadoopoffice.write.security.crypt.encrypt.mode", "agile");
    conf.set("hadoopoffice.write.security.crypt.encrypt.algorithm", "aes256");
    conf.set("hadoopoffice.write.security.crypt.chain.mode", "cbc");
    conf.set("hadoopoffice.write.security.crypt.hash.algorithm", "sha512");
    conf.set("hadoopoffice.write.security.crypt.password", "test");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
    conf.setInt(FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
    FileOutputFormat.setOutputPath(job, outputPath);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(context);
    ExcelFileOutputFormat outputFormat = new ExcelFileOutputFormat();
    RecordWriter<NullWritable, SpreadSheetCellDAO> writer = outputFormat.getRecordWriter(context);
    assertNotNull(writer, "Format returned null RecordWriter");
    writer.write(null, a1);
    writer.write(null, b1);
    writer.write(null, c1);
    writer.write(null, a3);
    writer.write(null, b3);
    writer.write(null, c3);
    writer.write(null, a4);
    writer.close(context);
    committer.commitTask(context);
    // try to read it again
    conf = new Configuration(defaultConf);
    job = Job.getInstance(conf);
    fileName = fileName + this.outputbaseAppendix;
    Path inputFile = new Path(tmpDir + File.separator + "_temporary" + File.separator + "0" + File.separator
            + taskAttempt + File.separator + fileName + ".xlsx");
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // you just need to provide the password to read encrypted data
    conf.set("hadoopoffice.read.security.crypt.password", "test");
    ExcelFileInputFormat inputFormat = new ExcelFileInputFormat();
    FileInputFormat.setInputPaths(job, inputFile);
    context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = inputFormat.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[" + fileName + ".xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[" + fileName + ".xlsx]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(0, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 and it is empty");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 3 columns");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"1\"");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"2\"");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 3 == \"3\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"3\"");
}