Example usage for org.apache.hadoop.mapreduce.task TaskAttemptContextImpl TaskAttemptContextImpl

List of usage examples for org.apache.hadoop.mapreduce.task TaskAttemptContextImpl TaskAttemptContextImpl

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.task TaskAttemptContextImpl TaskAttemptContextImpl.

Prototype

public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId) 

Source Link

Usage

From source file:org.warcbase.io.GenericArchiveRecordWritableTest.java

License:Apache License

@Test
public void testWarcInputFormat() throws Exception {
    String warcFile = Resources.getResource("warc/example.warc.gz").getPath();

    Configuration conf = new Configuration(false);
    conf.set("fs.defaultFS", "file:///");

    File testFile = new File(warcFile);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat = ReflectionUtils
            .newInstance(WacGenericInputFormat.class, conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    RecordReader<LongWritable, GenericArchiveRecordWritable> reader = inputFormat.createRecordReader(split,
            context);/*www  . ja  v a2s.c o m*/

    reader.initialize(split, context);

    int cnt = 0;
    while (reader.nextKeyValue()) {
        GenericArchiveRecordWritable record = reader.getCurrentValue();

        cnt++;

        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(bytesOut);

        record.write(dataOut);

        GenericArchiveRecordWritable reconstructed = new GenericArchiveRecordWritable();

        reconstructed.setFormat(ArchiveFormat.WARC);
        reconstructed.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

        boolean isWarc = (record.getFormat() == ArchiveFormat.WARC);
        assertTrue(isWarc);
        if (isWarc) {
            assertEquals(record.getRecord().getHeader().getUrl(),
                    reconstructed.getRecord().getHeader().getUrl());
            assertEquals(record.getRecord().getHeader().getContentLength(),
                    reconstructed.getRecord().getHeader().getContentLength());
        }
    }

    assertEquals(822, cnt);
}

From source file:org.warcbase.mapreduce.WacGenericInputFormatTest.java

License:Apache License

@Test
public void testArcInputFormat() throws Exception {
    String[] urls = new String[] { "filedesc://IAH-20080430204825-00000-blackbook.arc", "dns:www.archive.org",
            "http://www.archive.org/robots.txt", "http://www.archive.org/",
            "http://www.archive.org/index.php" };

    String arcFile = Resources.getResource("arc/example.arc.gz").getPath();

    Configuration conf = new Configuration(false);
    conf.set("fs.defaultFS", "file:///");

    File testFile = new File(arcFile);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat = ReflectionUtils
            .newInstance(WacGenericInputFormat.class, conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    RecordReader<LongWritable, GenericArchiveRecordWritable> reader = inputFormat.createRecordReader(split,
            context);/*from  www  .  j a  v a  2 s  . co m*/

    reader.initialize(split, context);

    int cnt = 0;
    while (reader.nextKeyValue()) {
        ArchiveRecord record = reader.getCurrentValue().getRecord();
        boolean isArc = record instanceof ARCRecord;
        assertTrue(isArc);

        if (isArc) {
            ARCRecord arcRecord = (ARCRecord) record;
            ARCRecordMetaData metadata = arcRecord.getMetaData();

            if (cnt < urls.length) {
                assertEquals(urls[cnt], metadata.getUrl());
            }
        }

        cnt++;
    }
    assertEquals(300, cnt);
}

From source file:org.warcbase.mapreduce.WacGenericInputFormatTest.java

License:Apache License

@Test
public void testWarcInputFormat() throws Exception {
    String[] urls = new String[] { null, "dns:www.archive.org", "http://www.archive.org/robots.txt",
            "http://www.archive.org/robots.txt", "http://www.archive.org/robots.txt", "http://www.archive.org/",
            "http://www.archive.org/", "http://www.archive.org/", "http://www.archive.org/index.php",
            "http://www.archive.org/index.php" };

    String[] types = new String[] { "warcinfo", "response", "response", "request", "metadata", "response",
            "request", "metadata", "response", "request" };

    String arcFile = Resources.getResource("warc/example.warc.gz").getPath();

    Configuration conf = new Configuration(false);
    conf.set("fs.defaultFS", "file:///");

    File testFile = new File(arcFile);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat = ReflectionUtils
            .newInstance(WacGenericInputFormat.class, conf);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    RecordReader<LongWritable, GenericArchiveRecordWritable> reader = inputFormat.createRecordReader(split,
            context);/*from www  .j a v a 2 s . c  o m*/

    reader.initialize(split, context);

    assertTrue(urls.length == types.length);

    int cnt = 0;
    int responseCnt = 0;
    while (reader.nextKeyValue()) {
        ArchiveRecord record = reader.getCurrentValue().getRecord();
        boolean isWarc = record instanceof WARCRecord;
        assertTrue(isWarc);

        if (isWarc) {
            WARCRecord warcRecord = (WARCRecord) record;
            if (cnt < urls.length) {
                assertEquals(urls[cnt], warcRecord.getHeader().getUrl());
                assertEquals(types[cnt], warcRecord.getHeader().getHeaderValue("WARC-Type"));
            }

            if (warcRecord.getHeader().getHeaderValue("WARC-Type").equals("response")) {
                responseCnt++;
            }
        }

        cnt++;
    }
    assertEquals(822, cnt);
    assertEquals(299, responseCnt);
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderLowFootPrint()
        throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "multisheetheader.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.locale.bcp47", "us");
    conf.set("hadoopoffice.read.header.read", "true");
    conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
    conf.set("hadoopoffice.read.lowFootprint", "true");

    conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
    Job job = Job.getInstance(conf);/*w  w w . j  av a  2s  .  c o m*/
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    assertEquals("column1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
            " header column 1 correctly read");
    assertEquals("column2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
            " header column 2 correctly read");
    assertEquals("column3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
            " header column 3 correctly read");
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();

    // First Sheet
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A2 = \"1\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B2 = \"test1\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C2 = \"10\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A3 = \"2\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B3 = \"test3\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C3 = \"15\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");

    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A4 = \"10\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B4 = \"test2\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("20", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C4 = \"20\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C4", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");

    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (Second Sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("50", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A2 = \"50\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B2 = \"test1\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("80", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C2 = \"80\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");

    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (Second Sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("60", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A3 = \"60\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B3 = \"test3\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B3", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("90", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(), "C3 = \"90\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");

    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (Second Sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("70", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(), "A4 = \"70\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormula(), "Empty formula");
    assertEquals("A4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(), "Correct sheet");

    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "B4 = \"test2\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormula(), "Empty formula");
    assertEquals("B4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getSheetName(), "Correct sheet");

    assertEquals("10000", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "C4 = \"10000\"");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getComment(), "Empty Comment");
    assertEquals("", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormula(), "Empty formula");
    assertEquals("C4", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getAddress(), "Correct address");
    assertEquals("Sheet2", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getSheetName(), "Correct sheet");
    assertFalse(reader.nextKeyValue(), "Input Split for Excel file contains no further row");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint()
        throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "multisheetheader.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "us");
    conf.set("hadoopoffice.read.header.read", "true");
    conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
    conf.set("hadoopoffice.read.header.column.names.regex", "column");
    conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
    conf.set("hadoopoffice.read.lowFootprint", "true");
    conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
    Job job = Job.getInstance(conf);//from  w ww  .j ava2s . c o  m
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);

    assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
            " header column 1 correctly read");
    assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
            " header column 2 correctly read");
    assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
            " header column 3 correctly read");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2003SingleSheetLowFootPrint() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2003test.xls";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // low footprint
    conf.set("hadoopoffice.read.lowFootprint", "true");
    Job job = Job.getInstance(conf);//  w w w . j av  a  2 s . c o  m
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[excel2003test.xls]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[excel2003test.xls]Sheet1!A1\"");
    assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
            "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
    assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
            "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 4 == \"test4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 2 with cell 1 == \"4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns");
    assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
    assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"5\"");
    assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null");
    assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null");
    assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 5 == \"null\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"1\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 1 == \"2\"");
    assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 2== \"6\"");
    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 3== \"10\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 1 == \"3\"");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 2== \"4\"");
    assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 3== \"15\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2003MultiSheetAllLowFootPrint() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2003testmultisheet.xls";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.read.lowFootprint", "true");
    Job job = Job.getInstance(conf);/*from  w  ww. j a v  a2  s  . c o  m*/
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[excel2003testmultisheet.xls]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[excel2003testmultisheet.xls]Sheet1!A1\"");
    assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
            "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
    assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
            "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 4 == \"test4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 2 with cell 1 == \"4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns");
    assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
    assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"5\"");
    assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null");
    assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null");
    assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 5 == \"null\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"1\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 1 == \"2\"");
    assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 2== \"6\"");
    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 3== \"10\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 1 == \"3\"");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 2== \"4\"");
    assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 3== \"15\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 7 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 7 with cell 1 == \"8\"");
    assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 7 with cell 2 == \"99\"");
    assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 7 with 2 columns");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 8 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 8 with 1 column");
    assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 8 with cell 1 == \"test\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 9 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 9 with 3 columns");
    assertNull(spreadSheetValue.get()[0], "Input Split for Excel file contains row 9 with cell 1 == null");
    assertNull(spreadSheetValue.get()[1], "Input Split for Excel file contains row 9 with cell 2 == null");
    assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 9 with cell 3 == \"seven\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2013SingleSheetLowFootPrint() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2013test.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // low footprint
    conf.set("hadoopoffice.read.lowFootprint", "true");
    conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
    Job job = Job.getInstance(conf);/*w w w .  ja v  a 2s .co m*/
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[excel2013test.xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[excel2013test.xlsx]Sheet1!A1\"");
    assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
            "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
    assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
            "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 4 == \"test4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 2 with cell 1 == \"4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns");
    assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
    assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"5\"");
    assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null");
    assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null");
    assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 5 == \"null\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"1\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 1 == \"2\"");
    assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 2== \"6\"");
    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 3== \"10\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 1 == \"3\"");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 2== \"4\"");
    assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 3== \"15\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2013MultiSheetAllLowFootPrint() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2013testmultisheet.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.read.lowFootprint", "true");
    conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
    Job job = Job.getInstance(conf);// ww w .j  ava 2  s .c o  m
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[excel2013testmultisheet.xlsx]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[excel2013testmultisheet.xlsx]Sheet1!A1\"");
    assertEquals(4, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 4 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
            "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
    assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
            "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
    assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 4 == \"test4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 2 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 2 with 1 column");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 2 with cell 1 == \"4\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 3 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(5, spreadSheetValue.get().length, "Input Split for Excel file contains row 3 with 5 columns");
    assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
    assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 2 == \"5\"");
    assertNull(spreadSheetValue.get()[2], "Input Split for Excel file contains row 3 with cell 3 == null");
    assertNull(spreadSheetValue.get()[3], "Input Split for Excel file contains row 3 with cell 4 == null");
    assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
            "Input Split for Excel file contains row 3 with cell 5 == \"null\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 4 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 4 with 1 column");
    assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 4 with cell 1 == \"1\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 5 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 5 with 3 columns");
    assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 1 == \"2\"");
    assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 2== \"6\"");
    assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 5 with cell 3== \"10\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 6 (first sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 6 with 3 columns");
    assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 1 == \"3\"");
    assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 2== \"4\"");
    assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 6 with cell 3== \"15\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 7 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 7 with cell 1 == \"8\"");
    assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 7 with cell 2 == \"99\"");
    assertEquals(2, spreadSheetValue.get().length, "Input Split for Excel file contains row 7 with 2 columns");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 8 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(1, spreadSheetValue.get().length, "Input Split for Excel file contains row 8 with 1 column");
    assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 8 with cell 1 == \"test\"");
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 9 (second sheet)");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 9 with 3 columns");
    assertNull(spreadSheetValue.get()[0], "Input Split for Excel file contains row 9 with cell 1 == null");
    assertNull(spreadSheetValue.get()[1], "Input Split for Excel file contains row 9 with cell 2 == null");
    assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 9 with cell 3 == \"seven\"");
}

From source file:org.zuinnote.hadoop.office.format.mapreduce.OfficeFormatHadoopExcelLowFootPrintSAXTest.java

License:Apache License

@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedPositiveLowFootprint()
        throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2003encrypt.xls";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);

    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");

    // low footprint
    conf.set("hadoopoffice.read.lowFootprint", "true");

    // for decryption simply set the password
    conf.set("hadoopoffice.read.security.crypt.password", "test");
    Job job = Job.getInstance(conf);//from   w  ww .j  a va2 s  .com
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned  null RecordReader");
    reader.initialize(splits.get(0), context);
    Text spreadSheetKey = new Text();
    ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
    assertTrue(reader.nextKeyValue(), "Input Split for Excel file contains row 1");
    spreadSheetKey = reader.getCurrentKey();
    spreadSheetValue = reader.getCurrentValue();
    assertEquals("[excel2003encrypt.xls]Sheet1!A1", spreadSheetKey.toString(),
            "Input Split for Excel file has keyname == \"[excel2003encrypt.xls]Sheet1!A1\"");
    assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
    assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
    assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
            "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
    assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
            "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
    assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
    assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
            "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}