Example usage for org.apache.poi POIDataSamples getDocumentInstance

List of usage examples for org.apache.poi POIDataSamples getDocumentInstance

Introduction

On this page you can find example usages of org.apache.poi POIDataSamples getDocumentInstance.

Prototype

public static POIDataSamples getDocumentInstance() 

Source Link

Usage

From source file:NewEmptyJUnitTest.java

/**
 * Prepares the fixtures used by the tests: records the sample file names,
 * opens the two primary sample documents with {@link WordExtractor} and
 * appends every entry of {@code p_text1} to {@code p_text1_block}.
 *
 * @throws Exception propagated from opening the sample resources or from
 *         constructing the extractors
 */
protected void setUp() throws Exception {

    String filename = "test2.doc";
    String filename2 = "test.doc";
    filename3 = "excel_with_embeded.xls";
    filename4 = "ThreeColHeadFoot.doc";
    filename5 = "HeaderFooterUnicode.doc";
    filename6 = "footnote.doc";
    POIDataSamples docTests = POIDataSamples.getDocumentInstance();
    extractor = new WordExtractor(docTests.openResourceAsStream(filename));
    extractor2 = new WordExtractor(docTests.openResourceAsStream(filename2));

    // Build splat'd out text version.
    // The builder is seeded with the current field value so the result is the
    // same as repeated "+=", without re-copying the growing string each pass.
    StringBuilder block = new StringBuilder().append(p_text1_block);
    for (int i = 0; i < p_text1.length; i++) {
        block.append(p_text1[i]);
    }
    p_text1_block = block.toString();
}

From source file:NewEmptyJUnitTest.java

/**
 * Checks that a Word 95 document is rejected by {@code WordExtractor} with an
 * {@code OldWordFileFormatException}, and that {@code Word6Extractor} returns
 * the expected full text and paragraph list for the same file.
 */
public void testWord95() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();

    // The default extractor must refuse this format
    try {
        extractor = new WordExtractor(samples.openResourceAsStream("Word95.doc"));
        fail();
    } catch (OldWordFileFormatException expected) {
        // rejection is the outcome this test is looking for
    }

    // The dedicated legacy extractor can read it
    Word6Extractor legacyExtractor = new Word6Extractor(samples.openResourceAsStream("Word95.doc"));
    String fullText = legacyExtractor.getText();

    assertTrue(fullText.contains("The quick brown fox jumps over the lazy dog"));
    assertTrue(fullText.contains("Paragraph 2"));
    assertTrue(fullText.contains("Paragraph 3. Has some RED text and some BLUE BOLD text in it"));
    assertTrue(fullText.contains("Last (4th) paragraph"));

    // Paragraph-by-paragraph expectations, in document order
    String[] expectedParagraphs = {
            "The quick brown fox jumps over the lazy dog\r\n",
            "\r\n",
            "Paragraph 2\r\n",
            "\r\n",
            "Paragraph 3. Has some RED text and some BLUE BOLD text in it.\r\n",
            "\r\n",
            "Last (4th) paragraph.\r\n"
    };
    String[] actualParagraphs = legacyExtractor.getParagraphText();
    assertEquals(expectedParagraphs.length, actualParagraphs.length);
    for (int idx = 0; idx < expectedParagraphs.length; idx++) {
        assertEquals(expectedParagraphs[idx], actualParagraphs[idx]);
    }
}

From source file:NewEmptyJUnitTest.java

/**
 * Checks that a Word 6 document is rejected by {@code WordExtractor} with an
 * {@code OldWordFileFormatException}, and that {@code Word6Extractor} returns
 * the single expected paragraph for the same file.
 */
public void testWord6() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();

    // The default extractor must refuse this format
    try {
        extractor = new WordExtractor(samples.openResourceAsStream("Word6.doc"));
        fail();
    } catch (OldWordFileFormatException expected) {
        // rejection is the outcome this test is looking for
    }

    Word6Extractor legacyExtractor = new Word6Extractor(samples.openResourceAsStream("Word6.doc"));
    String fullText = legacyExtractor.getText();

    assertTrue(fullText.contains("The quick brown fox jumps over the lazy dog"));

    String[] paragraphs = legacyExtractor.getParagraphText();
    assertEquals(1, paragraphs.length);
    assertEquals("The quick brown fox jumps over the lazy dog\r\n", paragraphs[0]);
}

From source file:NewEmptyJUnitTest.java

/**
 * Extracts the text of {@code rasp.doc} and checks that it contains two
 * specific runs of five identical Cyrillic letters.
 */
public void testFastSaved() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();
    extractor = new WordExtractor(samples.openResourceAsStream("rasp.doc"));

    final String extracted = extractor.getText();

    boolean hasFirstRun = extracted.contains("\u0425\u0425\u0425\u0425\u0425");
    assertTrue(hasFirstRun);

    boolean hasSecondRun = extracted.contains("\u0423\u0423\u0423\u0423\u0423");
    assertTrue(hasSecondRun);
}

From source file:NewEmptyJUnitTest.java

/**
 * Extracts the text of {@code Bug48075.doc} and checks that it begins with
 * the expected Cyrillic word.
 */
public void testFirstParagraphFix() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();
    extractor = new WordExtractor(samples.openResourceAsStream("Bug48075.doc"));

    final String extracted = extractor.getText();

    boolean startsWithExpectedWord = extracted
            .startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435");
    assertTrue(startsWithExpectedWord);
}

From source file:NewEmptyJUnitTest.java

/**
 * Tests that we can work with both {@link POIFSFileSystem}
 *  and {@link NPOIFSFileSystem}.
 *
 * The same sample document is opened through both filesystem classes, and the
 * extracted text is compared against {@code p_text1_block} twice: once from
 * the directory node directly and once through an {@link HWPFDocument}.
 *
 * @throws Exception propagated from opening the sample or from the extractors
 */
public void testDifferentPOIFS() throws Exception {
    POIDataSamples docTests = POIDataSamples.getDocumentInstance();

    // Open the two filesystems
    DirectoryNode[] files = new DirectoryNode[2];
    files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot();
    NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(docTests.getFile("test2.doc"));

    // try/finally so the file-based filesystem is closed even when an
    // assertion below fails or an extractor throws
    try {
        files[1] = npoifsFileSystem.getRoot();

        // Open directly
        for (DirectoryNode dir : files) {
            WordExtractor extractor = new WordExtractor(dir);
            assertEquals(p_text1_block, extractor.getText());
        }

        // Open via a HWPFDocument
        for (DirectoryNode dir : files) {
            HWPFDocument doc = new HWPFDocument(dir);
            WordExtractor extractor = new WordExtractor(doc);
            assertEquals(p_text1_block, extractor.getText());
        }
    } finally {
        npoifsFileSystem.close();
    }
}

From source file:NewEmptyJUnitTest.java

/**
 * [RESOLVED FIXED] Bug 51686 - Update to POI 3.8 beta 4 causes
 * ConcurrentModificationException in Tika's OfficeParser
 *
 * Walks the root entries of {@code Bug51686.doc} and, on reaching the
 * "WordDocument" entry, extracts the text through a {@link WordExtractor}
 * built on the same filesystem. The test passes when text was obtained.
 */
public void testBug51686() throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(
            POIDataSamples.getDocumentInstance().openResourceAsStream("Bug51686.doc"));

    String text = null;

    for (Entry entry : fs.getRoot()) {
        if (!"WordDocument".equals(entry.getName())) {
            continue;
        }

        // The extractor is created while the root is still being iterated;
        // presumably this is the scenario from the bug report, so keep it
        // inside the loop.
        WordExtractor ex = new WordExtractor(fs);
        try {
            text = ex.getText();
        } finally {
            ex.close();
        }
    }

    assertNotNull(text);
}