List of usage examples for org.apache.poi.POIDataSamples.getDocumentInstance()
public static POIDataSamples getDocumentInstance()
From source file:NewEmptyJUnitTest.java
protected void setUp() throws Exception { String filename = "test2.doc"; String filename2 = "test.doc"; filename3 = "excel_with_embeded.xls"; filename4 = "ThreeColHeadFoot.doc"; filename5 = "HeaderFooterUnicode.doc"; filename6 = "footnote.doc"; POIDataSamples docTests = POIDataSamples.getDocumentInstance(); extractor = new WordExtractor(docTests.openResourceAsStream(filename)); extractor2 = new WordExtractor(docTests.openResourceAsStream(filename2)); // Build splat'd out text version for (int i = 0; i < p_text1.length; i++) { p_text1_block += p_text1[i];/*from w ww.j a v a 2s .c o m*/ } }
From source file:NewEmptyJUnitTest.java
public void testWord95() throws Exception { // Too old for the default try {//from w w w.j av a2s . com extractor = new WordExtractor(POIDataSamples.getDocumentInstance().openResourceAsStream("Word95.doc")); fail(); } catch (OldWordFileFormatException e) { } // Can work with the special one Word6Extractor w6e = new Word6Extractor( POIDataSamples.getDocumentInstance().openResourceAsStream("Word95.doc")); String text = w6e.getText(); assertTrue(text.contains("The quick brown fox jumps over the lazy dog")); assertTrue(text.contains("Paragraph 2")); assertTrue(text.contains("Paragraph 3. Has some RED text and some BLUE BOLD text in it")); assertTrue(text.contains("Last (4th) paragraph")); String[] tp = w6e.getParagraphText(); assertEquals(7, tp.length); assertEquals("The quick brown fox jumps over the lazy dog\r\n", tp[0]); assertEquals("\r\n", tp[1]); assertEquals("Paragraph 2\r\n", tp[2]); assertEquals("\r\n", tp[3]); assertEquals("Paragraph 3. Has some RED text and some BLUE BOLD text in it.\r\n", tp[4]); assertEquals("\r\n", tp[5]); assertEquals("Last (4th) paragraph.\r\n", tp[6]); }
From source file:NewEmptyJUnitTest.java
public void testWord6() throws Exception { // Too old for the default try {/*from w w w . j av a 2 s .c om*/ extractor = new WordExtractor(POIDataSamples.getDocumentInstance().openResourceAsStream("Word6.doc")); fail(); } catch (OldWordFileFormatException e) { } Word6Extractor w6e = new Word6Extractor( POIDataSamples.getDocumentInstance().openResourceAsStream("Word6.doc")); String text = w6e.getText(); assertTrue(text.contains("The quick brown fox jumps over the lazy dog")); String[] tp = w6e.getParagraphText(); assertEquals(1, tp.length); assertEquals("The quick brown fox jumps over the lazy dog\r\n", tp[0]); }
From source file:NewEmptyJUnitTest.java
/**
 * Extracts the text of the {@code rasp.doc} sample and checks that it
 * contains both expected runs of five repeated Cyrillic letters.
 *
 * @throws Exception if the sample document cannot be opened or parsed
 */
public void testFastSaved() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();
    extractor = new WordExtractor(samples.openResourceAsStream("rasp.doc"));

    String extracted = extractor.getText();

    assertTrue(extracted.contains("\u0425\u0425\u0425\u0425\u0425"));
    assertTrue(extracted.contains("\u0423\u0423\u0423\u0423\u0423"));
}
From source file:NewEmptyJUnitTest.java
/**
 * Extracts the text of the {@code Bug48075.doc} sample and checks that it
 * begins with the expected Cyrillic word.
 *
 * @throws Exception if the sample document cannot be opened or parsed
 */
public void testFirstParagraphFix() throws Exception {
    POIDataSamples samples = POIDataSamples.getDocumentInstance();
    extractor = new WordExtractor(samples.openResourceAsStream("Bug48075.doc"));

    String extracted = extractor.getText();

    assertTrue(extracted.startsWith("\u041f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435"));
}
From source file:NewEmptyJUnitTest.java
/** * Tests that we can work with both {@link POIFSFileSystem} * and {@link NPOIFSFileSystem}//from w w w .j a va 2 s . co m */ public void testDifferentPOIFS() throws Exception { POIDataSamples docTests = POIDataSamples.getDocumentInstance(); // Open the two filesystems DirectoryNode[] files = new DirectoryNode[2]; files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot(); NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(docTests.getFile("test2.doc")); files[1] = npoifsFileSystem.getRoot(); // Open directly for (DirectoryNode dir : files) { WordExtractor extractor = new WordExtractor(dir); assertEquals(p_text1_block, extractor.getText()); } // Open via a HWPFDocument for (DirectoryNode dir : files) { HWPFDocument doc = new HWPFDocument(dir); WordExtractor extractor = new WordExtractor(doc); assertEquals(p_text1_block, extractor.getText()); } npoifsFileSystem.close(); }
From source file:NewEmptyJUnitTest.java
/** * [RESOLVED FIXED] Bug 51686 - Update to POI 3.8 beta 4 causes * ConcurrentModificationException in Tika's OfficeParser *///from w ww . j a v a 2 s. c o m public void testBug51686() throws IOException { InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug51686.doc"); POIFSFileSystem fs = new POIFSFileSystem(is); String text = null; for (Entry entry : fs.getRoot()) { if ("WordDocument".equals(entry.getName())) { WordExtractor ex = new WordExtractor(fs); try { text = ex.getText(); } finally { ex.close(); } } } assertNotNull(text); }