List of usage examples for org.apache.poi.poifs.eventfilesystem.POIFSReader#registerListener
public void registerListener(final POIFSReaderListener listener, final String name)
From source file:com.flexive.extractor.ExcelExtractor.java
License:Open Source License
/** * Extracts the text informations from the excel file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *///from w w w .j a va 2 s. c om public ExtractedData extract(final InputStream in) { BufferedInputStream bis = null; try { writer = new ByteArrayOutputStream(); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text by processing all sheets HSSFWorkbook wb = new HSSFWorkbook(bis); for (int i = 0; i < wb.getNumberOfSheets(); i++) { HSSFSheet sheet = wb.getSheetAt(i); processSheet(sheet); } // Append summary info to text if (fxsi != null) { writer.write(FxSharedUtils.getBytes(fxsi.getFTIndexInformations())); } writer.flush(); return new ExtractedData(fxsi, writer.toString()); } catch (Exception exc) { exc.printStackTrace(); return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.flexive.extractor.FxSummaryInformation.java
License:Open Source License
/**
 * Reads the summary information from a document.
 *
 * @param input the input stream to read from; this method does not close it itself
 * @return the summary information, or null if reading failed or no summary was delivered
 */
public static FxSummaryInformation getSummaryInformation(InputStream input) {
    /** Listener that keeps the summary information built from the reader event. */
    class SummaryCollector implements POIFSReaderListener {
        private FxSummaryInformation collected = null;

        /**
         * Processes the Summary section.
         *
         * @param event the summary section event.
         */
        @Override
        public void processPOIFSReaderEvent(POIFSReaderEvent event) {
            try {
                collected = new FxSummaryInformation(
                        (SummaryInformation) PropertySetFactory.create(event.getStream()));
            } catch (Exception ex) {
                /* ignore: the collected value simply stays as it was */
            }
        }

        protected FxSummaryInformation getCollected() {
            return collected;
        }
    }

    try {
        final POIFSReader poifsReader = new POIFSReader();
        final SummaryCollector collector = new SummaryCollector();
        poifsReader.registerListener(collector, "\005SummaryInformation");
        poifsReader.read(input);
        return collector.getCollected();
    } catch (Exception ex) {
        // Any failure while reading is reported to the caller as "no summary".
        return null;
    }
}
From source file:com.flexive.extractor.WordExtractor.java
License:Open Source License
/** * Extracts the text informations from the word file. * * @param in the input stream to read from * @return the extraxted informations, or null if no text extraction was possible *//*from w w w . j av a 2 s . co m*/ public ExtractedData extract(final InputStream in) { ByteArrayOutputStream baos = null; PrintWriter writer = null; BufferedInputStream bis = null; try { baos = new ByteArrayOutputStream(); writer = new PrintWriter(baos); // We need to read the stream 2 times, so we use a buffered input stream and mark the // beginning bis = new BufferedInputStream(in); bis.mark(Integer.MAX_VALUE); // Retrieve summary information POIFSReader r = new POIFSReader(); r.registerListener(this, "\005SummaryInformation"); r.read(bis); bis.reset(); // Retrieve text WordDocument wd = new WordDocument(bis); wd.writeAllText(writer); if (fxsi != null) { writer.write(fxsi.getFTIndexInformations()); } writer.flush(); return new ExtractedData(fxsi, baos.toString()); } catch (Exception exc) { return null; } finally { try { if (writer != null) writer.close(); } catch (Exception exc) { /*ignore*/} try { if (baos != null) baos.close(); } catch (Exception exc) { /*ignore*/} try { if (bis != null) bis.close(); } catch (Exception exc) { /*ignore*/} } }
From source file:com.krawler.esp.fileparser.word.ExtractWordFile.java
License:Open Source License
/**
 * Reads the given stream with a POIFS reader and returns the properties gathered by the broker.
 *
 * @param in the input stream to read from; it is not closed by this method
 * @return whatever the properties broker reports after the stream has been read
 * @throws IOException declared by the signature; presumably raised by the reader when the
 *                     stream cannot be read
 */
public Properties extractProperties(InputStream in) throws IOException {
    final PropertiesBroker broker = new PropertiesBroker();
    final POIFSReader poifsReader = new POIFSReader();

    // The listener is registered for the summary stream only and is handed the broker.
    final PropertiesReaderListener summaryListener = new PropertiesReaderListener(broker);
    poifsReader.registerListener(summaryListener, "\005SummaryInformation");

    poifsReader.read(in);
    return broker.getProperties();
}
From source file:net.sourceforge.docfetcher.model.parse.MSOfficeParser.java
License:Open Source License
protected final ParseResult parse(File file, ParseContext context) throws ParseException { String contents = renderText(file, context.getFilename()); ParseResult parseResult = new ParseResult(contents); POIFSReader reader = new POIFSReader(); MyReaderListener listener = new MyReaderListener(); reader.registerListener(listener, "\005SummaryInformation"); //$NON-NLS-1$ InputStream in = null;/*from w w w . ja v a 2s .c o m*/ try { in = new FileInputStream(file); try { reader.read(in); } catch (IllegalArgumentException e) { // Bug #3537738: "IllegalArgumentException: name cannot be empty" throw new ParseException(e); } parseResult.setTitle(listener.title).addAuthor(listener.author).addMiscMetadata(listener.subject) .addMiscMetadata(listener.keywords).addMiscMetadata(listener.comments); } catch (IOException e) { // Ignore, we can live without meta data Util.printErr(e); } finally { Closeables.closeQuietly(in); } return parseResult; }
From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test public void testZippedOffice() throws Exception { new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }/*from w ww.ja v a 2 s . c om*/ }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); } }; new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { WordExtractor extractor = null; try { extractor = new WordExtractor(in); extractor.getText(); } finally { Closeables.closeQuietly(extractor); } } }; }
From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test(expected = IOException.class) public void testZippedOfficeFail() throws Exception { // This will fail because we're trying to read the same InputStream twice new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }//from w w w.j a v a 2s.com }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); WordExtractor extractor = null; try { extractor = new WordExtractor(in); extractor.getText(); } finally { Closeables.closeQuietly(extractor); } } }; }
From source file:net.sourceforge.docfetcher.parse.MSOfficeParser.java
License:Open Source License
public Document parse(File file) throws ParseException { StringBuffer contents = new StringBuffer(renderText(file)); POIFSReader reader = new POIFSReader(); MyReaderListener listener = new MyReaderListener(); reader.registerListener(listener, "\005SummaryInformation"); //$NON-NLS-1$ try {// ww w .jav a2s . c om InputStream in = new FileInputStream(file); reader.read(in); in.close(); } catch (FileNotFoundException e) { throw new ParseException(file, Msg.file_not_found.value()); } catch (IOException e) { throw new ParseException(file, Msg.file_not_readable.value()); } String[] metaData = new String[] { listener.author, listener.title, listener.subject, listener.keywords, listener.comments }; for (String field : metaData) if (field != null) contents.append(" ").append(field); //$NON-NLS-1$ return new Document(file, listener.title, contents).addAuthor(listener.author); }
From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test public void testZippedOffice() throws Exception { new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }// w w w. java 2 s . c om }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); } }; new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { WordExtractor extractor = new WordExtractor(in); extractor.getText(); } }; }
From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test(expected = IOException.class) public void testZippedOfficeFail() throws Exception { // This will fail because we're trying to read the same InputStream twice new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }/* w w w.ja v a2s . c o m*/ }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); WordExtractor extractor = new WordExtractor(in); extractor.getText(); } }; }