List of usage examples for org.apache.poi.poifs.eventfilesystem.POIFSReaderListener
POIFSReaderListener
From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test public void testZippedOffice() throws Exception { new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }/* w w w.j a va 2 s. c o m*/ }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); } }; new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { WordExtractor extractor = null; try { extractor = new WordExtractor(in); extractor.getText(); } finally { Closeables.closeQuietly(extractor); } } }; }
From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test(expected = IOException.class) public void testZippedOfficeFail() throws Exception { // This will fail because we're trying to read the same InputStream twice new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }/*from w w w. j a va 2 s.co m*/ }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); WordExtractor extractor = null; try { extractor = new WordExtractor(in); extractor.getText(); } finally { Closeables.closeQuietly(extractor); } } }; }
From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test public void testZippedOffice() throws Exception { new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }//from www . j a va 2 s .co m }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); } }; new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { WordExtractor extractor = new WordExtractor(in); extractor.getText(); } }; }
From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java
License:Open Source License
@Test(expected = IOException.class) public void testZippedOfficeFail() throws Exception { // This will fail because we're trying to read the same InputStream twice new ZipAndRun(TestFiles.doc) { protected void handleInputStream(InputStream in) throws Exception { POIFSReader reader = new POIFSReader(); reader.registerListener(new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { // Nothing }//from w w w . j av a 2 s.c o m }, "\005SummaryInformation"); //$NON-NLS-1$ reader.read(in); WordExtractor extractor = new WordExtractor(in); extractor.getText(); } }; }
From source file:org.exoplatform.services.document.impl.POIPropertiesReader.java
License:Open Source License
/** * Metadata extraction from OLE2 documents (legacy MS office file formats) * /*from w w w . ja v a 2 s .c o m*/ * @param is * @return * @throws IOException * @throws DocumentReadException */ public Properties readDCProperties(final InputStream is) throws IOException, DocumentReadException { if (is == null) { throw new IllegalArgumentException("InputStream is null."); } @SuppressWarnings("serial") class POIRuntimeException extends RuntimeException { private Throwable ex; public POIRuntimeException(Throwable ex) { this.ex = ex; } public Throwable getException() { return ex; } } POIFSReaderListener readerListener = new POIFSReaderListener() { public void processPOIFSReaderEvent(final POIFSReaderEvent event) { PropertySet ps; try { ps = PropertySetFactory.create(event.getStream()); if (ps instanceof SummaryInformation) { SummaryInformation si = (SummaryInformation) ps; if (si.getLastAuthor() != null && si.getLastAuthor().length() > 0) { props.put(DCMetaData.CONTRIBUTOR, si.getLastAuthor()); } if (si.getComments() != null && si.getComments().length() > 0) { props.put(DCMetaData.DESCRIPTION, si.getComments()); } if (si.getCreateDateTime() != null) { props.put(DCMetaData.DATE, si.getCreateDateTime()); } if (si.getAuthor() != null && si.getAuthor().length() > 0) { props.put(DCMetaData.CREATOR, si.getAuthor()); } if (si.getKeywords() != null && si.getKeywords().length() > 0) { props.put(DCMetaData.SUBJECT, si.getKeywords()); } if (si.getLastSaveDateTime() != null) { props.put(DCMetaData.DATE, si.getLastSaveDateTime()); } // if(docInfo.getProducer() != null) // props.put(DCMetaData.PUBLISHER, docInfo.getProducer()); if (si.getSubject() != null && si.getSubject().length() > 0) { props.put(DCMetaData.SUBJECT, si.getSubject()); } if (si.getTitle() != null && si.getTitle().length() > 0) { props.put(DCMetaData.TITLE, si.getTitle()); } } } catch (NoPropertySetStreamException e) { throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e)); } catch 
(MarkUnsupportedException e) { throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e)); } catch (UnsupportedEncodingException e) { throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e)); } catch (IOException e) { throw new POIRuntimeException(e); } } }; try { final POIFSReader poiFSReader = new POIFSReader(); poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME); SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<Void>() { public Void run() throws Exception { poiFSReader.read(is); return null; } }); } catch (POIRuntimeException e) { Throwable ex = e.getException(); if (ex instanceof IOException) { throw (IOException) ex; } else { throw (DocumentReadException) ex; } } finally { if (is != null) { try { is.close(); } catch (IOException e) { if (LOG.isTraceEnabled()) { LOG.trace("An exception occurred: " + e.getMessage()); } } } } return props; }
From source file:org.sakaiproject.search.component.adapter.contenthosting.PPTContentDigester.java
License:Educational Community License
public String getContent(ContentResource contentResource) { if (contentResource == null) { throw new RuntimeException("Null contentResource passed to getContent"); }//w ww. ja v a 2 s . com InputStream contentStream = null; try { // this is informed by the text extractors in Jackrabbit final ByteArrayOutputStream os = new ByteArrayOutputStream(); POIFSReaderListener listener = new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { try { if (!event.getName().equalsIgnoreCase("PowerPoint Document")) { return; } DocumentInputStream input = event.getStream(); byte[] buffer = new byte[input.available()]; input.read(buffer, 0, input.available()); for (int i = 0; i < buffer.length - 20; i++) { long type = LittleEndian.getUShort(buffer, i + 2); long size = LittleEndian.getUInt(buffer, i + 4); if (type == 4008) { os.write(buffer, i + 4 + 1, (int) size + 3); i = i + 4 + 1 + (int) size - 1; } } } catch (Exception e) { log.debug(e); } } }; POIFSReader reader = new POIFSReader(); reader.registerListener(listener); contentStream = contentResource.streamContent(); reader.read(contentStream); os.flush(); StringBuilder sb = new StringBuilder(); SearchUtils.appendCleanString(new String(os.toByteArray(), "UTF-8"), sb); return sb.toString(); } catch (Exception e) { throw new RuntimeException("Failed to read content for indexing ", e); } finally { if (contentStream != null) { try { contentStream.close(); } catch (IOException e) { log.debug(e); } } } }