Example usage for org.apache.poi.poifs.eventfilesystem POIFSReaderListener POIFSReaderListener

List of usage examples for org.apache.poi.poifs.eventfilesystem POIFSReaderListener POIFSReaderListener

Introduction

On this page you can find example usages of org.apache.poi.poifs.eventfilesystem.POIFSReaderListener.

Prototype

POIFSReaderListener

Source Link

Usage

From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java

License:Open Source License

/**
 * Runs two independent {@code ZipAndRun} passes over {@code TestFiles.doc}:
 * one that hands the stream to a {@code POIFSReader} with a no-op listener,
 * and one that extracts text with a {@code WordExtractor}.
 *
 * NOTE(review): presumably {@code ZipAndRun} invokes
 * {@code handleInputStream} from its constructor with a stream over the
 * zipped entry -- confirm against the ZipAndRun source.
 */
@Test
public void testZippedOffice() throws Exception {
    // Pass 1: event-based POIFS read; the listener deliberately ignores events.
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReader poifsReader = new POIFSReader();
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);
        }
    };

    // Pass 2: text extraction; the extractor is always closed quietly.
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            WordExtractor wordExtractor = null;
            try {
                wordExtractor = new WordExtractor(in);
                wordExtractor.getText();
            } finally {
                Closeables.closeQuietly(wordExtractor);
            }
        }
    };
}

From source file:net.sourceforge.docfetcher.model.parse.TestParseFromZip.java

License:Open Source License

/**
 * Feeds one and the same input stream first to a {@code POIFSReader} and then
 * to a {@code WordExtractor}; the test expects an {@code IOException}.
 */
@Test(expected = IOException.class)
public void testZippedOfficeFail() throws Exception {
    // This will fail because we're trying to read the same InputStream twice
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            // First consumer of the stream: POIFS reader with a no-op listener.
            POIFSReader poifsReader = new POIFSReader();
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);

            // Second consumer of the very same stream.
            WordExtractor wordExtractor = null;
            try {
                wordExtractor = new WordExtractor(in);
                wordExtractor.getText();
            } finally {
                Closeables.closeQuietly(wordExtractor);
            }
        }
    };
}

From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java

License:Open Source License

/**
 * Runs two independent {@code ZipAndRun} passes over {@code TestFiles.doc}:
 * one that hands the stream to a {@code POIFSReader} with a no-op listener,
 * and one that extracts text with a {@code WordExtractor}.
 *
 * NOTE(review): the extractor created in the second pass is never closed
 * here -- confirm whether that is intentional.
 */
@Test
public void testZippedOffice() throws Exception {
    // Pass 1: event-based POIFS read; the listener deliberately ignores events.
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            POIFSReader poifsReader = new POIFSReader();
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);
        }
    };

    // Pass 2: text extraction on a separate ZipAndRun instance.
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            WordExtractor wordExtractor = new WordExtractor(in);
            wordExtractor.getText();
        }
    };
}

From source file:net.sourceforge.vaticanfetcher.model.parse.TestParseFromZip.java

License:Open Source License

/**
 * Feeds one and the same input stream first to a {@code POIFSReader} and then
 * to a {@code WordExtractor}; the test expects an {@code IOException}.
 */
@Test(expected = IOException.class)
public void testZippedOfficeFail() throws Exception {
    // This will fail because we're trying to read the same InputStream twice
    new ZipAndRun(TestFiles.doc) {
        protected void handleInputStream(InputStream in) throws Exception {
            // First consumer of the stream: POIFS reader with a no-op listener.
            POIFSReader poifsReader = new POIFSReader();
            POIFSReaderListener noOpListener = new POIFSReaderListener() {
                public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                    // Nothing
                }
            };
            poifsReader.registerListener(noOpListener, "\005SummaryInformation"); //$NON-NLS-1$
            poifsReader.read(in);

            // Second consumer of the very same stream.
            WordExtractor wordExtractor = new WordExtractor(in);
            wordExtractor.getText();
        }
    };
}

From source file:org.exoplatform.services.document.impl.POIPropertiesReader.java

License:Open Source License

/**
 * Metadata extraction from OLE2 documents (legacy MS office file formats)
 * /*from   w  w w  . ja v a  2 s  .c  o m*/
 * @param is
 * @return
 * @throws IOException
 * @throws DocumentReadException
 */
public Properties readDCProperties(final InputStream is) throws IOException, DocumentReadException {
    if (is == null) {
        throw new IllegalArgumentException("InputStream is null.");
    }

    @SuppressWarnings("serial")
    class POIRuntimeException extends RuntimeException {
        private Throwable ex;

        public POIRuntimeException(Throwable ex) {
            this.ex = ex;
        }

        public Throwable getException() {
            return ex;
        }
    }

    POIFSReaderListener readerListener = new POIFSReaderListener() {
        public void processPOIFSReaderEvent(final POIFSReaderEvent event) {

            PropertySet ps;
            try {
                ps = PropertySetFactory.create(event.getStream());

                if (ps instanceof SummaryInformation) {
                    SummaryInformation si = (SummaryInformation) ps;

                    if (si.getLastAuthor() != null && si.getLastAuthor().length() > 0) {
                        props.put(DCMetaData.CONTRIBUTOR, si.getLastAuthor());
                    }
                    if (si.getComments() != null && si.getComments().length() > 0) {
                        props.put(DCMetaData.DESCRIPTION, si.getComments());
                    }
                    if (si.getCreateDateTime() != null) {
                        props.put(DCMetaData.DATE, si.getCreateDateTime());
                    }
                    if (si.getAuthor() != null && si.getAuthor().length() > 0) {
                        props.put(DCMetaData.CREATOR, si.getAuthor());
                    }
                    if (si.getKeywords() != null && si.getKeywords().length() > 0) {
                        props.put(DCMetaData.SUBJECT, si.getKeywords());
                    }
                    if (si.getLastSaveDateTime() != null) {
                        props.put(DCMetaData.DATE, si.getLastSaveDateTime());
                    }
                    // if(docInfo.getProducer() != null)
                    // props.put(DCMetaData.PUBLISHER, docInfo.getProducer());
                    if (si.getSubject() != null && si.getSubject().length() > 0) {
                        props.put(DCMetaData.SUBJECT, si.getSubject());
                    }
                    if (si.getTitle() != null && si.getTitle().length() > 0) {
                        props.put(DCMetaData.TITLE, si.getTitle());
                    }

                }
            } catch (NoPropertySetStreamException e) {
                throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e));
            } catch (MarkUnsupportedException e) {
                throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e));
            } catch (UnsupportedEncodingException e) {
                throw new POIRuntimeException(new DocumentReadException(e.getMessage(), e));
            } catch (IOException e) {
                throw new POIRuntimeException(e);
            }
        }
    };

    try {
        final POIFSReader poiFSReader = new POIFSReader();
        poiFSReader.registerListener(readerListener, SummaryInformation.DEFAULT_STREAM_NAME);
        SecurityHelper.doPrivilegedIOExceptionAction(new PrivilegedExceptionAction<Void>() {
            public Void run() throws Exception {
                poiFSReader.read(is);
                return null;
            }
        });
    } catch (POIRuntimeException e) {
        Throwable ex = e.getException();
        if (ex instanceof IOException) {
            throw (IOException) ex;
        } else {
            throw (DocumentReadException) ex;
        }
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("An exception occurred: " + e.getMessage());
                }
            }
        }
    }

    return props;
}

From source file:org.sakaiproject.search.component.adapter.contenthosting.PPTContentDigester.java

License:Educational Community License

/**
 * Extracts text from a PowerPoint content resource for indexing.
 * <p>
 * The resource is read through a {@code POIFSReader}; bytes collected from
 * the "PowerPoint Document" stream are decoded as UTF-8 and passed through
 * {@code SearchUtils.appendCleanString}.
 *
 * @param contentResource the resource to digest; must not be {@code null}
 * @return the cleaned text collected from the resource
 * @throws RuntimeException if {@code contentResource} is {@code null} or if
 *           reading the content fails
 */
public String getContent(ContentResource contentResource) {
    if (contentResource == null) {
        throw new RuntimeException("Null contentResource passed to getContent");
    }
    InputStream resourceStream = null;

    try {
        // this is informed by the text extractors in Jackrabbit

        final ByteArrayOutputStream collected = new ByteArrayOutputStream();

        POIFSReaderListener pptListener = new POIFSReaderListener() {
            public void processPOIFSReaderEvent(POIFSReaderEvent event) {
                try {
                    // Only the main presentation stream is scanned.
                    if (!event.getName().equalsIgnoreCase("PowerPoint Document")) {
                        return;
                    }
                    DocumentInputStream docStream = event.getStream();
                    byte[] data = new byte[docStream.available()];
                    docStream.read(data, 0, docStream.available());

                    int pos = 0;
                    while (pos < data.length - 20) {
                        long recordType = LittleEndian.getUShort(data, pos + 2);
                        long recordSize = LittleEndian.getUInt(data, pos + 4);
                        // NOTE(review): 4008 is presumably the record type of a
                        // text-bytes record -- confirm against the PPT format spec.
                        if (recordType == 4008) {
                            collected.write(data, pos + 4 + 1, (int) recordSize + 3);
                            // Jump past the copied record; the increment below
                            // then advances by one more byte.
                            pos = pos + 4 + 1 + (int) recordSize - 1;
                        }
                        pos++;
                    }
                } catch (Exception e) {
                    // Best effort: a malformed stream only loses its text.
                    log.debug(e);
                }
            }
        };

        POIFSReader poifsReader = new POIFSReader();
        poifsReader.registerListener(pptListener);
        resourceStream = contentResource.streamContent();
        poifsReader.read(resourceStream);
        collected.flush();

        String rawText = new String(collected.toByteArray(), "UTF-8");
        StringBuilder cleaned = new StringBuilder();
        SearchUtils.appendCleanString(rawText, cleaned);
        return cleaned.toString();
    } catch (Exception e) {
        throw new RuntimeException("Failed to read content for indexing ", e);
    } finally {
        if (resourceStream != null) {
            try {
                resourceStream.close();
            } catch (IOException e) {
                log.debug(e);
            }
        }
    }
}