Example usage for org.apache.poi.poifs.eventfilesystem POIFSReaderEvent getName

List of usage examples for org.apache.poi.poifs.eventfilesystem POIFSReaderEvent getName

Introduction

On this page you can find example usages of org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent.getName().

Prototype


public String getName() 

Source Link

Usage

From source file:com.duroty.lucene.parser.MSPowerPointParser.java

License:Open Source License

/**
 * Copies text records out of the "PowerPoint Document" stream of an OLE2 file.
 * <p>
 * Events for any other stream name (compared case-insensitively) are ignored.
 * The stream is loaded into memory and scanned one byte position at a time;
 * whenever the 16-bit value at {@code position + 2} equals 4008 (presumably the
 * PowerPoint text-bytes record type -- confirm against the file format
 * documentation), {@code size + 3} bytes starting at {@code position + 5} are
 * written to {@code writer}, followed by two newline characters.
 * <p>
 * Every exception raised while processing is silently discarded.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        if (event.getName().equalsIgnoreCase("PowerPoint Document")) {
            DocumentInputStream input = event.getStream();

            final int length = input.available();
            byte[] buffer = new byte[length];
            input.read(buffer, 0, length);

            // Separator emitted after every extracted record.
            byte[] espace = "\n\n".getBytes();

            final int limit = buffer.length - 20;
            int position = 0;

            while (position < limit) {
                long recordType = LittleEndian.getUShort(buffer, position + 2);
                long recordSize = LittleEndian.getUInt(buffer, position + 4);

                if (recordType == 4008) {
                    writer.write(buffer, position + 5, (int) recordSize + 3);
                    writer.write(espace);
                    // Resume scanning right after the region covered by this record.
                    position = position + 5 + (int) recordSize;
                } else {
                    position++;
                }
            }
        }
    } catch (Exception ignored) {
        // Failures are deliberately dropped; nothing is reported to the caller.
    }
}

From source file:com.flexive.extractor.PowerpointExtractor.java

License:Open Source License

/**
 * Dispatches a stream reported by the POIFS reader according to its name.
 * <p>
 * A stream named "PowerPoint Document" (case-insensitive) is read into memory
 * and handed to {@code processContent}. A stream named exactly
 * "\005SummaryInformation" is parsed into a {@code SummaryInformation} and
 * wrapped in the {@code fxsi} field. Any other stream is ignored. Exceptions
 * are printed via {@code printStackTrace()} and not propagated.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
@Override
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        final String streamName = event.getName();

        if (streamName.equalsIgnoreCase("PowerPoint Document")) {
            DocumentInputStream input = event.getStream();
            final int length = input.available();
            byte[] content = new byte[length];
            //noinspection ResultOfMethodCallIgnored
            input.read(content, 0, length);
            processContent(content, 0, content.length);
            return;
        }

        if (streamName.equals("\005SummaryInformation")) {
            SummaryInformation summary = (SummaryInformation) PropertySetFactory.create(event.getStream());
            fxsi = new FxSummaryInformation(summary);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}

From source file:com.frameworkset.platform.cms.searchmanager.extractors.A_CmsTextExtractorMsOfficeBase.java

License:Open Source License

/**
 * Captures the summary and document summary property sets of an OLE2 document.
 * <p>
 * Each of the two fields is filled at most once: {@code m_summary} from the first
 * stream whose name starts with {@code SummaryInformation.DEFAULT_STREAM_NAME},
 * and {@code m_documentSummary} from the first stream whose name starts with
 * {@code DocumentSummaryInformation.DEFAULT_STREAM_NAME}. Exceptions are ignored.
 *
 * @param event the POIFS reader event describing the stream that was read
 *
 * @see org.apache.poi.poifs.eventfilesystem.POIFSReaderListener#processPOIFSReaderEvent(org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent)
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {

    try {
        if (m_summary == null) {
            if (event.getName().startsWith(SummaryInformation.DEFAULT_STREAM_NAME)) {
                m_summary = (SummaryInformation) PropertySetFactory.create(event.getStream());
                return;
            }
        }

        if (m_documentSummary == null) {
            if (event.getName().startsWith(DocumentSummaryInformation.DEFAULT_STREAM_NAME)) {
                m_documentSummary = (DocumentSummaryInformation) PropertySetFactory.create(event.getStream());
            }
        }
    } catch (Exception ignored) {
        // ignore
    }
}

From source file:com.knowgate.ole.OLEListener.java

License:Open Source License

/**
 * Parses the event's stream as a property set and stores it in {@code si}.
 * <p>
 * Failures are not propagated. When {@code DebugFile.trace} is enabled, a line
 * naming the exception type, the stream path and name, and the exception
 * message is written to the debug file.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        si = (SummaryInformation) PropertySetFactory.create(event.getStream());
    } catch (MarkUnsupportedException markEx) {
        if (DebugFile.trace) {
            final String streamId = String.valueOf(event.getPath()) + event.getName();
            DebugFile.writeln("com.knowgate.ole.OLEListener MarkUnsupportedException " + streamId + " "
                    + markEx.getMessage());
        }
    } catch (NoPropertySetStreamException noStreamEx) {
        if (DebugFile.trace) {
            final String streamId = String.valueOf(event.getPath()) + event.getName();
            DebugFile.writeln("com.knowgate.ole.OLEListener NoPropertySetStreamException " + streamId + " "
                    + noStreamEx.getMessage());
        }
    } catch (IOException ioEx) {
        if (DebugFile.trace) {
            final String streamId = String.valueOf(event.getPath()) + event.getName();
            DebugFile.writeln("com.knowgate.ole.OLEListener IOException " + streamId + " "
                    + ioEx.getMessage());
        }
    }
}

From source file:com.villemos.ispace.aperture.enricher.MicrosoftPropertyReader.java

License:Open Source License

/**
 * Reads the property set contained in the event's stream and records its
 * properties in {@code msProperties}.
 * <p>
 * For every property of every section, the property name is resolved through
 * {@code Section.getPIDString(id)}. A name is only stored if it is not already
 * present in {@code msProperties} and the property's value has a non-empty
 * string representation, so the first non-empty value seen for a name wins.
 * <p>
 * If the stream is not a property set stream, or the property set cannot be
 * created for any other reason, the problem is logged and the method returns
 * without touching {@code msProperties}.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(final POIFSReaderEvent event) {
    final PropertySet ps;
    try {
        ps = PropertySetFactory.create(event.getStream());
    } catch (NoPropertySetStreamException ex) {
        LOG.debug("No property set stream: \"" + event.getPath() + event.getName() + "\"");
        return;
    } catch (Exception ex) {
        LOG.error("Exception while processing microsoft property set " + ex);
        // Without a property set there is nothing to read; continuing would
        // dereference a null reference below.
        return;
    }

    LOG.debug("Property set stream \"" + event.getPath() + event.getName() + "\":");

    List<Section> sections = ps.getSections();
    for (Section sec : sections) {
        for (Property p : sec.getProperties()) {
            String propertyName = sec.getPIDString(p.getID());
            if (msProperties.containsKey(propertyName)) {
                // Keep the value that was recorded first.
                continue;
            }

            // A property may carry no value at all; treat that like an empty one.
            Object value = p.getValue();
            if (value == null) {
                continue;
            }

            String valueStr = value.toString();
            if (!valueStr.equals("")) {
                msProperties.put(propertyName, valueStr);
            }
        }
    }
}

From source file:lius.index.powerpoint.PPTIndexer.java

License:Apache License

/**
 * Copies text records out of the "PowerPoint Document" stream to {@code writer}.
 * <p>
 * Streams with any other name (compared case-insensitively) are skipped. The
 * stream content is scanned one byte position at a time; whenever the 16-bit
 * value at {@code position + 2} equals 4008, {@code size + 3} bytes starting at
 * {@code position + 5} are written out and the scan resumes after that region.
 * Exceptions are reported through {@code logger} using only their message.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        if (event.getName().equalsIgnoreCase("PowerPoint Document")) {
            DocumentInputStream input = event.getStream();
            final int length = input.available();
            byte[] buffer = new byte[length];
            input.read(buffer, 0, length);

            final int limit = buffer.length - 20;
            int position = 0;
            while (position < limit) {
                long recordType = LittleEndian.getUShort(buffer, position + 2);
                long recordSize = LittleEndian.getUInt(buffer, position + 4);
                if (recordType == 4008L) {
                    writer.write(buffer, position + 5, (int) recordSize + 3);
                    position = position + 5 + (int) recordSize;
                } else {
                    position++;
                }
            }
        }
    } catch (Exception ex) {
        logger.error(ex.getMessage());
    }
}

From source file:org.apache.nutch.parse.mspowerpoint.ContentReaderListener.java

License:Apache License

/**
 * Reads the internal PowerPoint document stream.
 * /*from   ww  w.  j  a  v a  2  s.co m*/
 * @see org.apache.poi.poifs.eventfilesystem.POIFSReaderListener#processPOIFSReaderEvent(org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent)
 */
public void processPOIFSReaderEvent(final POIFSReaderEvent event) {

    if (event == null || event.getName() == null
            || !event.getName().startsWith(PPTConstants.POWERPOINT_DOCUMENT)) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Stream not processed. It is not a PowerPoint document: : " + event.getName());
        }
        return;
    }

    try {
        final DocumentInputStream dis = event.getStream();
        final byte pptdata[] = new byte[dis.available()];
        dis.read(pptdata, 0, dis.available());
        int offset = 0;
        long offsetPD = 0;

        /*
         * Traverse Bytearray to get CurrentUserEditAtom Call to extract the Text
         * in all PlaceHolders to hold PPTClientTextBox objects for mapping into
         * Slide Objects
         */
        Hashtable/* <Long, TextBox> */ containerTextBox = new Hashtable/*
                                                                        * <Long,
                                                                        * TextBox>
                                                                        */();
        // Traverse ByteArray to identiy edit paths of ClientTextBoxes
        long n = pptdata.length - 20;
        for (long i = 0; i < n; i++) {

            final long type = LittleEndian.getUShort(pptdata, (int) i + 2);
            // final long size = LittleEndian.getUInt(pptdata, (int) i + 4);

            if (PPTConstants.PPT_ATOM_USEREDIT == type) {
                /*
                 * Checking the Record Header (UserEditAtom)
                 */
                // final long lastSlideID = LittleEndian.getInt(pptdata, (int) i + 8);
                // final long version = LittleEndian.getUInt(pptdata, (int) i + 12);
                offset = (int) LittleEndian.getUInt(pptdata, (int) i + 16);
                offsetPD = LittleEndian.getUInt(pptdata, (int) i + 20);

                /*
                 * Call to extract ClientTextBox text in each UserEditAtom
                 */
                containerTextBox = extractTextBoxes(containerTextBox, offset, pptdata, offsetPD);
            } else if (PPTConstants.PPT_ATOM_DRAWINGGROUP == type) {
                // if (LOG.isTraceEnabled()) {
                //   LOG.trace("PPT_DRAWINGGROUP_ATOM ignored: " + type);
                // }
            } else if (PPTConstants.PPT_ATOM_TEXTBYTE == type) {
                // if (LOG.isTraceEnabled()) {
                //   LOG.trace("PPT_TEXTBYTE_ATOM ignored: " + type);
                // }
            } else if (PPTConstants.PPT_ATOM_TEXTCHAR == type) {
                // if (LOG.isTraceEnabled()) {
                //   LOG.trace("PPT_TEXTCHAR_ATOM ignored: " + type);
                // }
            } else {
                // no action
                // if (LOG.isTraceEnabled()) {
                //   LOG.trace("type not handled: " + type);
                // }
            }
        }

        final List/* <PPTSlide> */ slides = extractSlides(offset, pptdata, offsetPD);

        if (slides.size() == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info("No slides extracted!");
            }

        } else {
            Slide slide = (Slide) slides.get(slides.size() - 1);

            for (Enumeration enumeration = containerTextBox.elements(); enumeration.hasMoreElements();) {
                final TextBox textBox = (TextBox) enumeration.nextElement();
                slide.addContent(textBox.getContent());
            }

            /*
             * Merging TextBox data with Slide Data Printing the text from Slides
             * vector object.
             */
            List scontent;
            for (int i = 0; i < slides.size(); i++) {
                slide = (Slide) slides.get(i);
                scontent = slide.getContent();
                String contentText;

                for (int j = 0; j < scontent.size(); j++) {
                    contentText = scontent.get(j).toString();
                    this.buf.append(contentText);

                    // to avoid concatinated words we add a blank additional
                    if (contentText.length() > 0
                            && !(contentText.endsWith("\r") || contentText.endsWith("\n"))) {
                        this.buf.append(" ");
                    }
                }
            }
        }
    } catch (Throwable ex) {
        // because of not killing complete crawling all Throwables are catched.
        if (LOG.isErrorEnabled()) {
            LOG.error("processPOIFSReaderEvent", ex);
        }
    }
}

From source file:org.apache.slide.extractor.MSPowerPointExtractor.java

License:Apache License

/**
 * Copies text records out of the "PowerPoint Document" stream to {@code writer}.
 * <p>
 * Streams with any other name (compared case-insensitively) are skipped. The
 * stream content is scanned one byte position at a time; whenever the 16-bit
 * value at {@code cursor + 2} equals 4008, {@code size + 3} bytes starting at
 * {@code cursor + 5} are written out and the scan resumes after that region.
 * Every exception is silently discarded.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    try {
        if (event.getName().equalsIgnoreCase("PowerPoint Document")) {
            DocumentInputStream input = event.getStream();

            final int length = input.available();
            byte[] buffer = new byte[length];
            input.read(buffer, 0, length);

            final int limit = buffer.length - 20;
            int cursor = 0;
            while (cursor < limit) {
                long recordType = LittleEndian.getUShort(buffer, cursor + 2);
                long recordSize = LittleEndian.getUInt(buffer, cursor + 4);

                if (recordType != 4008) {
                    cursor++;
                    continue;
                }

                writer.write(buffer, cursor + 5, (int) recordSize + 3);
                cursor = cursor + 5 + (int) recordSize;
            }
        }
    } catch (Exception ignored) {
        // Failures are deliberately dropped; nothing is reported to the caller.
    }
}

From source file:org.ddt.listener.dsi.DocumentSummaryInfoListener.java

License:Apache License

/**
 * Collects the entries listed under the "Links" heading of a document summary
 * information stream.
 * <p>
 * The stream is parsed as a property set; anything that is not a
 * {@code DocumentSummaryInformation} is ignored. The heading-pair and
 * document-parts properties are located, and for each part belonging to the
 * "Links" heading a {@code Link} is created from the trimmed part value and
 * added to this listener. Parsing failures are logged and not propagated; the
 * stream is closed in all cases.
 *
 * @param event the POIFS reader event describing the stream that was read
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    log.log(Level.FINEST, "reading {0}{1}", new Object[] { event.getPath(), event.getName() });
    DocumentInputStream stream = event.getStream();
    try {
        PropertySet propertySet = PropertySetFactory.create(stream);
        if (!(propertySet instanceof DocumentSummaryInformation)) {
            return;
        }

        Property headingPairs = null;
        Property docParts = null;
        for (Property candidate : propertySet.getProperties()) {
            final long id = candidate.getID();
            if (id == PropertyIDMap.PID_HEADINGPAIR) {
                headingPairs = candidate;
            } else if (id == PropertyIDMap.PID_DOCPARTS) {
                docParts = candidate;
            }
        }

        if (docParts == null) {
            log.log(Level.FINE, "No DOCPARTS section");
            return;
        }
        if (headingPairs == null) {
            return;
        }

        HeadingPairVector headingVector = new HeadingPairVector((byte[]) headingPairs.getValue(), 0);
        StringVector partNames = new StringVector((byte[]) docParts.getValue(), 0, docParts.getType());

        // The heading name is *not* null terminated.
        HeadingPairProperty linksHeading = headingVector.getHeadingPairByName("Links");
        if (linksHeading == null) {
            log.log(Level.INFO, "No 'Links' header found.");
            return;
        }
        log.log(Level.FINEST, "Found {0} link parts", linksHeading.getPartsCount());

        // A heading may own several consecutive document parts; visit each one.
        final int firstPart = linksHeading.getOffset();
        for (int n = 0; n < linksHeading.getPartsCount(); n++) {
            String url = partNames.get(firstPart + n).getValue();
            log.log(Level.FINEST, "adding {0} to list of links.", url);
            Link link = new Link(3);
            link.addUnkownPath(url.trim());
            this.add(link);
        }

    } catch (NoPropertySetStreamException ex) {
        log.log(Level.INFO, "Not a PropertySetStream {0}{1}",
                new Object[] { event.getPath(), event.getName() });
    } catch (MarkUnsupportedException ex) {
        log.log(Level.INFO, "Couldn't create PropertySet: {0}", ex.getLocalizedMessage());
    } catch (UnsupportedEncodingException ex) {
        log.log(Level.INFO, null, ex);
    } catch (IOException ex) {
        log.log(Level.INFO, null, ex);
    } catch (HPSFException ex) {
        log.log(Level.WARNING, "Couldn't construct HeadingPair vector.", ex);
    } finally {
        stream.close();
    }
}

From source file:org.ddt.listener.ole.OleStreamListener.java

License:Apache License

/**
 * Reads a "\1Ole" stream and stores the links found in it.
 * <p>
 * This method returns (fairly) quietly if the stream fails (it doesn't throw
 * checked exceptions). There are a number of ways that this can fail, not all
 * of which are bad. For instance, if the POIFSReaderEvent doesn't contain an
 * OLE stream, this is not a disaster, we just need to return quickly.
 * <p>
 * The stream is closed exactly once on every exit path, including the early
 * returns and any runtime exception raised while reading.
 *
 * @param event document to process
 */
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
    log.log(Level.FINEST, "Processing Document: {0}/{1}", new Object[] { event.getPath(), event.getName() });
    DocumentInputStream docInStream = event.getStream();

    try {
        // Too short to even hold the version field.
        if (docInStream.available() < LittleEndian.INT_SIZE) {
            return;
        }

        if (docInStream.readInt() != VALID_VERSION) {
            log.log(Level.INFO, "Invalid signature - not an OLE Stream.");
            return;
        }

        docInStream.skip(LittleEndian.INT_SIZE); //ignore what I think might be LinkUpdateOptions

        //check it's a linked object, not embedded
        if (docInStream.readInt() != 1) {
            log.log(Level.FINER, "Not a link");
            return;
        }

        //check reserved field = 0
        if (docInStream.readInt() != 0x000000) {
            return;
        }

        // Three monikers follow in this order. The first (source) moniker is
        // not really interesting, but it must still be consumed to reach the
        // other two.
        readMoniker(docInStream);
        Moniker relative = readMoniker(docInStream);
        Moniker absolute = readMoniker(docInStream);

        // Either path stays null when the corresponding moniker is absent.
        String relPath = (relative != null) ? relative.getLink() : null;
        String absPath = (absolute != null) ? absolute.getLink() : null;

        Link l = new Link(1);
        l.addRelativePath(cleanURLString(relPath));
        l.addAbsolutePath(absPath);
        this.add(l);
    } catch (IOException ex) {
        log.log(Level.FINE, ex.getLocalizedMessage());
    } catch (BadOleStreamException ex) {
        log.log(Level.INFO, ex.getMessage());
    } finally {
        docInStream.close();
    }
}

/**
 * Reads one optional moniker from the stream: an int length field, and, only
 * if that field is positive, a 16-byte class id followed by the moniker data.
 *
 * @param docInStream stream positioned at the moniker's length field
 * @return the moniker produced by {@code MonikerFactory}, or {@code null} if
 *         the length field was not positive or the factory returned none
 */
private Moniker readMoniker(DocumentInputStream docInStream) throws IOException, BadOleStreamException {
    if (docInStream.readInt() <= 0) {
        return null;
    }
    byte[] clsid = new byte[16];
    docInStream.read(clsid);
    ClassID cid = new ClassID(clsid, 0);
    return MonikerFactory.getMoniker(cid, docInStream);
}