Example usage for org.apache.poi.poifs.filesystem DirectoryEntry hasEntry

List of usage examples for org.apache.poi.poifs.filesystem DirectoryEntry hasEntry

Introduction

On this page you can find example usages of the hasEntry method of org.apache.poi.poifs.filesystem.DirectoryEntry.

Prototype


public boolean hasEntry(final String name);

Source Link

Document

Checks whether an entry with the specified name is present.

Usage

From source file:com.auxilii.msgparser.MsgParser.java

License:Open Source License

/**
 * Builds the attachment described by the given directory entry and
 * hands it to the matching parser.
 * <p>
 * An attachment directory either describes a plain attached file or an
 * attached .msg file. The presence of the
 * {@code __substg1.0_3701000D} entry selects the embedded-message path;
 * every other directory is treated as a regular file attachment.
 *
 * @param dir The directory entry containing the attachment
 *  document entry and the other document entries that
 *  describe the attachment (name, extension, mime type, ...)
 * @param msg The {@link Message} object that the parsed
 *  attachment should be added to.
 * @throws IOException Thrown if the attachment could
 *  not be parsed/read.
 */
protected void parseAttachment(DirectoryEntry dir, Message msg) throws IOException {
    final boolean holdsEmbeddedMessage = dir.hasEntry("__substg1.0_3701000D");

    if (!holdsEmbeddedMessage) {
        // Ordinary file attachment
        ParseFileAttachment(dir, msg);
        return;
    }

    // Attached .msg file
    parseEmbeddedMessage(dir, msg);
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.AbstractPOIFSExtractor.java

License:Apache License

/**
 * Handle an office document that's embedded at the POIFS level.
 * <p>
 * If the directory has a "Package" entry it is treated as an embedded
 * OOXML document: the entry is opened as a stream, its media type is
 * detected with a {@link ZipContainerDetector}, and it is passed to
 * {@code handleEmbeddedResource}. Otherwise the directory is treated as
 * a regular OLE2 document: its {@link POIFSDocumentType} is detected,
 * metadata is filled in accordingly, and the document is passed to the
 * embedded-document extractor if that extractor agrees to parse it.
 *
 * @param dir   the directory entry holding the embedded document
 * @param xhtml the content handler passed on to the embedded handlers
 * @throws IOException   if reading from the directory entry fails
 * @throws SAXException  propagated from the embedded handlers
 * @throws TikaException if a COMP_OBJ document's contents cannot be
 *                       read or identified ("Invalid embedded resource"),
 *                       or propagated from the embedded handlers
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {

    // Is it an embedded OLE2 document, or an embedded OOXML document?

    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");

        TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml));
        try {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = detector.detect(stream, new Metadata());
            handleEmbeddedResource(stream, null, dir.getName(), type.toString(), xhtml, true);
            return;
        } finally {
            stream.close();
        }
    }

    // It's regular OLE2:

    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;

    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                // Only record a resource name when a label is present, so
                // the name never ends in the literal text "null"
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
                }

                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
                // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                // Deliberately best-effort: log and fall through, so the
                // directory itself is still offered to the extractor below
                LOGGER.warn(
                        "Ignoring unexpected exception while parsing possible OLE10_NATIVE embedded document "
                                + dir.getName(),
                        e);
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Fall back to the mixed-case spelling of the entry name
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }
                byte[] contents = new byte[contentsEntry.getSize()];
                DocumentInputStream inp = new DocumentInputStream(contentsEntry);
                try {
                    inp.readFully(contents);
                } finally {
                    // Always release the document stream, even if the read fails
                    inp.close();
                }
                embedded = TikaInputStream.get(contents);

                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                    // No details on this type are known
                }

                // Record what we can do about it
                // NOTE(review): MediaType.getType() is the top-level type only
                // (e.g. "application"), whereas the else-branch below stores
                // the full type - confirm which form is intended here.
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + extension);
            } catch (Exception e) {
                throw new TikaException("Invalid embedded resource", e);
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '.' + type.getExtension());
        }

        // Should we parse it?
        if (extractor.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            extractor.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}

From source file:org.apache.tika.parser.microsoft.AbstractPOIFSExtractor.java

License:Apache License

/**
 * Handle an office document that's embedded at the POIFS level.
 * <p>
 * If the directory has a "Package" entry it is treated as an embedded
 * OOXML document: the entry is opened as a stream, its media type is
 * detected with a {@link ZipContainerDetector}, and it is passed to
 * {@code handleEmbeddedResource} together with the directory's storage
 * class id. Otherwise the directory is treated as a regular OLE2
 * document: its {@link POIFSDocumentType} is detected, metadata is
 * filled in accordingly, and the document is passed to the
 * embedded-document extractor if that extractor agrees to parse it.
 *
 * @param dir   the directory entry holding the embedded document
 * @param xhtml the content handler passed on to the embedded handlers
 * @throws IOException   if reading from the directory entry fails
 * @throws SAXException  propagated from the embedded handlers
 * @throws TikaException if a COMP_OBJ document's contents cannot be
 *                       read or identified ("Invalid embedded resource"),
 *                       or propagated from the embedded handlers
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {

    // Is it an embedded OLE2 document, or an embedded OOXML document?

    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");

        try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = detector.detect(stream, new Metadata());
            handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml,
                    true);
            return;
        }
    }

    // It's regular OLE2:

    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    if (dir.getStorageClsid() != null) {
        metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
    }
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;

    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                // Only record a resource name when a label is present
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
                }
                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
                // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                // Deliberately best-effort: log and fall through, so the
                // directory itself is still offered to the extractor below
                logger.warn(
                        "Ignoring unexpected exception while parsing possible OLE10_NATIVE embedded document "
                                + dir.getName(),
                        e);
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Fall back to the mixed-case spelling of the entry name
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }
                byte[] contents = new byte[contentsEntry.getSize()];
                // try-with-resources so the document stream is released
                // even if the read fails
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    inp.readFully(contents);
                }
                embedded = TikaInputStream.get(contents);

                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                    // No details on this type are known
                }

                // Record what we can do about it
                // NOTE(review): MediaType.getType() is the top-level type only
                // (e.g. "application"), whereas the else-branch below stores
                // the full type - confirm which form is intended here.
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + extension);
            } catch (Exception e) {
                throw new TikaException("Invalid embedded resource", e);
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '.' + type.getExtension());
        }

        // Should we parse it?
        if (extractor.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            extractor.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}