Example usage for org.apache.poi.poifs.filesystem Entry isDocumentEntry

List of usage examples for org.apache.poi.poifs.filesystem Entry isDocumentEntry

Introduction

On this page you can find example usages of org.apache.poi.poifs.filesystem Entry isDocumentEntry.

Prototype


public boolean isDocumentEntry();

Source Link

Document

Is this a DocumentEntry?

Usage

From source file:com.argo.hwp.v5.HwpTextExtractorV5.java

License:Open Source License

/**
 * HWP? FileHeader /*from www  .j  av a2  s.c o m*/
 * 
 * @param fs
 * @return
 * @throws IOException
 */
private static FileHeader getHeader(NPOIFSFileSystem fs) throws IOException {
    DirectoryNode root = fs.getRoot();

    // ??? p.18

    // FileHeader  
    Entry headerEntry = root.getEntry("FileHeader");
    if (!headerEntry.isDocumentEntry())
        return null;

    //  ?
    byte[] header = new byte[256]; // FileHeader ? 256
    DocumentInputStream headerStream = new DocumentInputStream((DocumentEntry) headerEntry);
    try {
        int read = headerStream.read(header);
        if (read != 256
                || !Arrays.equals(HWP_V5_SIGNATURE, Arrays.copyOfRange(header, 0, HWP_V5_SIGNATURE.length)))
            return null;
    } finally {
        headerStream.close();
    }

    FileHeader fileHeader = new FileHeader();

    // . debug
    fileHeader.version = HwpVersion.parseVersion(LittleEndian.getUInt(header, 32));
    long flags = LittleEndian.getUInt(header, 36);
    log.debug("Flags={}", Long.toBinaryString(flags).replace(' ', '0'));

    fileHeader.compressed = (flags & 0x01) == 0x01;
    fileHeader.encrypted = (flags & 0x02) == 0x02;
    fileHeader.viewtext = (flags & 0x04) == 0x04;

    return fileHeader;
}

From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java

License:Open Source License

/**
 * Opens an EIP file through {@code POIFSFileSystem}, locates its config and
 * files directories, parses the face configuration document and prepares the
 * group and face iterators.
 *
 * <p>This constructor does not propagate {@link IOException}: when opening or
 * parsing fails (including when a required directory or the configuration
 * document is missing) the input stream is closed and the fields that were
 * not yet assigned keep their previous values.</p>
 *
 * @param file    path of the EIP file to read
 * @param destDir destination directory, stored for later use
 */
@SuppressWarnings("unchecked")
public EIPImporter(String file, String destDir) {
    this.destDir = destDir;
    buffer = new byte[8192];

    try {
        // open the EIP file as a compound document
        eipStream = new FileInputStream(file);
        POIFSFileSystem eipSystem = new POIFSFileSystem(eipStream);

        // look up the two expected top-level directories; names are compared
        // case-insensitively using a locale-independent lower-casing so that
        // ASCII names still match under e.g. a Turkish default locale
        DirectoryEntry configDir = null, fileDir = null;
        DirectoryEntry root = eipSystem.getRoot();
        Iterator<Entry> i = root.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDirectoryEntry()) {
                String name = e.getName().toLowerCase(java.util.Locale.ROOT);
                if (CONFIG_DIRECTORY.equals(name))
                    configDir = (DirectoryEntry) e;
                else if (FILES_DIRECTORY.equals(name))
                    fileDir = (DirectoryEntry) e;
            }
        }

        // both directories are mandatory
        if (configDir == null || fileDir == null)
            throw new IOException("Can't find correct directories");

        // find and parse the configuration document
        i = configDir.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDocumentEntry() && CONFIG_FILE.equals(e.getName().toLowerCase(java.util.Locale.ROOT))) {
                DocumentInputStream dis = new DocumentInputStream((DocumentEntry) e);
                try {
                    parser = new FaceXMLParser(dis);
                } finally {
                    // close the stream even when the parser fails
                    dis.close();
                }
                break;
            }
        }

        // the configuration document is mandatory
        if (parser == null)
            throw new IOException("Can't find " + CONFIG_FILE);

        // initialise the iterators used to walk the stored faces
        groupIterator = fileDir.getEntries();
        currentDir = fileDir;
        faceIterator = currentDir.getEntries();
    } catch (IOException e) {
        // best effort: release the file handle and leave the importer unusable
        try {
            if (eipStream != null) {
                eipStream.close();
                eipStream = null;
            }
        } catch (IOException ignored) {
            // nothing more can be done if closing fails as well
        }
    }
}

From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java

License:Open Source License

/**
 * Advances {@code faceIterator} until it yields a document entry.
 *
 * @return the next entry for which {@code isDocumentEntry()} is true, or
 *         {@code null} when the iterator is exhausted without finding one
 */
private DocumentEntry getNextDocument() {
    while (faceIterator.hasNext()) {
        Entry candidate = faceIterator.next();
        if (candidate.isDocumentEntry()) {
            return (DocumentEntry) candidate;
        }
    }
    // no further document entries in the current iterator
    return null;
}

From source file:mj.ocraptor.extraction.tika.parser.microsoft.POIFSContainerDetector.java

License:Apache License

/**
 * Is this one of the kinds of formats which uses CompObj to
 *  store all of their data, eg Star Draw, Star Impress or
 *  (older) Works?
 * If not, it's likely an embedded resource
 *
 * @param root the directory whose CompObj document entry is inspected
 * @return {@code SDA}, {@code SDD} or {@code WPS} when the corresponding
 *         application name is found in the CompObj bytes; {@code OLE} when
 *         none matches, the entry is absent or not a document, or any
 *         exception occurs
 */
private static MediaType processCompObjFormatType(DirectoryEntry root) {
    try {
        Entry e = root.getEntry("\u0001CompObj");
        if (e != null && e.isDocumentEntry()) {
            DocumentNode dn = (DocumentNode) e;
            DocumentInputStream stream = new DocumentInputStream(dn);
            byte[] bytes;
            try {
                bytes = IOUtils.toByteArray(stream);
            } finally {
                // release the stream whether or not reading succeeded
                stream.close();
            }
            /*
             * This array contains a string with a normal ASCII name of the
             * application used to create this file. We want to search for that
             * name.
             */
            if (arrayContains(bytes, STAR_DRAW)) {
                return SDA;
            } else if (arrayContains(bytes, STAR_IMPRESS)) {
                return SDD;
            } else if (arrayContains(bytes, WORKS_QUILL96)) {
                return WPS;
            }
        }
    } catch (Exception ignored) {
        /*
         * "root.getEntry" can throw FileNotFoundException. The code inside
         * "if" can throw IOExceptions. Theoretically. Practically no
         * exceptions will likely ever appear.
         * 
         * Swallow all of them. If any occur, we just assume that we can't
         * distinguish between Draw and Impress and return something safe:
         * x-tika-msoffice
         */
    }
    return OLE;
}

From source file:org.apache.tika.parser.microsoft.POIFSContainerDetector.java

License:Apache License

/**
 * Is this one of the kinds of formats which uses CompObj to
 * store all of their data, eg Star Draw, Star Impress or
 * (older) Works?/*from  ww  w . j a  v  a2s.c o m*/
 * If not, it's likely an embedded resource
 */
private static MediaType processCompObjFormatType(DirectoryEntry root) {
    try {
        Entry e = root.getEntry("\u0001CompObj");
        if (e != null && e.isDocumentEntry()) {
            DocumentNode dn = (DocumentNode) e;
            DocumentInputStream stream = new DocumentInputStream(dn);
            byte[] bytes = IOUtils.toByteArray(stream);
            /*
             * This array contains a string with a normal ASCII name of the
             * application used to create this file. We want to search for that
             * name.
             */
            if (arrayContains(bytes, STAR_DRAW)) {
                return SDA;
            } else if (arrayContains(bytes, STAR_IMPRESS)) {
                return SDD;
            } else if (arrayContains(bytes, WORKS_QUILL96)) {
                return WPS;
            }
        }
    } catch (Exception e) {
        /*
         * "root.getEntry" can throw FileNotFoundException. The code inside
         * "if" can throw IOExceptions. Theoretically. Practically no
         * exceptions will likely ever appear.
         *
         * Swallow all of them. If any occur, we just assume that we can't
         * distinguish between Draw and Impress and return something safe:
         * x-tika-msoffice
         */
    }
    return OLE;
}

From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java

License:Open Source License

/**
 * Recursively parses the complete .msg file with the
 * help of the POI library. The parsed information is
 * put into the {@link Message} object./* w  w w.j  a  v  a  2  s . c  o  m*/
 * 
 * @param dir The current node in the .msg file.
 * @param msg The resulting {@link Message} object.
 * @throws IOException Thrown if the .msg file could not
 *  be parsed.
 * @throws UnsupportedOperationException Thrown if 
 *  the .msg file contains unknown data.
 */
protected void checkDirectoryEntry(DirectoryEntry dir, Message msg)
        throws IOException, UnsupportedOperationException {

    // we iterate through all entries in the current directory
    for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) {
        Entry entry = (Entry) iter.next();

        // check whether the entry is either a directory entry
        // or a document entry

        if (entry.isDirectoryEntry()) {

            DirectoryEntry de = (DirectoryEntry) entry;

            // attachments have a special name and
            // have to be handled separately at this point
            if (de.getName().startsWith("__attach_version1.0")) {
                this.parseAttachment(de, msg);
            } else if (de.getName().startsWith("__recip_version1.0")) {
                // a recipient entry has been found (which is also a directory entry itself)
                this.checkRecipientDirectoryEntry(de, msg);
            } else {
                // a directory entry has been found. this
                // node will be recursively checked
                this.checkDirectoryEntry(de, msg);
            }

        } else if (entry.isDocumentEntry()) {

            // a document entry contains information about
            // the mail (e.g, from, to, subject, ...)
            DocumentEntry de = (DocumentEntry) entry;

            // the data is accessed by getting an input stream
            // for the given document entry
            DocumentInputStream dstream = new DocumentInputStream(de);
            // analyze the document entry
            // (i.e., get class and data type)
            FieldInformation info = this.analyzeDocumentEntry(de);
            // create a Java object from the data provided
            // by the input stream. depending on the field
            // information, either a String or a byte[] will
            // be returned. other datatypes are not yet supported
            Object data = this.getData(dstream, info);

            logger.finest("  Document data: " + ((data == null) ? "null" : data.toString()));
            // the data is written into the Message object
            msg.setProperty(info.getClazz(), data);

        } else {
            // any other type is not supported
        }
    }
}

From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java

License:Open Source License

/**
 * Parses a recipient directory entry which holds information about one of possibly multiple recipients. 
 * The parsed information is put into the {@link Message} object.
 * 
 * @param dir The current node in the .msg file.
 * @param msg The resulting {@link Message} object.
 * @throws IOException Thrown if the .msg file could not
 *  be parsed.
 * @throws UnsupportedOperationException Thrown if 
 *  the .msg file contains unknown data.
 */
protected void checkRecipientDirectoryEntry(DirectoryEntry dir, Message msg)
        throws IOException, UnsupportedOperationException {

    RecipientEntry recipient = new RecipientEntry();

    // we iterate through all entries in the current directory
    for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) {
        Entry entry = (Entry) iter.next();

        // directory entries are not expected within a recipient entry and
        // are skipped; only document entries are of interest on this level
        if (!entry.isDirectoryEntry() && entry.isDocumentEntry()) {

            // a document entry contains information about
            // the mail (e.g, from, to, subject, ...)
            DocumentEntry de = (DocumentEntry) entry;

            // the data is accessed by getting an input stream
            // for the given document entry
            DocumentInputStream dstream = new DocumentInputStream(de);
            FieldInformation info;
            Object data;
            try {
                // analyze the document entry
                // (i.e., get class and data type)
                info = this.analyzeDocumentEntry(de);
                // create a Java object from the data provided
                // by the input stream. depending on the field
                // information, either a String or a byte[] will
                // be returned. other datatypes are not yet supported
                data = this.getData(dstream, info);
            } finally {
                // release the stream even when analysis or decoding fails;
                // assumes getData has fully materialised its result by the
                // time it returns - TODO confirm
                dstream.close();
            }

            logger.finest("  Document data: " + ((data == null) ? "null" : data.toString()));
            // the data is written into the recipient object
            recipient.setProperty(info.getClazz(), data);

        }
        // any other type is not supported
    }

    //after all properties are set -> add recipient to msg object
    msg.addRecipient(recipient);
}

From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java

License:Open Source License

/**
 * Creates an {@link Attachment} object based on
 * the given directory entry. The entry may either
 * point to an attached file or to an
 * attached .msg file, which will be added 
 * as a {@link MsgAttachment} object instead.
 * 
 * @param dir The directory entry containing the attachment
 *  document entry and some other document entries
 *  describing the attachment (name, extension, mime type, ...)
 * @param msg The {@link Message} object that this
 *  attachment should be added to.
 * @throws IOException Thrown if the attachment could
 *  not be parsed/read.
 */
protected void parseAttachment(DirectoryEntry dir, Message msg) throws IOException {

    FileAttachment attachment = new FileAttachment();

    // iterate through all document entries
    for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) {
        Entry entry = (Entry) iter.next();
        if (entry.isDocumentEntry()) {

            // the document entry may contain information
            // about the attachment
            DocumentEntry de = (DocumentEntry) entry;

            FieldInformation info = analyzeDocumentEntry(de);
            DocumentInputStream dstream = new DocumentInputStream(de);

            Object data;
            try {
                data = this.getData(dstream, info);
            } finally {
                // release the stream even when decoding fails; assumes
                // getData has fully materialised its result by the time
                // it returns - TODO confirm
                dstream.close();
            }
            String clazz = info.getClazz();

            // we provide the class and data of the document
            // entry to the attachment. the attachment implementation
            // has to know the semantics of the field names
            attachment.setProperty(clazz, data, de);

        } else {

            // a directory within the attachment directory
            // entry  means that a .msg file is attached
            // at this point. we recursively parse
            // this .msg file and add it as a MsgAttachment
            // object to the current Message object.
            Message attachmentMsg = new Message();
            MsgAttachment msgAttachment = new MsgAttachment();
            msgAttachment.setMessage(attachmentMsg);
            msg.addAttachment(msgAttachment);

            this.checkDirectoryEntry((DirectoryEntry) entry, attachmentMsg);
        }
    }

    // only if there was really an attachment, we
    // add this object to the Message object
    if (attachment.getSize() > -1) {
        msg.addAttachment(attachment);
    }

}

From source file:poi.hpsf.examples.CopyCompare.java

License:Apache License

/**
 * <p>Compares two {@link DirectoryEntry} instances of a POI file system.
 * The directories must contain the same streams with the same names and
 * contents.</p>/*from   ww w.ja v  a  2s . c o m*/
 *
 * @param d1 The first directory.
 * @param d2 The second directory.
 * @param msg The method may append human-readable comparison messages to
 * this string buffer.
 * @return <code>true</code> if the directories are equal, else
 * <code>false</code>.
 * @exception MarkUnsupportedException if a POI document stream does not
 * support the mark() operation.
 * @exception NoPropertySetStreamException if the application tries to
 * create a property set from a POI document stream that is not a property
 * set stream.
 * @throws java.io.UnsupportedEncodingException
 * @exception java.io.IOException if any I/O exception occurs.
 */
private static boolean equal(final DirectoryEntry d1, final DirectoryEntry d2, final StringBuffer msg)
        throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException,
        IOException {
    boolean equal = true;
    /* Iterate over d1 and compare each entry with its counterpart in d2. */
    for (final Iterator i = d1.getEntries(); equal && i.hasNext();) {
        final Entry e1 = (Entry) i.next();
        final String n1 = e1.getName();
        Entry e2 = null;
        try {
            e2 = d2.getEntry(n1);
        } catch (FileNotFoundException ex) {
            msg.append("Document \"" + e1 + "\" exists, document \"" + e2 + "\" does not.\n");
            equal = false;
            break;
        }

        if (e1.isDirectoryEntry() && e2.isDirectoryEntry())
            equal = equal((DirectoryEntry) e1, (DirectoryEntry) e2, msg);
        else if (e1.isDocumentEntry() && e2.isDocumentEntry())
            equal = equal((DocumentEntry) e1, (DocumentEntry) e2, msg);
        else {
            msg.append("One of \"" + e1 + "\" and \"" + e2 + "\" is a "
                    + "document while the other one is a directory.\n");
            equal = false;
        }
    }

    /* Iterate over d2 just to make sure that there are no entries in d2
     * that are not in d1. */
    for (final Iterator i = d2.getEntries(); equal && i.hasNext();) {
        final Entry e2 = (Entry) i.next();
        final String n2 = e2.getName();
        Entry e1 = null;
        try {
            e1 = d1.getEntry(n2);
        } catch (FileNotFoundException ex) {
            msg.append("Document \"" + e2 + "\" exitsts, document \"" + e1 + "\" does not.\n");
            equal = false;
            break;
        }
    }
    return equal;
}