Example usage for org.apache.poi.poifs.filesystem DirectoryEntry getStorageClsid

List of usage examples for org.apache.poi.poifs.filesystem DirectoryEntry getStorageClsid

Introduction

On this page you can find example usages of org.apache.poi.poifs.filesystem.DirectoryEntry.getStorageClsid().

Prototype

public ClassID getStorageClsid();

Source Link

Document

Gets the storage CLSID (class ID) of the directory entry.

Usage

From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java

License:Apache License

/**
 * Opens the OLE2 file system contained in {@code stream} and builds the stream tree for it.
 *
 * <p>The root directory of the file system becomes a parentless {@link ContainerStream}
 * carrying the root storage CLSID and the supplied {@code name}; all entries beneath the root
 * are then attached through {@code addAll}.
 *
 * @param stream input from which the OLE2 file system is read
 * @param name   name given to the root container
 * @throws IOException if the file system cannot be read, including the case where opening it
 *                     fails with one of the runtime exceptions caught below (the original
 *                     exception is preserved as the cause)
 */
public OleProcessor(InputStream stream, String name) throws IOException {
    try {
        fs = new NPOIFSFileSystem(stream);
    } catch (ArrayIndexOutOfBoundsException | NullPointerException | IllegalArgumentException e) {
        // Surface these runtime failures as an I/O problem, but keep the cause so the
        // underlying reason is not lost to callers and logs.
        throw new IOException("Unable to open OLE2 file system for " + name, e);
    }

    desc = fs.getShortDescription() + ", with block size: " + fs.getBigBlockSize() + " bytes";

    DirectoryEntry rootEntry = fs.getRoot();
    root = new ContainerStream(null, rootEntry.getStorageClsid(), name);

    addAll(root, rootEntry);
}

From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java

License:Apache License

/**
 * Recursively mirrors the entries of {@code root} as children of {@code parent}.
 *
 * <p>Directory entries become container streams (a {@link VbaContainerStream} when
 * {@code Stream.isVbaStorage} accepts the directory name) and are descended into. Document
 * entries are read fully into memory and wrapped in a {@link DocumentStream}; a read failure
 * is recorded on that stream via its error flag instead of being propagated.
 *
 * @param parent container that receives the streams created for the entries of {@code root}
 * @param root   directory whose entries are processed
 * @throws IOException declared for the recursive traversal
 */
private void addAll(ContainerStream parent, DirectoryEntry root) throws IOException {
    for (Entry entry : root) {
        if (entry instanceof DirectoryEntry) {
            DirectoryEntry subDir = (DirectoryEntry) entry;
            ClassID clsid = subDir.getStorageClsid();
            String dirName = subDir.getName();

            // VBA storages get their dedicated container type; both kinds are then walked
            // the same way.
            ContainerStream child = Stream.isVbaStorage(dirName)
                    ? new VbaContainerStream(parent, clsid, dirName)
                    : new ContainerStream(parent, clsid, dirName);
            addAll(child, subDir);
            continue;
        }

        if (!(entry instanceof DocumentEntry)) {
            // Neither a directory nor a document: nothing to do.
            continue;
        }

        DocumentEntry document = (DocumentEntry) entry;

        // Buffer sized to hold the complete document contents.
        byte[] contents = new byte[document.getSize()];

        boolean failed = false;
        try (DocumentInputStream in = new DocumentInputStream(document)) {
            try {
                in.readFully(contents);
            } catch (IndexOutOfBoundsException ex) {
                failed = true;
            }
        } catch (IOException ex) {
            failed = true;
        }

        // The DocumentStream constructor takes care of notifying the parent of its new child.
        new DocumentStream(parent, entry.getName(), ByteBuffer.wrap(contents), failed);
    }
}

From source file:org.apache.tika.parser.microsoft.AbstractPOIFSExtractor.java

License:Apache License

/**
 * Handle an office document that's embedded at the POIFS level.
 *
 * <p>If the directory holds a {@code Package} entry it is treated as embedded OOXML: the
 * entry is run through a {@link ZipContainerDetector} and handed to
 * {@code handleEmbeddedResource}. Otherwise the directory is treated as regular OLE2: its
 * type is detected, metadata (relationship id, storage class id, content type, resource name)
 * is filled in, and the embedded document is passed to the extractor if the extractor wants
 * to parse it.
 *
 * @param dir   directory entry of the embedded document
 * @param xhtml handler that receives the output of the embedded document
 * @throws IOException   if reading from the POIFS entries fails
 * @throws SAXException  if the content handler reports a failure
 * @throws TikaException if a {@code COMP_OBJ} embedded resource cannot be processed
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, XHTMLContentHandler xhtml)
        throws IOException, SAXException, TikaException {

    // Is it an embedded OLE2 document, or an embedded OOXML document?

    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");

        try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = detector.detect(stream, new Metadata());
            handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml,
                    true);
            return;
        }
    }

    // It's regular OLE2:

    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    if (dir.getStorageClsid() != null) {
        metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
    }
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;

    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
                }
                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
                // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                logger.warn(
                        "Ignoring unexpected exception while parsing possible OLE10_NATIVE embedded document "
                                + dir.getName(),
                        e);
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Fall back to the alternative capitalisation of the entry name
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }

                // Copy the entry into memory; the POIFS stream is closed as soon as the
                // bytes have been read so it is not leaked.
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    byte[] contents = new byte[contentsEntry.getSize()];
                    inp.readFully(contents);
                    embedded = TikaInputStream.get(contents);
                }

                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                    // No details on this type are known
                }

                // Record what we can do about it
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + extension);
            } catch (Exception e) {
                throw new TikaException("Invalid embedded resource", e);
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '.' + type.getExtension());
        }

        // Should we parse it?
        if (extractor.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            extractor.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}

From source file:org.opf_labs.aqua.OfficeAnalyser.java

License:Apache License

/**
 * Prints a recursive listing of {@code root} to standard output.
 *
 * <p>For the directory itself the name and storage CLSID are printed. Each document node
 * below it is printed with its name, short description and size, and is additionally probed
 * as a property set: when it parses as one with at least one section, every property id and
 * value is printed. Sub-directories are dumped recursively; any other entry kind is reported
 * on standard error and skipped.
 *
 * @param root directory entry to dump
 * @throws IOException if reading a document node fails
 */
public static void dump(DirectoryEntry root) throws IOException {
    System.out.println(root.getName() + " : storage CLSID " + root.getStorageClsid());
    for (Iterator<Entry> it = root.getEntries(); it.hasNext();) {
        Entry entry = it.next();
        if (entry instanceof DocumentNode) {
            DocumentNode node = (DocumentNode) entry;
            System.out.println("Node name: " + node.getName());
            System.out.println("Node desc: " + node.getShortDescription());
            System.out.println("Node size: " + node.getSize());

            DocumentInputStream is = new DocumentInputStream(node);
            try {
                PropertySet ps = new PropertySet(is);
                if (ps.getSectionCount() != 0) {
                    for (Property p : ps.getProperties()) {
                        System.out.println("Prop: " + p.getID() + " " + p.getValue());
                    }
                }
            } catch (NoPropertySetStreamException ignored) {
                // Best-effort probe: this node is simply not a property set, so there are
                // no properties to print.
            } catch (MarkUnsupportedException ignored) {
                // Best-effort probe: the stream cannot be inspected as a property set, so
                // the property listing is skipped for this node.
            } finally {
                // Always release the document stream, whether or not the probe succeeded.
                is.close();
            }
        } else if (entry instanceof DirectoryEntry) {
            DirectoryEntry dir = (DirectoryEntry) entry;
            dump(dir);
        } else {
            System.err.println("Skipping unsupported POIFS entry: " + entry);
        }
    }
}

From source file:uk.bl.wa.tika.parser.ole2.OLE2Parser.java

License:Open Source License

/**
 * Prints a recursive listing of {@code root} to standard output.
 *
 * <p>For the directory itself the name and storage CLSID are printed. Each document node
 * below it is printed with its name, short description and size, and is additionally probed
 * as a property set: when it parses as one with at least one section, every property id and
 * value is printed. Sub-directories are dumped recursively; any other entry kind is reported
 * on standard error and skipped.
 *
 * @param root directory entry to dump
 * @throws IOException if reading a document node fails
 */
public static void dump(DirectoryEntry root) throws IOException {
    System.out.println(root.getName() + " : storage CLSID " + root.getStorageClsid());
    for (Iterator<Entry> it = root.getEntries(); it.hasNext();) {
        Entry entry = it.next();
        if (entry instanceof DocumentNode) {
            DocumentNode node = (DocumentNode) entry;
            System.out.println("Node name: " + node.getName());
            System.out.println("Node desc: " + node.getShortDescription());
            System.out.println("Node size: " + node.getSize());

            DocumentInputStream is = new DocumentInputStream(node);
            try {
                PropertySet ps = new PropertySet(is);
                if (ps.getSectionCount() != 0) {
                    for (Property p : ps.getProperties()) {
                        System.out.println("Prop: " + p.getID() + " " + p.getValue());
                    }
                }
            } catch (NoPropertySetStreamException ignored) {
                // Best-effort probe: this node is simply not a property set, so there are
                // no properties to print.
            } finally {
                // Always release the document stream, whether or not the probe succeeded.
                is.close();
            }
        } else if (entry instanceof DirectoryEntry) {
            DirectoryEntry dir = (DirectoryEntry) entry;
            dump(dir);
        } else {
            System.err.println("Skipping unsupported POIFS entry: " + entry);
        }
    }
}