List of usage examples for the method org.apache.poi.poifs.filesystem.DirectoryEntry#getStorageClsid()
/**
 * Returns the storage class ID (CLSID) associated with this directory entry.
 *
 * @return the storage CLSID; NOTE(review): at least one caller in this listing null-checks
 *         the result before use, so it may be {@code null} — confirm against the POI Javadoc
 */
public ClassID getStorageClsid();
From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java
License:Apache License
public OleProcessor(InputStream stream, String name) throws IOException { try {//from w w w . j a va2 s .c o m fs = new NPOIFSFileSystem(stream); } catch (ArrayIndexOutOfBoundsException | NullPointerException | IllegalArgumentException e) { throw new IOException(); } desc = fs.getShortDescription() + ", with block size: " + fs.getBigBlockSize() + " bytes"; DirectoryEntry rootEntry = fs.getRoot(); root = new ContainerStream(null, rootEntry.getStorageClsid(), name); addAll(root, rootEntry); }
From source file:com.pnf.plugin.ole.parser.streams.OleProcessor.java
License:Apache License
private void addAll(ContainerStream parent, DirectoryEntry root) throws IOException { // Iterate through all entries in the current fs directory for (Entry e : root) { // If it's another directory entry, recurse deeper if (e instanceof DirectoryEntry) { // Recurse and parse files, if any, within the current directory DirectoryEntry d = (DirectoryEntry) e; ClassID id = d.getStorageClsid(); if (Stream.isVbaStorage(d.getName())) { VbaContainerStream vba = new VbaContainerStream(parent, id, d.getName()); addAll(vba, d);// w w w. j ava 2s . com } else { ContainerStream currDir = new ContainerStream(parent, id, d.getName()); addAll(currDir, d); } } else if (e instanceof DocumentEntry) { // Retrieve chained representation of files in image DocumentEntry doc = (DocumentEntry) e; // Create byte array around contents of file. byte[] data = new byte[doc.getSize()]; // Read data from image file into buffer boolean error = false; try (DocumentInputStream stream = new DocumentInputStream(doc)) { try { stream.readFully(data); } catch (IndexOutOfBoundsException i) { error = true; } } catch (IOException e1) { error = true; } ByteBuffer buff = ByteBuffer.wrap(data); new DocumentStream(parent, e.getName(), buff, error); // constructor takes care of notifying parent of a new child } } }
From source file:org.apache.tika.parser.microsoft.AbstractPOIFSExtractor.java
License:Apache License
/** * Handle an office document that's embedded at the POIFS level *///www . j ava 2 s. co m protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException { // Is it an embedded OLE2 document, or an embedded OOXML document? if (dir.hasEntry("Package")) { // It's OOXML (has a ZipFile): Entry ooxml = dir.getEntry("Package"); try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) { ZipContainerDetector detector = new ZipContainerDetector(); MediaType type = detector.detect(stream, new Metadata()); handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml, true); return; } } // It's regular OLE2: // What kind of document is it? Metadata metadata = new Metadata(); metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName()); if (dir.getStorageClsid() != null) { metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString()); } POIFSDocumentType type = POIFSDocumentType.detectType(dir); TikaInputStream embedded = null; try { if (type == POIFSDocumentType.OLE10_NATIVE) { try { // Try to un-wrap the OLE10Native record: Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir); if (ole.getLabel() != null) { metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel()); } byte[] data = ole.getDataBuffer(); embedded = TikaInputStream.get(data); } catch (Ole10NativeException ex) { // Not a valid OLE10Native record, skip it } catch (Exception e) { logger.warn( "Ignoring unexpected exception while parsing possible OLE10_NATIVE embedded document " + dir.getName(), e); } } else if (type == POIFSDocumentType.COMP_OBJ) { try { // Grab the contents and process DocumentEntry contentsEntry; try { contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS"); } catch (FileNotFoundException ioe) { contentsEntry = (DocumentEntry) dir.getEntry("Contents"); } DocumentInputStream inp = 
new DocumentInputStream(contentsEntry); byte[] contents = new byte[contentsEntry.getSize()]; inp.readFully(contents); embedded = TikaInputStream.get(contents); // Try to work out what it is MediaType mediaType = getDetector().detect(embedded, new Metadata()); String extension = type.getExtension(); try { MimeType mimeType = getMimeTypes().forName(mediaType.toString()); extension = mimeType.getExtension(); } catch (MimeTypeException mte) { // No details on this type are known } // Record what we can do about it metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString()); metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + extension); } catch (Exception e) { throw new TikaException("Invalid embedded resource", e); } } else { metadata.set(Metadata.CONTENT_TYPE, type.getType().toString()); metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '.' + type.getExtension()); } // Should we parse it? if (extractor.shouldParseEmbedded(metadata)) { if (embedded == null) { // Make a TikaInputStream that just // passes the root directory of the // embedded document, and is otherwise // empty (byte[0]): embedded = TikaInputStream.get(new byte[0]); embedded.setOpenContainer(dir); } extractor.parseEmbedded(embedded, xhtml, metadata, true); } } finally { if (embedded != null) { embedded.close(); } } }
From source file:org.opf_labs.aqua.OfficeAnalyser.java
License:Apache License
public static void dump(DirectoryEntry root) throws IOException { System.out.println(root.getName() + " : storage CLSID " + root.getStorageClsid()); for (Iterator it = root.getEntries(); it.hasNext();) { Entry entry = (Entry) it.next(); if (entry instanceof DocumentNode) { DocumentNode node = (DocumentNode) entry; System.out.println("Node name: " + node.getName()); System.out.println("Node desc: " + node.getShortDescription()); System.out.println("Node size: " + node.getSize()); DocumentInputStream is = new DocumentInputStream(node); try { PropertySet ps = new PropertySet(is); if (ps.getSectionCount() != 0) { for (Property p : ps.getProperties()) { System.out.println("Prop: " + p.getID() + " " + p.getValue()); }//w ww .j a va 2 s. c o m } } catch (NoPropertySetStreamException e) { // TODO Auto-generated catch block //e.printStackTrace(); } catch (MarkUnsupportedException e) { // TODO Auto-generated catch block //e.printStackTrace(); } //byte[] bytes = new byte[node.getSize()]; //is.read(bytes); //is.close(); //FileOutputStream out = new FileOutputStream(new File(parent, node.getName().trim())); //out.write(bytes); //out.close(); //System.out.println("Node: "+new String(bytes).substring(0, 10)); } else if (entry instanceof DirectoryEntry) { DirectoryEntry dir = (DirectoryEntry) entry; dump(dir); } else { System.err.println("Skipping unsupported POIFS entry: " + entry); } } }
From source file:uk.bl.wa.tika.parser.ole2.OLE2Parser.java
License:Open Source License
public static void dump(DirectoryEntry root) throws IOException { System.out.println(root.getName() + " : storage CLSID " + root.getStorageClsid()); for (Iterator it = root.getEntries(); it.hasNext();) { Entry entry = (Entry) it.next(); if (entry instanceof DocumentNode) { DocumentNode node = (DocumentNode) entry; System.out.println("Node name: " + node.getName()); System.out.println("Node desc: " + node.getShortDescription()); System.out.println("Node size: " + node.getSize()); DocumentInputStream is = new DocumentInputStream(node); try { PropertySet ps = new PropertySet(is); if (ps.getSectionCount() != 0) { for (Property p : ps.getProperties()) { System.out.println("Prop: " + p.getID() + " " + p.getValue()); }//from ww w. j ava 2s . c o m } } catch (NoPropertySetStreamException e) { // TODO Auto-generated catch block //e.printStackTrace(); } //byte[] bytes = new byte[node.getSize()]; //is.read(bytes); //is.close(); //FileOutputStream out = new FileOutputStream(new File(parent, node.getName().trim())); //out.write(bytes); //out.close(); //System.out.println("Node: "+new String(bytes).substring(0, 10)); } else if (entry instanceof DirectoryEntry) { DirectoryEntry dir = (DirectoryEntry) entry; dump(dir); } else { System.err.println("Skipping unsupported POIFS entry: " + entry); } } }