List of usage examples for org.apache.poi.poifs.filesystem Entry isDocumentEntry
/**
 * Reports whether this entry is a document entry.
 *
 * NOTE(review): presumably a {@code true} result means the entry can be cast to
 * {@code DocumentEntry}; confirm against the POI {@code Entry} Javadoc.
 *
 * @return {@code true} if this entry is a document entry
 */
public boolean isDocumentEntry();
From source file:com.argo.hwp.v5.HwpTextExtractorV5.java
License:Open Source License
/** * HWP? FileHeader /*from www .j av a2 s.c o m*/ * * @param fs * @return * @throws IOException */ private static FileHeader getHeader(NPOIFSFileSystem fs) throws IOException { DirectoryNode root = fs.getRoot(); // ??? p.18 // FileHeader Entry headerEntry = root.getEntry("FileHeader"); if (!headerEntry.isDocumentEntry()) return null; // ? byte[] header = new byte[256]; // FileHeader ? 256 DocumentInputStream headerStream = new DocumentInputStream((DocumentEntry) headerEntry); try { int read = headerStream.read(header); if (read != 256 || !Arrays.equals(HWP_V5_SIGNATURE, Arrays.copyOfRange(header, 0, HWP_V5_SIGNATURE.length))) return null; } finally { headerStream.close(); } FileHeader fileHeader = new FileHeader(); // . debug fileHeader.version = HwpVersion.parseVersion(LittleEndian.getUInt(header, 32)); long flags = LittleEndian.getUInt(header, 36); log.debug("Flags={}", Long.toBinaryString(flags).replace(' ', '0')); fileHeader.compressed = (flags & 0x01) == 0x01; fileHeader.encrypted = (flags & 0x02) == 0x02; fileHeader.viewtext = (flags & 0x04) == 0x04; return fileHeader; }
From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java
License:Open Source License
/**
 * Opens an EIP package and prepares the iterators used to walk its face files.
 *
 * <p>The constructor locates the config and files directories under the root, parses the
 * config file found in the config directory, and positions the group and face iterators
 * on the files directory. It never throws: if an {@link IOException} occurs, the input
 * stream is closed and reset to {@code null}, and any fields not yet assigned keep their
 * previous values.
 *
 * @param file path of the EIP file to open
 * @param destDir destination directory, stored on the instance
 */
@SuppressWarnings("unchecked")
public EIPImporter(String file, String destDir) {
    this.destDir = destDir;
    buffer = new byte[8192];
    try {
        // Open the EIP file as an OLE2 compound file system.
        eipStream = new FileInputStream(file);
        POIFSFileSystem eipSystem = new POIFSFileSystem(eipStream);

        // Find the config and files directories directly under the root.
        DirectoryEntry configDir = null;
        DirectoryEntry fileDir = null;
        DirectoryEntry root = eipSystem.getRoot();
        Iterator<Entry> i = root.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDirectoryEntry()) {
                String lowerName = e.getName().toLowerCase();
                if (CONFIG_DIRECTORY.equals(lowerName)) {
                    configDir = (DirectoryEntry) e;
                } else if (FILES_DIRECTORY.equals(lowerName)) {
                    fileDir = (DirectoryEntry) e;
                }
            }
        }

        // Both directories are required.
        if (configDir == null || fileDir == null) {
            throw new IOException("Can't find correct directories");
        }

        // Locate and parse the config file.
        i = configDir.getEntries();
        while (i.hasNext()) {
            Entry e = i.next();
            if (e.isDocumentEntry() && CONFIG_FILE.equals(e.getName().toLowerCase())) {
                DocumentInputStream dis = new DocumentInputStream((DocumentEntry) e);
                try {
                    parser = new FaceXMLParser(dis);
                } finally {
                    // Close the stream even if constructing the parser fails.
                    dis.close();
                }
                break;
            }
        }

        // The config file is required.
        if (parser == null) {
            throw new IOException("Can't find " + CONFIG_FILE);
        }

        // Start iteration at the files directory.
        groupIterator = fileDir.getEntries();
        currentDir = fileDir;
        faceIterator = currentDir.getEntries();
    } catch (IOException e) {
        // Best effort by design: the failure is not propagated, only the stream is released.
        try {
            if (eipStream != null) {
                eipStream.close();
                eipStream = null;
            }
        } catch (IOException ignored) {
            // Nothing more can be done if closing fails.
        }
    }
}
From source file:edu.tsinghua.lumaqq.customface.EIPImporter.java
License:Open Source License
/**
 * Advances the face iterator until it yields a document entry.
 *
 * @return the next document entry, or {@code null} if the iterator runs out first
 */
private DocumentEntry getNextDocument() {
    while (faceIterator.hasNext()) {
        Entry candidate = faceIterator.next();
        if (candidate.isDocumentEntry()) {
            return (DocumentEntry) candidate;
        }
    }
    return null;
}
From source file:mj.ocraptor.extraction.tika.parser.microsoft.POIFSContainerDetector.java
License:Apache License
/**
 * Is this one of the kinds of formats which uses CompObj to
 * store all of their data, eg Star Draw, Star Impress or
 * (older) Works?
 * If not, it's likely an embedded resource
 *
 * @param root directory whose CompObj stream is inspected
 * @return {@code SDA}, {@code SDD} or {@code WPS} when the matching application name is
 *         found in the stream; {@code OLE} otherwise, including when any exception occurs
 */
private static MediaType processCompObjFormatType(DirectoryEntry root) {
    try {
        Entry e = root.getEntry("\u0001CompObj");
        if (e != null && e.isDocumentEntry()) {
            DocumentNode dn = (DocumentNode) e;
            DocumentInputStream stream = new DocumentInputStream(dn);
            byte[] bytes;
            try {
                bytes = IOUtils.toByteArray(stream);
            } finally {
                // Release the stream once its content has been copied.
                stream.close();
            }
            /*
             * This array contains a string with a normal ASCII name of the
             * application used to create this file. We want to search for that
             * name.
             */
            if (arrayContains(bytes, STAR_DRAW)) {
                return SDA;
            } else if (arrayContains(bytes, STAR_IMPRESS)) {
                return SDD;
            } else if (arrayContains(bytes, WORKS_QUILL96)) {
                return WPS;
            }
        }
    } catch (Exception ignored) {
        /*
         * "root.getEntry" can throw FileNotFoundException. The code inside
         * "if" can throw IOExceptions. Theoretically. Practically no
         * exceptions will likely ever appear.
         *
         * Swallow all of them. If any occur, we just assume that we can't
         * distinguish between Draw and Impress and return something safe:
         * x-tika-msoffice
         */
    }
    return OLE;
}
From source file:org.apache.tika.parser.microsoft.POIFSContainerDetector.java
License:Apache License
/** * Is this one of the kinds of formats which uses CompObj to * store all of their data, eg Star Draw, Star Impress or * (older) Works?/*from ww w . j a v a2s.c o m*/ * If not, it's likely an embedded resource */ private static MediaType processCompObjFormatType(DirectoryEntry root) { try { Entry e = root.getEntry("\u0001CompObj"); if (e != null && e.isDocumentEntry()) { DocumentNode dn = (DocumentNode) e; DocumentInputStream stream = new DocumentInputStream(dn); byte[] bytes = IOUtils.toByteArray(stream); /* * This array contains a string with a normal ASCII name of the * application used to create this file. We want to search for that * name. */ if (arrayContains(bytes, STAR_DRAW)) { return SDA; } else if (arrayContains(bytes, STAR_IMPRESS)) { return SDD; } else if (arrayContains(bytes, WORKS_QUILL96)) { return WPS; } } } catch (Exception e) { /* * "root.getEntry" can throw FileNotFoundException. The code inside * "if" can throw IOExceptions. Theoretically. Practically no * exceptions will likely ever appear. * * Swallow all of them. If any occur, we just assume that we can't * distinguish between Draw and Impress and return something safe: * x-tika-msoffice */ } return OLE; }
From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java
License:Open Source License
/** * Recursively parses the complete .msg file with the * help of the POI library. The parsed information is * put into the {@link Message} object./* w w w.j a v a 2 s . c o m*/ * * @param dir The current node in the .msg file. * @param msg The resulting {@link Message} object. * @throws IOException Thrown if the .msg file could not * be parsed. * @throws UnsupportedOperationException Thrown if * the .msg file contains unknown data. */ protected void checkDirectoryEntry(DirectoryEntry dir, Message msg) throws IOException, UnsupportedOperationException { // we iterate through all entries in the current directory for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) { Entry entry = (Entry) iter.next(); // check whether the entry is either a directory entry // or a document entry if (entry.isDirectoryEntry()) { DirectoryEntry de = (DirectoryEntry) entry; // attachments have a special name and // have to be handled separately at this point if (de.getName().startsWith("__attach_version1.0")) { this.parseAttachment(de, msg); } else if (de.getName().startsWith("__recip_version1.0")) { // a recipient entry has been found (which is also a directory entry itself) this.checkRecipientDirectoryEntry(de, msg); } else { // a directory entry has been found. this // node will be recursively checked this.checkDirectoryEntry(de, msg); } } else if (entry.isDocumentEntry()) { // a document entry contains information about // the mail (e.g, from, to, subject, ...) DocumentEntry de = (DocumentEntry) entry; // the data is accessed by getting an input stream // for the given document entry DocumentInputStream dstream = new DocumentInputStream(de); // analyze the document entry // (i.e., get class and data type) FieldInformation info = this.analyzeDocumentEntry(de); // create a Java object from the data provided // by the input stream. depending on the field // information, either a String or a byte[] will // be returned. 
other datatypes are not yet supported Object data = this.getData(dstream, info); logger.finest(" Document data: " + ((data == null) ? "null" : data.toString())); // the data is written into the Message object msg.setProperty(info.getClazz(), data); } else { // any other type is not supported } } }
From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java
License:Open Source License
/** * Parses a recipient directory entry which holds informations about one of possibly multiple recipients. * The parsed information is put into the {@link Message} object. * // w w w .j a v a 2s .c om * @param dir The current node in the .msg file. * @param msg The resulting {@link Message} object. * @throws IOException Thrown if the .msg file could not * be parsed. * @throws UnsupportedOperationException Thrown if * the .msg file contains unknown data. */ protected void checkRecipientDirectoryEntry(DirectoryEntry dir, Message msg) throws IOException, UnsupportedOperationException { RecipientEntry recipient = new RecipientEntry(); // we iterate through all entries in the current directory for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) { Entry entry = (Entry) iter.next(); // check whether the entry is either a directory entry // or a document entry, while we are just interested in document entries on this level if (entry.isDirectoryEntry()) { // not expected within a recipient entry } else if (entry.isDocumentEntry()) { // a document entry contains information about // the mail (e.g, from, to, subject, ...) DocumentEntry de = (DocumentEntry) entry; // the data is accessed by getting an input stream // for the given document entry DocumentInputStream dstream = new DocumentInputStream(de); // analyze the document entry // (i.e., get class and data type) FieldInformation info = this.analyzeDocumentEntry(de); // create a Java object from the data provided // by the input stream. depending on the field // information, either a String or a byte[] will // be returned. other datatypes are not yet supported Object data = this.getData(dstream, info); logger.finest(" Document data: " + ((data == null) ? "null" : data.toString())); // the data is written into the Message object recipient.setProperty(info.getClazz(), data); } else { // any other type is not supported } } //after all properties are set -> add recipient to msg object msg.addRecipient(recipient); }
From source file:org.opencrx.application.uses.com.auxilii.msgparser.MsgParser.java
License:Open Source License
/** * Creates an {@link Attachment} object based on * the given directory entry. The entry may either * point to an attached file or to an//w w w.j av a2s .c om * attached .msg file, which will be added * as a {@link MsgAttachment} object instead. * * @param dir The directory entry containing the attachment * document entry and some other document entries * describing the attachment (name, extension, mime type, ...) * @param msg The {@link Message} object that this * attachment should be added to. * @throws IOException Thrown if the attachment could * not be parsed/read. */ protected void parseAttachment(DirectoryEntry dir, Message msg) throws IOException { FileAttachment attachment = new FileAttachment(); // iterate through all document entries for (Iterator<?> iter = dir.getEntries(); iter.hasNext();) { Entry entry = (Entry) iter.next(); if (entry.isDocumentEntry()) { // the document entry may contain information // about the attachment DocumentEntry de = (DocumentEntry) entry; FieldInformation info = analyzeDocumentEntry(de); DocumentInputStream dstream = new DocumentInputStream(de); Object data = this.getData(dstream, info); String clazz = info.getClazz(); // we provide the class and data of the document // entry to the attachment. the attachment implementation // has to know the semantics of the field names attachment.setProperty(clazz, data, de); } else { // a directory within the attachment directory // entry means that a .msg file is attached // at this point. we recursively parse // this .msg file and add it as a MsgAttachment // object to the current Message object. Message attachmentMsg = new Message(); MsgAttachment msgAttachment = new MsgAttachment(); msgAttachment.setMessage(attachmentMsg); msg.addAttachment(msgAttachment); this.checkDirectoryEntry((DirectoryEntry) entry, attachmentMsg); } } // only if there was really an attachment, we // add this object to the Message object if (attachment.getSize() > -1) { msg.addAttachment(attachment); } }
From source file:poi.hpsf.examples.CopyCompare.java
License:Apache License
/** * <p>Compares two {@link DirectoryEntry} instances of a POI file system. * The directories must contain the same streams with the same names and * contents.</p>/*from ww w.ja v a 2s . c o m*/ * * @param d1 The first directory. * @param d2 The second directory. * @param msg The method may append human-readable comparison messages to * this string buffer. * @return <code>true</code> if the directories are equal, else * <code>false</code>. * @exception MarkUnsupportedException if a POI document stream does not * support the mark() operation. * @exception NoPropertySetStreamException if the application tries to * create a property set from a POI document stream that is not a property * set stream. * @throws java.io.UnsupportedEncodingException * @exception java.io.IOException if any I/O exception occurs. */ private static boolean equal(final DirectoryEntry d1, final DirectoryEntry d2, final StringBuffer msg) throws NoPropertySetStreamException, MarkUnsupportedException, UnsupportedEncodingException, IOException { boolean equal = true; /* Iterate over d1 and compare each entry with its counterpart in d2. */ for (final Iterator i = d1.getEntries(); equal && i.hasNext();) { final Entry e1 = (Entry) i.next(); final String n1 = e1.getName(); Entry e2 = null; try { e2 = d2.getEntry(n1); } catch (FileNotFoundException ex) { msg.append("Document \"" + e1 + "\" exists, document \"" + e2 + "\" does not.\n"); equal = false; break; } if (e1.isDirectoryEntry() && e2.isDirectoryEntry()) equal = equal((DirectoryEntry) e1, (DirectoryEntry) e2, msg); else if (e1.isDocumentEntry() && e2.isDocumentEntry()) equal = equal((DocumentEntry) e1, (DocumentEntry) e2, msg); else { msg.append("One of \"" + e1 + "\" and \"" + e2 + "\" is a " + "document while the other one is a directory.\n"); equal = false; } } /* Iterate over d2 just to make sure that there are no entries in d2 * that are not in d1. 
*/ for (final Iterator i = d2.getEntries(); equal && i.hasNext();) { final Entry e2 = (Entry) i.next(); final String n2 = e2.getName(); Entry e1 = null; try { e1 = d1.getEntry(n2); } catch (FileNotFoundException ex) { msg.append("Document \"" + e2 + "\" exitsts, document \"" + e1 + "\" does not.\n"); equal = false; break; } } return equal; }