List of usage examples for org.apache.poi.hmef.HMEFMessage#getMessageMAPIAttribute
public MAPIAttribute getMessageMAPIAttribute(MAPIProperty id)
From source file:mj.ocraptor.extraction.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream */// w ww. jav a2s . com public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }
From source file:org.apache.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *//*from w w w . j a v a2 s. c o m*/ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }