List of usage examples for org.apache.poi.hmef Attachment getContents
public byte[] getContents()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *///from w w w . ja v a 2 s .c o m public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }
From source file:org.alfresco.repo.imap.AttachmentsExtractor.java
License:Open Source License
/** * Create an attachment given a mime part * //from ww w . j av a 2 s . c om * @param messageFile the file containing the message * @param attachmentsFolderRef where to put the attachment * @param part the mime part * @throws MessagingException * @throws IOException */ private void createAttachment(NodeRef messageFile, NodeRef attachmentsFolderRef, Part part) throws MessagingException, IOException { String fileName = part.getFileName(); if (fileName == null || fileName.isEmpty()) { fileName = "unnamed"; } try { fileName = MimeUtility.decodeText(fileName); } catch (UnsupportedEncodingException e) { if (logger.isWarnEnabled()) { logger.warn("Cannot decode file name '" + fileName + "'", e); } } ContentType contentType = new ContentType(part.getContentType()); if (contentType.getBaseType().equalsIgnoreCase("application/ms-tnef")) { // The content is TNEF HMEFMessage hmef = new HMEFMessage(part.getInputStream()); // hmef.getBody(); List<org.apache.poi.hmef.Attachment> attachments = hmef.getAttachments(); for (org.apache.poi.hmef.Attachment attachment : attachments) { String subName = attachment.getLongFilename(); NodeRef attachmentNode = fileFolderService.searchSimple(attachmentsFolderRef, subName); if (attachmentNode == null) { /* * If the node with the given name does not already exist Create the content node to contain the attachment */ FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, subName, ContentModel.TYPE_CONTENT); attachmentNode = createdFile.getNodeRef(); serviceRegistry.getNodeService().createAssociation(messageFile, attachmentNode, ImapModel.ASSOC_IMAP_ATTACHMENT); byte[] bytes = attachment.getContents(); ContentWriter writer = fileFolderService.getWriter(attachmentNode); // TODO ENCODING - attachment.getAttribute(TNEFProperty.); String extension = attachment.getExtension(); String mimetype = mimetypeService.getMimetype(extension); if (mimetype != null) { writer.setMimetype(mimetype); } OutputStream os = writer.getContentOutputStream(); ByteArrayInputStream is = new ByteArrayInputStream(bytes); FileCopyUtils.copy(is, os); } } } else { // not TNEF NodeRef attachmentFile = fileFolderService.searchSimple(attachmentsFolderRef, fileName); // The one possible behaviour /* * if (result.size() > 0) { for (FileInfo fi : result) { fileFolderService.delete(fi.getNodeRef()); } } */ // And another one behaviour which will overwrite the content of the existing file. It is performance preferable. if (attachmentFile == null) { FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, fileName, ContentModel.TYPE_CONTENT); nodeService.createAssociation(messageFile, createdFile.getNodeRef(), ImapModel.ASSOC_IMAP_ATTACHMENT); attachmentFile = createdFile.getNodeRef(); } else { String newFileName = imapService.generateUniqueFilename(attachmentsFolderRef, fileName); FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, newFileName, ContentModel.TYPE_CONTENT); nodeService.createAssociation(messageFile, createdFile.getNodeRef(), ImapModel.ASSOC_IMAP_ATTACHMENT); attachmentFile = createdFile.getNodeRef(); } nodeService.setProperty(attachmentFile, ContentModel.PROP_DESCRIPTION, nodeService.getProperty(messageFile, ContentModel.PROP_NAME)); ContentWriter writer = fileFolderService.getWriter(attachmentFile); writer.setMimetype(contentType.getBaseType()); OutputStream os = writer.getContentOutputStream(); FileCopyUtils.copy(part.getInputStream(), os); } }
From source file:org.apache.nifi.processors.email.ExtractTNEFAttachments.java
License:Apache License
@Override public void onTrigger(final ProcessContext context, final ProcessSession session) { final ComponentLog logger = getLogger(); final FlowFile originalFlowFile = session.get(); if (originalFlowFile == null) { return;//from ww w . jav a 2 s.c o m } final List<FlowFile> attachmentsList = new ArrayList<>(); final List<FlowFile> invalidFlowFilesList = new ArrayList<>(); final List<FlowFile> originalFlowFilesList = new ArrayList<>(); session.read(originalFlowFile, new InputStreamCallback() { @Override public void process(final InputStream rawIn) throws IOException { try (final InputStream in = new BufferedInputStream(rawIn)) { Properties props = new Properties(); HMEFMessage hmefMessage = null; // This will trigger an exception in case content is not a TNEF. hmefMessage = new HMEFMessage(in); // Add otiginal flowfile (may revert later on in case of errors) // originalFlowFilesList.add(originalFlowFile); if (hmefMessage != null) { // Attachments isn empty, proceeding. if (!hmefMessage.getAttachments().isEmpty()) { final String originalFlowFileName = originalFlowFile .getAttribute(CoreAttributes.FILENAME.key()); try { for (final Attachment attachment : hmefMessage.getAttachments()) { FlowFile split = session.create(originalFlowFile); final Map<String, String> attributes = new HashMap<>(); if (StringUtils.isNotBlank(attachment.getLongFilename())) { attributes.put(CoreAttributes.FILENAME.key(), attachment.getFilename()); } String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key()); attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid); attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName); // TODO: Extract Mime Type (HMEF doesn't seem to be able to get this info. split = session.append(split, new OutputStreamCallback() { @Override public void process(OutputStream out) throws IOException { out.write(attachment.getContents()); } }); split = session.putAllAttributes(split, attributes); attachmentsList.add(split); } } catch (FlowFileHandlingException e) { // Something went wrong // Removing splits that may have been created session.remove(attachmentsList); // Removing the original flow from its list originalFlowFilesList.remove(originalFlowFile); logger.error( "Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions", new Object[] { originalFlowFile, e }); invalidFlowFilesList.add(originalFlowFile); } } } } catch (Exception e) { // Another error hit... // Removing the original flow from its list originalFlowFilesList.remove(originalFlowFile); logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e }); // Message is invalid or triggered an error during parsing invalidFlowFilesList.add(originalFlowFile); } } }); session.transfer(attachmentsList, REL_ATTACHMENTS); // As per above code, originalFlowfile may be routed to invalid or // original depending on RFC2822 compliance. session.transfer(invalidFlowFilesList, REL_FAILURE); session.transfer(originalFlowFilesList, REL_ORIGINAL); // check if attachments have been extracted if (attachmentsList.size() != 0) { if (attachmentsList.size() > 10) { // If more than 10, summarise log logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() }); } else { // Otherwise be more verbose and list each individual split logger.info("Split {} into {} files: {}", new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList }); } } }
From source file:org.apache.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *//*w w w . j a v a 2s .c om*/ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }