Example usage for org.apache.poi.hmef Attachment getContents

List of usage examples for org.apache.poi.hmef Attachment getContents

Introduction

On this page you can find example usages of org.apache.poi.hmef Attachment getContents.

Prototype

public byte[] getContents() 

Source Link

Document

Returns the contents of the attachment.

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.TNEFParser.java

License:Apache License

/**
 * Reads a TNEF message from the stream via POI's {@code HMEFMessage}, records its
 * subject in the metadata, and passes the compressed-RTF body and each attachment
 * to {@code handleEmbedded} for recursive processing.
 *
 * @param stream   input stream handed to {@code HMEFMessage}
 * @param handler  content handler forwarded to {@code handleEmbedded}
 * @param metadata receives the message subject when one is present
 * @param context  consulted for an {@code EmbeddedDocumentExtractor}
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Prefer the extractor registered in the context; otherwise build a parsing one
    EmbeddedDocumentExtractor configured = context.get(EmbeddedDocumentExtractor.class);
    EmbeddedDocumentExtractor embeddedExtractor = (configured != null)
            ? configured
            : new ParsingEmbeddedDocumentExtractor(context);

    // POI does the actual TNEF decoding
    HMEFMessage msg = new HMEFMessage(stream);

    // Only a non-empty subject is recorded
    String subject = msg.getSubject();
    boolean hasSubject = subject != null && !subject.isEmpty();
    if (hasSubject) {
        // TODO: Move to title in Tika 2.0
        metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
    }

    // The message body, when stored as compressed RTF, is treated as an embedded document
    MAPIAttribute bodyAttr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (bodyAttr instanceof MAPIRtfAttribute) {
        handleEmbedded("message.rtf", "application/rtf", ((MAPIRtfAttribute) bodyAttr).getData(),
                embeddedExtractor, handler);
    }

    // Each attachment is handed on under the best name available:
    // long filename, then short filename, then "unknown" + extension
    for (Attachment attachment : msg.getAttachments()) {
        String label = attachment.getLongFilename();
        boolean missing = (label == null || label.isEmpty());
        if (missing) {
            label = attachment.getFilename();
            missing = (label == null || label.isEmpty());
        }
        if (missing) {
            String ext = attachment.getExtension();
            if (ext != null) {
                label = "unknown" + ext;
            }
        }
        handleEmbedded(label, null, attachment.getContents(), embeddedExtractor, handler);
    }
}

From source file:org.alfresco.repo.imap.AttachmentsExtractor.java

License:Open Source License

/**
 * Create an attachment given a mime part.
 * <p>
 * A part whose base content type is {@code application/ms-tnef} is unpacked with POI and
 * each contained attachment becomes its own content node; a TNEF attachment whose name
 * already exists in the folder is left untouched. Any other part is stored as a single
 * new node, under a generated unique name if the plain file name is already taken.
 *
 * @param messageFile the file containing the message
 * @param attachmentsFolderRef where to put the attachment
 * @param part the mime part
 * @throws MessagingException
 * @throws IOException
 */
private void createAttachment(NodeRef messageFile, NodeRef attachmentsFolderRef, Part part)
        throws MessagingException, IOException {
    String fileName = part.getFileName();
    if (fileName == null || fileName.isEmpty()) {
        fileName = "unnamed";
    }
    try {
        fileName = MimeUtility.decodeText(fileName);
    } catch (UnsupportedEncodingException e) {
        // Best effort: keep the undecoded name
        if (logger.isWarnEnabled()) {
            logger.warn("Cannot decode file name '" + fileName + "'", e);
        }
    }

    ContentType contentType = new ContentType(part.getContentType());

    if (contentType.getBaseType().equalsIgnoreCase("application/ms-tnef")) {
        // The content is TNEF
        HMEFMessage hmef = new HMEFMessage(part.getInputStream());

        for (org.apache.poi.hmef.Attachment attachment : hmef.getAttachments()) {
            String extension = attachment.getExtension();

            // The long filename is optional in TNEF; fall back to the short name and
            // finally to a synthetic one so that a null/empty name is never used for
            // the lookup or the node creation below.
            String subName = attachment.getLongFilename();
            if (subName == null || subName.isEmpty()) {
                subName = attachment.getFilename();
            }
            if (subName == null || subName.isEmpty()) {
                subName = (extension == null) ? "unnamed" : "unnamed" + extension;
            }

            NodeRef attachmentNode = fileFolderService.searchSimple(attachmentsFolderRef, subName);
            if (attachmentNode == null) {
                /*
                 * The node with the given name does not already exist: create the content node to contain the
                 * attachment
                 */
                FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, subName,
                        ContentModel.TYPE_CONTENT);

                attachmentNode = createdFile.getNodeRef();

                serviceRegistry.getNodeService().createAssociation(messageFile, attachmentNode,
                        ImapModel.ASSOC_IMAP_ATTACHMENT);

                byte[] bytes = attachment.getContents();
                ContentWriter writer = fileFolderService.getWriter(attachmentNode);

                // TODO ENCODING - attachment.getAttribute(TNEFProperty.);
                // NOTE(review): the TNEF extension attribute appears to carry a leading dot
                // (e.g. ".doc"); it is stripped here on the assumption that the mimetype
                // lookup is keyed by the bare extension. Harmless if no dot is present.
                String lookupExtension = extension;
                if (lookupExtension != null && lookupExtension.startsWith(".")) {
                    lookupExtension = lookupExtension.substring(1);
                }
                String mimetype = mimetypeService.getMimetype(lookupExtension);
                if (mimetype != null) {
                    writer.setMimetype(mimetype);
                }

                OutputStream os = writer.getContentOutputStream();
                ByteArrayInputStream is = new ByteArrayInputStream(bytes);
                FileCopyUtils.copy(is, os);
            }
        }
    } else {
        // not TNEF
        // An existing node with the same name is never overwritten or deleted; the new
        // attachment is stored under a generated unique name instead.
        NodeRef existing = fileFolderService.searchSimple(attachmentsFolderRef, fileName);
        String targetName = (existing == null)
                ? fileName
                : imapService.generateUniqueFilename(attachmentsFolderRef, fileName);

        FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, targetName,
                ContentModel.TYPE_CONTENT);
        nodeService.createAssociation(messageFile, createdFile.getNodeRef(),
                ImapModel.ASSOC_IMAP_ATTACHMENT);
        NodeRef attachmentFile = createdFile.getNodeRef();

        // The attachment's description is set to the name of the message it came from
        nodeService.setProperty(attachmentFile, ContentModel.PROP_DESCRIPTION,
                nodeService.getProperty(messageFile, ContentModel.PROP_NAME));

        ContentWriter writer = fileFolderService.getWriter(attachmentFile);
        writer.setMimetype(contentType.getBaseType());
        OutputStream os = writer.getContentOutputStream();
        FileCopyUtils.copy(part.getInputStream(), os);
    }
}

From source file:org.apache.nifi.processors.email.ExtractTNEFAttachments.java

License:Apache License

/**
 * Parses the incoming FlowFile as a TNEF message and emits one child FlowFile per
 * attachment.
 * <p>
 * Children are transferred to {@code REL_ATTACHMENTS} and carry the parent's UUID and
 * filename in {@code ATTACHMENT_ORIGINAL_UUID} / {@code ATTACHMENT_ORIGINAL_FILENAME}.
 * The incoming FlowFile goes to {@code REL_ORIGINAL} when parsing succeeded and to
 * {@code REL_FAILURE} otherwise.
 *
 * @param context the process context (not read by this method)
 * @param session the session used to read the input and create the children
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();

    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                // This will trigger an exception in case content is not a TNEF;
                // the outer catch below then routes the FlowFile to failure.
                final HMEFMessage hmefMessage = new HMEFMessage(in);

                // Add original flowfile (may revert later on in case of errors)
                originalFlowFilesList.add(originalFlowFile);

                final List<Attachment> attachments = hmefMessage.getAttachments();
                // Attachments list isn't empty, proceeding.
                if (!attachments.isEmpty()) {
                    final String originalFlowFileName = originalFlowFile
                            .getAttribute(CoreAttributes.FILENAME.key());
                    final String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                    try {
                        for (final Attachment attachment : attachments) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();

                            // Use the long filename when present, otherwise the short one.
                            // The attribute is only set when a usable name exists, so a
                            // null value is never put into the attribute map.
                            String attachmentName = attachment.getLongFilename();
                            if (!StringUtils.isNotBlank(attachmentName)) {
                                attachmentName = attachment.getFilename();
                            }
                            if (StringUtils.isNotBlank(attachmentName)) {
                                attributes.put(CoreAttributes.FILENAME.key(), attachmentName);
                            }

                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);

                            // TODO: Extract Mime Type (HMEF doesn't seem to be able to get this info).

                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(attachment.getContents());
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created, and forgetting them so
                        // they are not handed to session.transfer() after having been removed
                        session.remove(attachmentsList);
                        attachmentsList.clear();
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error(
                                "Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions",
                                new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure",
                        new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });

    session.transfer(attachmentsList, REL_ATTACHMENTS);

    // As per above code, originalFlowFile is routed to failure or to original
    // depending on whether parsing and splitting succeeded.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);

    // check if attachments have been extracted
    if (attachmentsList.size() != 0) {
        if (attachmentsList.size() > 10) {
            // If more than 10, summarise log
            logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
        } else {
            // Otherwise be more verbose and list each individual split
            logger.info("Split {} into {} files: {}",
                    new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
        }
    }
}

From source file:org.apache.tika.parser.microsoft.TNEFParser.java

License:Apache License

/**
 * Decodes a TNEF message with POI's {@code HMEFMessage}. The subject, if any, is stored
 * in the metadata; the compressed-RTF message body and every attachment are forwarded
 * to {@code handleEmbedded} so they can be processed as embedded documents.
 *
 * @param stream   source of the TNEF data
 * @param handler  content handler passed through to {@code handleEmbedded}
 * @param metadata target for the message subject
 * @param context  may supply an {@code EmbeddedDocumentExtractor}
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Recursion is delegated to an extractor: the context's one, or a default
    EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
    if (embeddedExtractor == null) {
        embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    // Let POI decode the message
    HMEFMessage msg = new HMEFMessage(stream);

    // Record the subject unless it is missing or empty
    String subject = msg.getSubject();
    if (!(subject == null || subject.length() == 0)) {
        // TODO: Move to title in Tika 2.0
        metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
    }

    // Message body: only handled when the attribute really is an RTF attribute
    MAPIAttribute rtfCandidate = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (rtfCandidate instanceof MAPIRtfAttribute) {
        MAPIRtfAttribute body = (MAPIRtfAttribute) rtfCandidate;
        handleEmbedded("message.rtf", "application/rtf", body.getData(), embeddedExtractor, handler);
    }

    // Attachments: name resolution order is long filename, short filename,
    // then "unknown" followed by the extension when one is known
    for (Attachment att : msg.getAttachments()) {
        String name = att.getLongFilename();
        if (name == null || name.isEmpty()) {
            name = att.getFilename();
        }
        if (name == null || name.isEmpty()) {
            String suffix = att.getExtension();
            name = (suffix == null) ? name : "unknown" + suffix;
        }
        handleEmbedded(name, null, att.getContents(), embeddedExtractor, handler);
    }
}