Example usage for org.apache.poi.hmef Attachment getFilename

List of usage examples for org.apache.poi.hmef.Attachment.getFilename()

Introduction

On this page you can find example usages of org.apache.poi.hmef.Attachment.getFilename().

Prototype

public String getFilename() 

Source Link

Document

Returns the short filename

Usage

From source file:mj.ocraptor.extraction.tika.parser.microsoft.TNEFParser.java

License:Apache License

/**
 * Reads a TNEF message from the given stream via POI's {@code HMEFMessage},
 * records its subject in the metadata, and passes the compressed-RTF body and
 * every attachment to {@code handleEmbedded}.
 *
 * @param stream   input stream handed to {@code HMEFMessage}
 * @param handler  content handler forwarded to {@code handleEmbedded}
 * @param metadata receives the message subject when one is present
 * @param context  queried for an {@code EmbeddedDocumentExtractor}; when none is
 *                 registered a {@code ParsingEmbeddedDocumentExtractor} is created
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Embedded parts are processed recursively; prefer the extractor supplied by the caller
    EmbeddedDocumentExtractor embeddedExtractor = context.get(EmbeddedDocumentExtractor.class);
    if (embeddedExtractor == null) {
        embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context);
    }

    // POI does the actual TNEF decoding
    HMEFMessage message = new HMEFMessage(stream);

    // Only record a subject that is actually present
    String subject = message.getSubject();
    if (subject != null && !subject.isEmpty()) {
        // TODO: Move to title in Tika 2.0
        metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
    }

    // The message body, when stored as compressed RTF, is treated as an embedded document
    MAPIAttribute bodyAttribute = message.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (bodyAttribute instanceof MAPIRtfAttribute) {
        handleEmbedded("message.rtf", "application/rtf", ((MAPIRtfAttribute) bodyAttribute).getData(),
                embeddedExtractor, handler);
    }

    // Each attachment is handled in turn
    for (Attachment attachment : message.getAttachments()) {
        // Name preference: long filename, then short filename, then "unknown" + extension
        String name = attachment.getLongFilename();
        if (name == null || name.isEmpty()) {
            name = attachment.getFilename();
            if (name == null || name.isEmpty()) {
                String extension = attachment.getExtension();
                if (extension != null) {
                    name = "unknown" + extension;
                }
            }
        }
        handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler);
    }
}

From source file:org.apache.nifi.processors.email.ExtractTNEFAttachments.java

License:Apache License

/**
 * Parses the incoming FlowFile as a TNEF message and emits one child FlowFile per
 * attachment.
 *
 * <p>Children are transferred to {@code REL_ATTACHMENTS}. The incoming FlowFile is
 * transferred to {@code REL_ORIGINAL} when parsing and extraction succeed, and to
 * {@code REL_FAILURE} when the content cannot be parsed or a
 * {@code FlowFileHandlingException} occurs while the children are being built.
 *
 * @param context the process context (not read by this method)
 * @param session the session used to read the input and to create, populate and
 *                transfer FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final List<FlowFile> attachmentsList = new ArrayList<>();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> originalFlowFilesList = new ArrayList<>();

    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                // Constructing the message throws if the content is not TNEF;
                // that case is handled by the outer catch below.
                final HMEFMessage hmefMessage = new HMEFMessage(in);

                // Add original flowfile (may be reverted later on in case of errors)
                originalFlowFilesList.add(originalFlowFile);

                // Nothing to split when the message carries no attachments
                if (!hmefMessage.getAttachments().isEmpty()) {
                    final String originalFlowFileName = originalFlowFile
                            .getAttribute(CoreAttributes.FILENAME.key());
                    try {
                        for (final Attachment attachment : hmefMessage.getAttachments()) {
                            FlowFile split = session.create(originalFlowFile);
                            final Map<String, String> attributes = new HashMap<>();

                            // Prefer the long filename and fall back to the short one, so the
                            // value that was checked is the value that gets stored and a
                            // blank/null name is never written as the filename attribute.
                            String attachmentName = attachment.getLongFilename();
                            if (StringUtils.isBlank(attachmentName)) {
                                attachmentName = attachment.getFilename();
                            }
                            if (StringUtils.isNotBlank(attachmentName)) {
                                attributes.put(CoreAttributes.FILENAME.key(), attachmentName);
                            }

                            String parentUuid = originalFlowFile.getAttribute(CoreAttributes.UUID.key());
                            attributes.put(ATTACHMENT_ORIGINAL_UUID, parentUuid);
                            attributes.put(ATTACHMENT_ORIGINAL_FILENAME, originalFlowFileName);

                            // TODO: Extract Mime Type (HMEF doesn't seem to be able to get this info).

                            split = session.append(split, new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(attachment.getContents());
                                }
                            });
                            split = session.putAllAttributes(split, attributes);
                            attachmentsList.add(split);
                        }
                    } catch (FlowFileHandlingException e) {
                        // Something went wrong
                        // Removing splits that may have been created
                        session.remove(attachmentsList);
                        // Forget the removed splits so the transfer after the read does not
                        // try to route FlowFiles that were already removed from the session
                        attachmentsList.clear();
                        // Removing the original flow from its list
                        originalFlowFilesList.remove(originalFlowFile);
                        logger.error(
                                "Flowfile {} triggered error {} while processing message removing generated FlowFiles from sessions",
                                new Object[] { originalFlowFile, e });
                        invalidFlowFilesList.add(originalFlowFile);
                    }
                }
            } catch (Exception e) {
                // Another error hit...
                // Removing the original flow from its list
                originalFlowFilesList.remove(originalFlowFile);
                logger.error("Could not parse the flowfile {} as an email, treating as failure",
                        new Object[] { originalFlowFile, e });
                // Message is invalid or triggered an error during parsing
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });

    session.transfer(attachmentsList, REL_ATTACHMENTS);

    // As per above code, the original FlowFile ends up in exactly one of these
    // lists, depending on whether parsing and extraction succeeded.
    session.transfer(invalidFlowFilesList, REL_FAILURE);
    session.transfer(originalFlowFilesList, REL_ORIGINAL);

    // check if attachments have been extracted
    if (!attachmentsList.isEmpty()) {
        if (attachmentsList.size() > 10) {
            // If more than 10, summarise log
            logger.info("Split {} into {} files", new Object[] { originalFlowFile, attachmentsList.size() });
        } else {
            // Otherwise be more verbose and list each individual split
            logger.info("Split {} into {} files: {}",
                    new Object[] { originalFlowFile, attachmentsList.size(), attachmentsList });
        }
    }
}

From source file:org.apache.tika.parser.microsoft.TNEFParser.java

License:Apache License

/**
 * Decodes a TNEF message with POI's {@code HMEFMessage}, copies its subject into
 * the metadata, and forwards the compressed-RTF body plus each attachment to
 * {@code handleEmbedded}.
 *
 * @param stream   source of the TNEF data, passed to {@code HMEFMessage}
 * @param handler  content handler passed through to {@code handleEmbedded}
 * @param metadata target for the message subject, set only when non-empty
 * @param context  looked up for an {@code EmbeddedDocumentExtractor}; a
 *                 {@code ParsingEmbeddedDocumentExtractor} is used when absent
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // We work by recursing, so pick the extractor that will process the embedded parts
    final EmbeddedDocumentExtractor configured = context.get(EmbeddedDocumentExtractor.class);
    final EmbeddedDocumentExtractor embeddedExtractor = (configured != null)
            ? configured
            : new ParsingEmbeddedDocumentExtractor(context);

    // Ask POI to process the file for us
    final HMEFMessage tnef = new HMEFMessage(stream);

    // Set the message subject if known
    final String subject = tnef.getSubject();
    if (!(subject == null || subject.length() == 0)) {
        // TODO: Move to title in Tika 2.0
        metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
    }

    // Recurse into the message body RTF (instanceof is false for null, so no separate null check)
    final MAPIAttribute rtfCandidate = tnef.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED);
    if (rtfCandidate instanceof MAPIRtfAttribute) {
        final MAPIRtfAttribute rtfBody = (MAPIRtfAttribute) rtfCandidate;
        handleEmbedded("message.rtf", "application/rtf", rtfBody.getData(), embeddedExtractor, handler);
    }

    // Recurse into each attachment in turn
    for (Attachment part : tnef.getAttachments()) {
        String name = part.getLongFilename();

        // No long filename: try the short one
        boolean missing = (name == null || name.length() == 0);
        if (missing) {
            name = part.getFilename();
            missing = (name == null || name.length() == 0);
        }

        // Still nothing: synthesise a name from the extension, if there is one
        if (missing) {
            final String ext = part.getExtension();
            if (ext != null) {
                name = "unknown" + ext;
            }
        }

        handleEmbedded(name, null, part.getContents(), embeddedExtractor, handler);
    }
}