List of usage examples for org.apache.poi.hmef Attachment getExtension
public String getExtension()
From source file:mj.ocraptor.extraction.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *//*ww w . j ava 2s. c o m*/ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }
From source file:org.alfresco.repo.imap.AttachmentsExtractor.java
License:Open Source License
/** * Create an attachment given a mime part * //from www . j a v a2 s . c om * @param messageFile the file containing the message * @param attachmentsFolderRef where to put the attachment * @param part the mime part * @throws MessagingException * @throws IOException */ private void createAttachment(NodeRef messageFile, NodeRef attachmentsFolderRef, Part part) throws MessagingException, IOException { String fileName = part.getFileName(); if (fileName == null || fileName.isEmpty()) { fileName = "unnamed"; } try { fileName = MimeUtility.decodeText(fileName); } catch (UnsupportedEncodingException e) { if (logger.isWarnEnabled()) { logger.warn("Cannot decode file name '" + fileName + "'", e); } } ContentType contentType = new ContentType(part.getContentType()); if (contentType.getBaseType().equalsIgnoreCase("application/ms-tnef")) { // The content is TNEF HMEFMessage hmef = new HMEFMessage(part.getInputStream()); // hmef.getBody(); List<org.apache.poi.hmef.Attachment> attachments = hmef.getAttachments(); for (org.apache.poi.hmef.Attachment attachment : attachments) { String subName = attachment.getLongFilename(); NodeRef attachmentNode = fileFolderService.searchSimple(attachmentsFolderRef, subName); if (attachmentNode == null) { /* * If the node with the given name does not already exist Create the content node to contain the attachment */ FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, subName, ContentModel.TYPE_CONTENT); attachmentNode = createdFile.getNodeRef(); serviceRegistry.getNodeService().createAssociation(messageFile, attachmentNode, ImapModel.ASSOC_IMAP_ATTACHMENT); byte[] bytes = attachment.getContents(); ContentWriter writer = fileFolderService.getWriter(attachmentNode); // TODO ENCODING - attachment.getAttribute(TNEFProperty.); String extension = attachment.getExtension(); String mimetype = mimetypeService.getMimetype(extension); if (mimetype != null) { writer.setMimetype(mimetype); } OutputStream os = writer.getContentOutputStream(); ByteArrayInputStream is = new ByteArrayInputStream(bytes); FileCopyUtils.copy(is, os); } } } else { // not TNEF NodeRef attachmentFile = fileFolderService.searchSimple(attachmentsFolderRef, fileName); // The one possible behaviour /* * if (result.size() > 0) { for (FileInfo fi : result) { fileFolderService.delete(fi.getNodeRef()); } } */ // And another one behaviour which will overwrite the content of the existing file. It is performance preferable. if (attachmentFile == null) { FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, fileName, ContentModel.TYPE_CONTENT); nodeService.createAssociation(messageFile, createdFile.getNodeRef(), ImapModel.ASSOC_IMAP_ATTACHMENT); attachmentFile = createdFile.getNodeRef(); } else { String newFileName = imapService.generateUniqueFilename(attachmentsFolderRef, fileName); FileInfo createdFile = fileFolderService.create(attachmentsFolderRef, newFileName, ContentModel.TYPE_CONTENT); nodeService.createAssociation(messageFile, createdFile.getNodeRef(), ImapModel.ASSOC_IMAP_ATTACHMENT); attachmentFile = createdFile.getNodeRef(); } nodeService.setProperty(attachmentFile, ContentModel.PROP_DESCRIPTION, nodeService.getProperty(messageFile, ContentModel.PROP_NAME)); ContentWriter writer = fileFolderService.getWriter(attachmentFile); writer.setMimetype(contentType.getBaseType()); OutputStream os = writer.getContentOutputStream(); FileCopyUtils.copy(part.getInputStream(), os); } }
From source file:org.apache.tika.parser.microsoft.TNEFParser.java
License:Apache License
/** * Extracts properties and text from an MS Document input stream *///www. j a v a2 s. c o m public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // We work by recursing, so get the appropriate bits EmbeddedDocumentExtractor ex = context.get(EmbeddedDocumentExtractor.class); EmbeddedDocumentExtractor embeddedExtractor; if (ex == null) { embeddedExtractor = new ParsingEmbeddedDocumentExtractor(context); } else { embeddedExtractor = ex; } // Ask POI to process the file for us HMEFMessage msg = new HMEFMessage(stream); // Set the message subject if known String subject = msg.getSubject(); if (subject != null && subject.length() > 0) { // TODO: Move to title in Tika 2.0 metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject); } // Recurse into the message body RTF MAPIAttribute attr = msg.getMessageMAPIAttribute(MAPIProperty.RTF_COMPRESSED); if (attr != null && attr instanceof MAPIRtfAttribute) { MAPIRtfAttribute rtf = (MAPIRtfAttribute) attr; handleEmbedded("message.rtf", "application/rtf", rtf.getData(), embeddedExtractor, handler); } // Recurse into each attachment in turn for (Attachment attachment : msg.getAttachments()) { String name = attachment.getLongFilename(); if (name == null || name.length() == 0) { name = attachment.getFilename(); } if (name == null || name.length() == 0) { String ext = attachment.getExtension(); if (ext != null) { name = "unknown" + ext; } } handleEmbedded(name, null, attachment.getContents(), embeddedExtractor, handler); } }