com.jaeksoft.searchlib.parser.EmlParser.java Source code

Java tutorial

Introduction

Here is the source code for com.jaeksoft.searchlib.parser.EmlParser.java

Source

/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2015 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.parser;

import java.io.IOException;
import java.util.List;
import java.util.Properties;

import javax.activation.DataSource;
import javax.mail.Address;
import javax.mail.Session;
import javax.mail.internet.MimeMessage;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.mail.util.MimeMessageParser;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.streamlimiter.StreamLimiter;

/**
 * Parser for e-mail messages stored as {@code message/rfc822} ({@code .eml})
 * documents.
 * <p>
 * The message is decoded with JavaMail and Apache Commons Email's
 * {@link MimeMessageParser}. Sender, recipients, subject, plain and HTML
 * bodies, dates and attachment metadata are copied into a single
 * {@link ParserResultItem}. When a parser selector is available, each
 * attachment is additionally handed to it and the resulting items are stored
 * under {@link ParserFieldEnum#email_attachment_content}.
 */
public class EmlParser extends Parser {

    public static final String[] DEFAULT_MIMETYPES = { "message/rfc822" };

    public static final String[] DEFAULT_EXTENSIONS = { "eml" };

    /** Fields this parser declares to the {@link Parser} base class. */
    private static final ParserFieldEnum[] fl = { ParserFieldEnum.parser_name, ParserFieldEnum.email_display_from,
            ParserFieldEnum.email_display_to, ParserFieldEnum.email_display_cc, ParserFieldEnum.email_display_bcc,
            ParserFieldEnum.email_conversation_topic, ParserFieldEnum.subject, ParserFieldEnum.content,
            ParserFieldEnum.email_sent_date, ParserFieldEnum.email_received_date,
            ParserFieldEnum.email_attachment_name, ParserFieldEnum.email_attachment_type,
            ParserFieldEnum.email_attachment_content, ParserFieldEnum.htmlSource, ParserFieldEnum.lang };

    public EmlParser() {
        super(fl);
    }

    private final static Properties JAVAMAIL_PROPS = new Properties();

    /**
     * Private JavaMail session used only for decoding messages.
     * <p>
     * {@code Session.getInstance} is used instead of
     * {@code Session.getDefaultInstance}: the default session is shared by
     * the whole JVM, ignores the supplied properties when it already exists,
     * and refuses access with a {@link SecurityException} when it was first
     * created with an authenticator by another component.
     */
    private static final Session JAVAMAIL_SESSION;

    static {
        JAVAMAIL_PROPS.put("mail.host", "localhost");
        JAVAMAIL_PROPS.put("mail.transport.protocol", "smtp");
        JAVAMAIL_SESSION = Session.getInstance(JAVAMAIL_PROPS);
    }

    /**
     * Decodes the e-mail read from {@code streamLimiter} and fills one
     * {@link ParserResultItem} with its headers, bodies and attachments.
     *
     * @param streamLimiter
     *            source of the raw message bytes
     * @param lang
     *            not used by this parser; the language is detected from the
     *            message body instead
     * @throws IOException
     *             if the stream cannot be read; any other failure raised
     *             while decoding the message or its attachments is wrapped
     *             in an {@code IOException} as its cause
     * @throws SearchLibException
     *             declared by the overridden method
     */
    @Override
    protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang)
            throws IOException, SearchLibException {
        try {

            MimeMessage mimeMessage = new MimeMessage(JAVAMAIL_SESSION, streamLimiter.getNewInputStream());
            MimeMessageParser mimeMessageParser = new MimeMessageParser(mimeMessage).parse();

            ParserResultItem result = getNewParserResultItem();

            // Sender and recipients.
            String from = mimeMessageParser.getFrom();
            if (from != null)
                result.addField(ParserFieldEnum.email_display_from, from);
            for (Address address : mimeMessageParser.getTo())
                result.addField(ParserFieldEnum.email_display_to, address.toString());
            for (Address address : mimeMessageParser.getCc())
                result.addField(ParserFieldEnum.email_display_cc, address.toString());
            for (Address address : mimeMessageParser.getBcc())
                result.addField(ParserFieldEnum.email_display_bcc, address.toString());

            // Subject, bodies and dates.
            String htmlContent = mimeMessageParser.getHtmlContent();
            result.addField(ParserFieldEnum.subject, mimeMessageParser.getSubject());
            result.addField(ParserFieldEnum.htmlSource, htmlContent);
            result.addField(ParserFieldEnum.content, mimeMessageParser.getPlainContent());
            result.addField(ParserFieldEnum.email_sent_date, mimeMessage.getSentDate());
            // NOTE(review): the JavaMail Javadoc states that the base
            // MimeMessage implementation of getReceivedDate() returns null;
            // confirm whether this field is ever populated.
            result.addField(ParserFieldEnum.email_received_date, mimeMessage.getReceivedDate());

            // Attachments: name and type are always recorded; the content is
            // parsed only when a parser selector is available.
            for (DataSource dataSource : mimeMessageParser.getAttachmentList()) {
                result.addField(ParserFieldEnum.email_attachment_name, dataSource.getName());
                result.addField(ParserFieldEnum.email_attachment_type, dataSource.getContentType());
                if (parserSelector == null)
                    continue;
                Parser attachParser = parserSelector.parseStream(getSourceDocument(), dataSource.getName(),
                        dataSource.getContentType(), null, dataSource.getInputStream(), null, null, null);
                if (attachParser == null)
                    continue;
                List<ParserResultItem> parserResults = attachParser.getParserResults();
                if (parserResults != null)
                    for (ParserResultItem parserResult : parserResults)
                        result.addField(ParserFieldEnum.email_attachment_content, parserResult);
            }

            // Language detection runs on the HTML body when there is one,
            // otherwise on the plain-text body.
            if (StringUtils.isEmpty(htmlContent))
                result.langDetection(10000, ParserFieldEnum.content);
            else
                result.langDetection(10000, ParserFieldEnum.htmlSource);
        } catch (IOException e) {
            // Already the declared type: rethrow untouched so the original
            // subtype and message are not buried under a second IOException.
            throw e;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}