com.xue777hua.emails.test.Test.java Source code

Java tutorial

Introduction

Here is the source code for com.xue777hua.emails.test.Test.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

package com.xue777hua.emails.test;

import com.xue777hua.util.Log;
import com.xue777hua.util.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringEscapeUtils;

/**
 *
 * @author morgan
 */
public class Test {
    public static void main(String[] args) {
        String text = "<div id=\"frag_1\" class=\"page_fragment auth_frag\" data-first=\"true\" data-fid=\"1\"><div class=\"module_topic_paths\"></div><h1 class=\"svTitle\" id=\"tm005\">Effect of inulin and pectin on rheological and thermal properties of potato starch paste and gel</h1><ul class=\"authorGroup noCollab\"><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee493d4\" data-t=\"a\" data-fn=\"Teresa\" data-ln=\"Witczak\" data-pos=\"1\" data-tb=\"\">Teresa Witczak</a><a title=\"Affiliation: a\" href=\"#af005\" class=\"intra_ref auth_aff\" id=\"baf005\"><sup>a</sup></a><sup>, </sup><a title=\"Corresponding author contact information\" href=\"#cor1\" id=\"bcor1\" class=\"intra_ref auth_corr\"><img class=\"imgLazyJSB\" alt=\"Corresponding author contact information\" src=\"/sd/grey_pxl.gif\" data-inlimg=\"/entities/REcor.gif\"><noscript><img alt=\"Corresponding author contact information\" src=\"http://origin-cdn.els-cdn.com/sd/entities/REcor.gif\"></noscript></a><sup>, </sup><a href=\"mailto:t.witczak@ur.krakow.pl\" class=\"auth_mail\"><img class=\"imgLazyJSB\" src=\"/sd/grey_pxl.gif\" alt=\"E-mail the corresponding author\" data-inlimg=\"/entities/REemail.gif\"><noscript><img src=\"http://origin-cdn.els-cdn.com/sd/entities/REemail.gif\" alt=\"E-mail the corresponding author\"></noscript></a>, </li><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee4953c\" data-t=\"a\" data-fn=\"Mariusz\" data-ln=\"Witczak\" data-pos=\"2\" data-tb=\"\">Mariusz Witczak</a><a title=\"Affiliation: a\" href=\"#af005\" class=\"intra_ref auth_aff\" id=\"baf005\"><sup>a</sup></a>, </li><li><a href=\"#\" class=\"authorName\" id=\"authname_N41d730a0N3ee495f0\" data-t=\"a\" data-fn=\"Rafa&#322;\" data-ln=\"Ziobro\" data-pos=\"3\" data-tb=\"\">Rafa Ziobro</a><a title=\"Affiliation: b\" href=\"#af010\" class=\"intra_ref auth_aff\" id=\"baf010\"><sup>b</sup></a></li></ul><!--VALIDHTML--><ul class=\"affiliation\"><li id=\"af005\"><sup>a</sup> <span id=\"\">Department of Engineering and Machinery for Food Industry, University of Agriculture in Krakow, Balicka 122 Str., 30-149 Krakow, Poland</span></li><li id=\"af010\"><sup>b</sup> <span id=\"\">Department of Carbohydrates Technology, University of Agriculture in Krakow, Balicka 122 Str., 30-149 Krakow, Poland</span></li></ul><!--VALIDHTML--><!--VALIDHTML--><dl class=\"articleDates\"><dd>Received 24 May 2013, Revised 1 October 2013, Accepted 1 October 2013, Available online 11 October 2013</dd></dl><!--VALIDHTML--><div class=\"moreInformation\"></div><div id=\"ppvPlaceHolder\" class=\"hidden\"></div><!--VALIDHTML--><div id=\"showMoreButtons\"></div><dl class=\"extLinks\"><dd class=\"doiLink\"></dd><dd class=\"rightsLink\"></dd></dl><div class=\"articleOAlabelForced\"></div><div id=\"refersToAndreferredToBy\"><dl id=\"referredToBy\" class=\"documentThread\"><!--Referred To By--></dl></div><!--FRAGMENTEND--><div class=\"page_fragment_ind auth_frag\" data-id=\"frag_2\"></div></div>";
        String authorList = "";
        String articleTitle = "";
        // ???title
        Pattern articleTitlePattern = Pattern.compile("<h1.+?svTitle.+?>(.+?)</h1>");
        Matcher articleTitleMatcher = articleTitlePattern.matcher(text);
        while (articleTitleMatcher.find()) {
            articleTitle = articleTitleMatcher.group(1);
            articleTitle = StringEscapeUtils.unescapeHtml(articleTitle);
            articleTitle = StringUtils.stripTags(articleTitle);
            System.out.println("" + articleTitle);
        }
        // ???
        Pattern p = Pattern.compile("<ul.+?authorGroup.+?>(.+?)</ul>");
        Matcher m = p.matcher(text);
        while (m.find()) {
            authorList = m.group(1);
        }
        p = Pattern.compile("<li>(.+?)</li>");
        m = p.matcher(authorList);
        while (m.find()) {
            String authorItem = m.group(1);
            if (authorItem.contains("mailto")) {
                Pattern nameEmailPattern = Pattern
                        .compile("data-tb=\"[\\d]{0,}\">(.+?)</a>.*href=\"mailto:(.+?)\" class=\"auth_mail\">");
                Matcher nameEmailMatcher = nameEmailPattern.matcher(authorItem);
                if (nameEmailMatcher.find()) {
                    String name = nameEmailMatcher.group(1);
                    String email = nameEmailMatcher.group(2);
                    name = StringEscapeUtils.unescapeHtml(name);
                    // ???new ArrayList
                    List<String> fieldList = new ArrayList<String>();
                    fieldList.add(name);
                    fieldList.add(email);
                    fieldList.add(articleTitle);
                    int hashKey = (name + email + articleTitle).hashCode();
                    System.out.println("?????TitlehashKey:" + name + "|" + email
                            + "|" + articleTitle + "|" + hashKey + ",  nameEmailsList");

                }
            }
        }

    }

}