org.openurp.thesis.service.impl.CnkiThesisCheckServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for org.openurp.thesis.service.impl.CnkiThesisCheckServiceImpl.java

Source

/*
 * OpenURP,Open Source University Resource Plan Solution
 *
 * Copyright (c) 2013-2013, OpenURP Software.
 *
 * OpenURP is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * OpenURP is distributed in the hope that it will be useful.
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with OpenURP.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.openurp.thesis.service.impl;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.sql.Date;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.UnhandledException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.entity.mime.content.StringBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.beangle.commons.collection.CollectUtils;
import org.beangle.commons.comparators.PropertyComparator;
import org.openurp.thesis.service.CheckResult;
import org.openurp.thesis.service.ReportStyle;
import org.openurp.thesis.service.ThesisCheckService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

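/**
 * Thesis check service backed by the CNKI school site: logs in, uploads theses,
 * searches check results and fetches reports over HTTP.
 * 
 * @see http://pmlc.cnki.net/school/Login.aspx
 * @author chaostone
 */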
public class CnkiThesisCheckServiceImpl implements ThesisCheckService {
    // HTTP client shared by every request of this instance; login() reads its cookie store
    // and logout() shuts down its connection manager.
    DefaultHttpClient httpclient = new DefaultHttpClient();

    private static Logger logger = LoggerFactory.getLogger(CnkiThesisCheckServiceImpl.class);

    /** Base URL from which the default page URLs below are built. */
    String context = "http://pmlc.cnki.net/school";
    /** Login page; login() posts the credential form here. */
    String loginUrl = context + "/Login.aspx";

    /** Login captcha image; fetched by getCaptcha(). */
    String loginCaptchaUrl = context + "/Users/LoginCheckCode.aspx";

    /** Upload endpoint; upload() posts the multipart form here. */
    String uploadUrl = context + "/upload/receivefiles.aspx";
    /** Result list page; fetched at the end of login() and posted to by search(). */
    String searchUrl = context + "/SimResult.aspx";

    /** Report page; report() requests it with the LeftFile and t query parameters. */
    String reportUrl = context + "/Report.aspx";

    /** Optional logout URL; it has no default and logout() only requests it when set. */
    String logoutUrl;

    /**
     * Matches a {@code <tr>...</tr>} span that contains SR_FileNameS followed by five
     * {@code <td>} cells. extract() reads capture groups 2 (id), 5 (article), 8 (author),
     * 11 (check result), 14 (date) and 17 (download link).
     */
    Pattern checkPattern = Pattern.compile(
            "<tr([\\s\\S]*?)SR_FileNameS([\\s\\S]*?)</td>([\\s\\S]*?)<td([\\s\\S]*?)>([\\s\\S]*?)</td>([\\s\\S]*?)<td([\\s\\S]*?)>([\\s\\S]*?)</td>([\\s\\S]*?)<td([\\s\\S]*?)>([\\s\\S]*?)</td>([\\s\\S]*?)<td([\\s\\S]*?)>([\\s\\S]*?)</td>([\\s\\S]*?)<td([\\s\\S]*?)>([\\s\\S]*?)</td>([\\s\\S]*?)</tr>");

    /** Folder id parsed from a select element of the search page (see buildInfoParams); sent as FD on upload. */
    String foldId = null;
    /** Value of the ASP.NET_SessionId cookie captured by login(); sent as ASPSESSID on upload. */
    String sessionId = null;

    /** Form fields captured from the search page (ID plus hidden ASP.NET fields) and merged into every search() post. */
    Map<String, String> infoParams = CollectUtils.newHashMap();

    /**
     * Logs in and captures the session state that later calls rely on.
     * <p>
     * Posts the login form to {@code loginUrl}. When the server answers with a 302, the
     * ASP.NET_SessionId cookie is stored in {@code sessionId} and the search page is fetched
     * so that {@code foldId} and {@code infoParams} can be parsed from it.
     *
     * @param username account name, sent as UserName
     * @param password account password, sent as UserPwd
     * @param captcha captcha text, sent as TextBox_Check; left out of the form when null
     * @return true only when the login post answered 302 and both foldId and sessionId were obtained
     * @throws UnhandledException wrapping any exception raised while talking to the server
     */
    public boolean login(String username, String password, String captcha) {
        foldId = null;
        sessionId = null;
        Map<String, String> params = new HashMap<String, String>();

        params.put("UserName", username);
        params.put("UserPwd", password);
        if (null != captcha) {
            params.put("TextBox_Check", captcha);
        }

        // Hard-coded ASP.NET form state.
        // NOTE(review): presumably copied from a snapshot of the login page - confirm it is still accepted.
        params.put("__EVENTVALIDATION", "/wEWBQKf9pzyCQKvruq2CAKEzp2FBwKf06GLAQLSwpnTCOrkpBeWGp1IoUnLJnsl8k/Cet1H");
        params.put("__VIEWSTATE",
                "/wEPDwUKLTY1NDc5ODY2NWQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFDEltYWdlQnV0dG9uMbIaUF+dr62e/YiWT95H6zzf5pIB");
        params.put("ImageButton1.x", "0");
        params.put("ImageButton1.y", "0");

        HttpPost httpost = new HttpPost(loginUrl);
        boolean success = false;
        try {
            httpost.setEntity(new UrlEncodedFormEntity(convertToValuePairs(params), "UTF-8"));
            HttpResponse response = httpclient.execute(httpost);
            int loginStatus = response.getStatusLine().getStatusCode();
            logger.debug("{} {}", httpost.getRequestLine(), loginStatus);
            EntityUtils.consume(response.getEntity());

            success = (HttpStatus.SC_MOVED_TEMPORARILY == loginStatus);
            if (success) {
                for (Cookie ck : httpclient.getCookieStore().getCookies()) {
                    if (ck.getName().equals("ASP.NET_SessionId")) {
                        sessionId = ck.getValue();
                        break;
                    }
                }

                response = httpclient.execute(new HttpGet(searchUrl));
                int searchStatus = response.getStatusLine().getStatusCode();
                String page = null;
                if (HttpStatus.SC_OK == searchStatus) {
                    page = EntityUtils.toString(response.getEntity());
                } else {
                    String location = null;
                    if (HttpStatus.SC_MOVED_TEMPORARILY == searchStatus && response.containsHeader("Location")) {
                        location = response.getFirstHeader("Location").getValue();
                    }
                    // Release this response before issuing the follow-up request on the same client.
                    EntityUtils.consume(response.getEntity());
                    if (null != location) {
                        page = access(location);
                    }
                }
                // A missing page leaves foldId null, which is reported as a failed login below.
                if (null != page) {
                    buildInfoParams(page);
                }
                success = (StringUtils.isNotEmpty(foldId) && StringUtils.isNotEmpty(sessionId));
            }
        } catch (Exception e) {
            throw new UnhandledException(e);
        }
        return success;
    }

    /**
     * Requests {@code logoutUrl} when one is configured and then shuts down the HTTP
     * client's connection manager. The instance cannot issue requests afterwards.
     */
    public void logout() {
        try {
            if (null != logoutUrl) {
                accessTo(logoutUrl);
            }
        } finally {
            // When the HttpClient instance is no longer needed, shut down the connection
            // manager to ensure immediate deallocation of all system resources. This is
            // done in finally so a failing logout request cannot skip it.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads the login captcha image into a temporary .gif file.
     *
     * @return the temporary file, or null when the image could not be downloaded
     */
    public File getCaptcha() {
        try {
            return download(loginCaptchaUrl, "captcha", ".gif");
        } catch (Exception e) {
            // Best effort: callers get null, but the cause is kept in the log.
            logger.warn("Cannot download captcha from " + loginCaptchaUrl, e);
            return null;
        }
    }

    /**
     * Returns the check result for a thesis, uploading the file first when no result exists yet.
     *
     * @param author author used for the lookup and the upload
     * @param article article title used for the lookup and the upload
     * @param file thesis file to upload when nothing is found; may be null to only look up
     * @return the result found before or after the upload, or null when there is none
     * @throws Exception when the wait after the upload is interrupted or a called step fails
     */
    public CheckResult check(String author, String article, File file) throws Exception {
        CheckResult existing = get(author, article);
        // Nothing to upload, or nothing needs uploading: hand back what the lookup produced.
        if (existing != null || file == null) {
            return existing;
        }
        upload(author, article, file);
        // Wait one second before looking the thesis up again.
        Thread.sleep(1000);
        return get(author, article);
    }

    /**
     * Fetches the report page of a checked thesis and stores a cleaned copy in a temporary
     * .html file. Script and input tags are removed, and stylesheet and image references
     * are prefixed with {@code context}.
     *
     * @param id value sent as the LeftFile query parameter
     * @param style {@code ReportStyle.Detail} requests t=a; any other value (including null) requests t=p
     * @return the temporary file holding the cleaned report, written as UTF-8
     * @throws IllegalStateException when the report page could not be fetched
     * @throws UnhandledException when the temporary file cannot be created or written
     */
    public File report(long id, ReportStyle style) {
        String param = ReportStyle.Detail.equals(style) ? "a" : "p";
        String content = access(this.reportUrl + "?LeftFile=" + id + "&t=" + param);
        if (null == content) {
            // access() returns null on failure; fail with a clear message instead of an NPE below.
            throw new IllegalStateException("Cannot fetch report " + id + " from " + this.reportUrl);
        }
        // context goes into a regex replacement string, so '$' and '\' must be taken literally.
        String base = Matcher.quoteReplacement(context);
        content = content.replaceAll("<script([\\s\\S]*?)</script>", "");
        content = content.replaceAll("<input([\\s\\S]*?)>", "");
        content = content.replaceAll("href=\"(.*?)\\.css", "href=\"" + base + "/$1.css");
        content = content.replaceAll("images(.*?)gif", base + "/images$1gif");
        File tmp = null;
        try {
            tmp = File.createTempFile("report", ".html");
            FileUtils.writeStringToFile(tmp, content, "UTF-8");
        } catch (IOException e) {
            // Do not leave a partially written temp file behind.
            if (null != tmp) {
                tmp.delete();
            }
            throw new UnhandledException(e);
        }
        return tmp;
    }

    /**
     * Searches the result list by author and optionally narrows it by article title.
     * <p>
     * Only the author is sent to the server (as TB1), together with the fields captured
     * in {@code infoParams}. The article filter is applied locally with a substring match.
     *
     * @param author author to search for
     * @param article title fragment to keep; null keeps every row
     * @return the matching results; an empty list when the request fails or no result table is found
     */
    public List<CheckResult> search(String author, String article) {
        Map<String, String> params = new HashMap<String, String>();
        params.put("DDL1", "");
        params.put("TB1", author);
        params.put("DDLJCZT", "0");
        params.put("ImageButton5.x", "-762");
        params.put("ImageButton5.y", "-178");
        params.putAll(infoParams);
        HttpPost httpost = new HttpPost(searchUrl);
        String text = null;
        try {
            httpost.setEntity(new UrlEncodedFormEntity(convertToValuePairs(params), "UTF-8"));
            HttpResponse response = httpclient.execute(httpost);
            HttpEntity entity = response.getEntity();
            text = EntityUtils.toString(entity);
            EntityUtils.consume(entity);
        } catch (Exception e) {
            // Best effort: a failed request yields no results, but the cause is kept in the log.
            logger.warn("Search request to " + searchUrl + " failed", e);
            return Collections.emptyList();
        }
        // Only the markup between the GridView2 marker and the next closing table tag is parsed.
        text = StringUtils.substringBetween(text, "GridView2", "</table>");
        if (StringUtils.isEmpty(text)) {
            return Collections.emptyList();
        }
        List<CheckResult> results = extract(text);
        if (null == article) {
            return results;
        }

        List<CheckResult> rs = new ArrayList<CheckResult>();
        for (CheckResult cr : results) {
            if (null != cr.getArticle() && cr.getArticle().contains(article)) {
                rs.add(cr);
            }
        }
        return rs;
    }

    /**
     * Returns a single check result for the given author and article.
     * <p>
     * When the search yields several rows they are sorted with a PropertyComparator built
     * from "checkAt desc" and the first one is returned.
     * NOTE(review): presumably this puts the most recent check first - confirm against
     * CheckResult and PropertyComparator.
     * 
     * @param author
     *          notnull
     * @param article
     *          notnull
     * @return the first result after sorting, or null when the search finds nothing
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public CheckResult get(String author, String article) {
        List<CheckResult> found = search(author, article);
        if (found.isEmpty()) {
            return null;
        }
        // A single hit needs no ordering.
        if (found.size() > 1) {
            Collections.sort(found, new PropertyComparator("checkAt desc"));
        }
        return found.get(0);
    }

    /**
     * Uploads a thesis file as a multipart form to {@code uploadUrl}.
     * <p>
     * The form carries the article title (PM), the author (ZZ), the folder id (FD) and the
     * session id (ASPSESSID) captured by login(), plus the file itself (Filedata).
     * 
     * @param author author name, sent as ZZ
     * @param article article title, sent as PM
     * @param file file to upload, sent as Filedata
     * @return true when the trimmed response body is exactly "200"
     * @throws UnhandledException wrapping any exception raised while building or sending the request
     * @see http://pmlc.cnki.net/school/SwfUpload/handlers.js#uploadSuccess
     */
    public boolean upload(String author, String article, File file) {
        Charset utf8 = Charset.forName("UTF-8");
        MultipartEntity reqEntity = new MultipartEntity();
        String content = null;
        try {
            reqEntity.addPart("JJ", new StringBody("", utf8));
            reqEntity.addPart("DW", new StringBody("", utf8));
            reqEntity.addPart("FL", new StringBody("", utf8));
            reqEntity.addPart("PM", new StringBody(article, utf8));
            reqEntity.addPart("ZZ", new StringBody(author, utf8));
            reqEntity.addPart("FD", new StringBody(foldId, utf8));
            reqEntity.addPart("ASPSESSID", new StringBody(sessionId, utf8));
            reqEntity.addPart("Filedata", new FileBody(file));
            HttpPost httpost = new HttpPost(uploadUrl);
            httpost.setEntity(reqEntity);

            HttpResponse response = httpclient.execute(httpost);
            HttpEntity entity = response.getEntity();
            content = EntityUtils.toString(entity);
            EntityUtils.consume(entity);
        } catch (Exception e) {
            throw new UnhandledException(e);
        }
        logger.debug("upload {} response is {}", file.getName(), content);
        // The body "200" is the success marker (see handlers.js uploadSuccess). The constant
        // goes first so a response without a body counts as failure instead of raising an NPE.
        return "200".equals(StringUtils.trim(content));
    }

    /**
     * Performs a GET request and returns the response body as text.
     *
     * @param url address to request
     * @return the response body, or null when the request or reading the body fails
     */
    protected String access(String url) {
        HttpGet innerget = new HttpGet(url);
        try {
            HttpResponse response = httpclient.execute(innerget);
            HttpEntity entity = response.getEntity();
            String content = EntityUtils.toString(entity);
            EntityUtils.consume(entity);
            return content;
        } catch (Exception e) {
            // Best effort: callers get null; the failure goes to the class logger, not stderr.
            logger.error("GET " + url + " failed", e);
            return null;
        }
    }

    /**
     * Downloads the body of a GET response into a new temporary file.
     *
     * @param url address to request
     * @param prefix prefix of the temporary file name
     * @param suffix suffix of the temporary file name
     * @return the temporary file, or null when the status is not 200 or the response has no body
     * @throws Exception when the request fails or the body cannot be written
     */
    protected File download(String url, String prefix, String suffix) throws Exception {
        HttpGet innerget = new HttpGet(url);
        HttpResponse response = httpclient.execute(innerget);
        HttpEntity entity = response.getEntity();
        try {
            if (HttpStatus.SC_OK != response.getStatusLine().getStatusCode() || null == entity) {
                return null;
            }
            File tmp = File.createTempFile(prefix, suffix);
            boolean written = false;
            FileOutputStream output = new FileOutputStream(tmp);
            try {
                IOUtils.copy(entity.getContent(), output);
                output.flush();
                written = true;
            } finally {
                IOUtils.closeQuietly(output);
                // Do not leave a partial file behind when the copy failed.
                if (!written) {
                    tmp.delete();
                }
            }
            return tmp;
        } finally {
            // Always release the response so the client can issue its next request.
            EntityUtils.consume(entity);
        }
    }

    /**
     * Parses result rows out of the result table markup.
     * <p>
     * Capture groups of {@code checkPattern} read per row:
     * [2] id (value="id"), [5] article (anchor text), [8] author (anchor text),
     * [11] check result cell (two div elements: ratio in percent and a count),
     * [14] date, [17] download link carrying a check= parameter.
     * 
     * @param text markup of the result table
     * @return one CheckResult per matched row, in document order
     */
    protected List<CheckResult> extract(String text) {
        List<CheckResult> parsed = new ArrayList<CheckResult>();
        // NOTE(review): the "?" / "??" literals below look like non-ASCII title texts lost to
        // an encoding problem - confirm against the original source before relying on them.
        for (Matcher row = checkPattern.matcher(text); row.find();) {
            String resultCell = row.group(11);
            long id = Long.parseLong(StringUtils.substringBetween(row.group(2), "value=\"", "\""));
            String thesis = StringUtils.substringBetween(row.group(5), ">", "</a>");
            String author = StringUtils.substringBetween(row.group(8), ">", "</a>");

            if (!resultCell.contains("?")) {
                // Row without ratio and count.
                Date checkOn = Date.valueOf(row.group(14).trim());
                String checksum = StringUtils.substringBetween(row.group(17), "check=", "\"");
                parsed.add(new CheckResult(id, thesis, author, checksum, checkOn));
                continue;
            }

            // The ratio is shown as a percentage; it is stored as a fraction.
            float ratio = Float.parseFloat(StringUtils.substringBetween(resultCell, "?\">", "%</div>")) / 100;
            int count = Integer.parseInt(StringUtils.substringBetween(resultCell, "??\">", "</div>"));
            Date checkOn = Date.valueOf(row.group(14).trim());
            String checksum = StringUtils.substringBetween(row.group(17), "check=", "\"");
            parsed.add(new CheckResult(id, thesis, author, checksum, checkOn, ratio, count));
        }
        return parsed;
    }

    /**
     * Captures the folder id and the hidden form fields from the search page.
     * <p>
     * {@code foldId} becomes the first quoted run of digits found inside a select element
     * and stays unchanged when there is none. {@code infoParams} then receives ID plus
     * the three hidden ASP.NET fields, each of which may be null when absent from the page.
     * 
     * @param content markup of the search page
     */
    private void buildInfoParams(String content) {
        Matcher selectMatcher = Pattern.compile("<select([\\s\\S]*?)\"(\\d*?)\"([\\s\\S]*?)</select>")
                .matcher(content);
        if (selectMatcher.find()) {
            foldId = selectMatcher.group(2);
        }
        infoParams.put("ID", foldId);

        for (String hiddenName : new String[] { "__VIEWSTATE", "__EVENTVALIDATION", "__VIEWSTATEENCRYPTED" }) {
            infoParams.put(hiddenName, getHiddenValue(content, hiddenName));
        }
    }

    /**
     * Turns a parameter map into the name/value pair list used for form entities.
     *
     * @param merged parameter names mapped to their values
     * @return one pair per map entry, in the map's iteration order
     */
    private List<NameValuePair> convertToValuePairs(Map<String, String> merged) {
        List<NameValuePair> pairs = new ArrayList<NameValuePair>(merged.size());
        for (String key : merged.keySet()) {
            pairs.add(new BasicNameValuePair(key, merged.get(key)));
        }
        return pairs;
    }

    /**
     * Performs a GET request and discards the response body.
     *
     * @param url address to request
     * @return the response with its body already consumed, or null when the request fails
     */
    private HttpResponse accessTo(String url) {
        HttpGet innerget = new HttpGet(url);
        try {
            HttpResponse response = httpclient.execute(innerget);
            HttpEntity entity = response.getEntity();
            EntityUtils.consume(entity);
            return response;
        } catch (Exception e) {
            // Best effort: callers get null; the failure goes to the class logger, not stderr.
            logger.error("GET " + url + " failed", e);
        }
        return null;
    }

    /**
     * Reads the value attribute of an element written as {@code id="name" value="..."}.
     *
     * @param text markup to search; null yields null
     * @param name id of the element
     * @return the text between the quotes of the value attribute (possibly empty), or null
     *         when the element is not found or its value is not terminated by a quote
     */
    private String getHiddenValue(String text, String name) {
        if (null == text) {
            return null;
        }
        String prefix = "id=\"" + name + "\" value=\"";
        int startIndex = text.indexOf(prefix);
        // indexOf reports "not found" as -1; index 0 is a valid match position.
        if (startIndex < 0) {
            return null;
        }
        int valueStart = startIndex + prefix.length();
        int endIndex = text.indexOf('"', valueStart);
        if (endIndex < 0) {
            return null;
        }
        return text.substring(valueStart, endIndex);
    }

    /** @return the URL the login form is posted to */
    public String getLoginUrl() {
        return this.loginUrl;
    }

    /** @param loginUrl the URL the login form is posted to */
    public void setLoginUrl(String loginUrl) {
        this.loginUrl = loginUrl;
    }

    /** @return the URL requested on logout, or null when none is configured */
    public String getLogoutUrl() {
        return this.logoutUrl;
    }

    /** @param logoutUrl the URL requested on logout; null disables the logout request */
    public void setLogoutUrl(String logoutUrl) {
        this.logoutUrl = logoutUrl;
    }

    /** @return the URL thesis files are uploaded to */
    public String getUploadUrl() {
        return this.uploadUrl;
    }

    /** @param uploadUrl the URL thesis files are uploaded to */
    public void setUploadUrl(String uploadUrl) {
        this.uploadUrl = uploadUrl;
    }

    /** @return the URL of the result list page */
    public String getSearchUrl() {
        return this.searchUrl;
    }

    /** @param searchUrl the URL of the result list page */
    public void setSearchUrl(String searchUrl) {
        this.searchUrl = searchUrl;
    }

    /** @return the URL of the login captcha image */
    public String getLoginCaptchaUrl() {
        return this.loginCaptchaUrl;
    }

    /** @param loginCaptchaUrl the URL of the login captcha image */
    public void setLoginCaptchaUrl(String loginCaptchaUrl) {
        this.loginCaptchaUrl = loginCaptchaUrl;
    }

}