Java tutorial
/** * * APDPlat - Application Product Development Platform * Copyright (c) 2013, ??, yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.qa.datasource; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.params.HttpMethodParams; import org.apdplat.qa.files.FilesConfig; import org.apdplat.qa.model.Evidence; import org.apdplat.qa.model.Question; import org.apdplat.qa.system.QuestionAnsweringSystem; import org.apdplat.qa.util.MySQLUtils; import org.apdplat.qa.util.Tools; import org.json.JSONArray; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Google??? * * @author ?? */ public class GoogleDataSource implements DataSource { private static final Logger LOG = LoggerFactory.getLogger(GoogleDataSource.class); private String file; private int googleSearchTimes = 0; private static final int GOOGLESEARCHLIMIT = 10; //? private static final int PAGE = 1; private static final int PAGESIZE = 8; //? //? private static final boolean SUMMARY = true; // //private static final boolean SUMMARY = false; private List<String> files = new ArrayList<>(); public GoogleDataSource() { } public GoogleDataSource(String file) { this.files.add(file); } public GoogleDataSource(List<String> files) { this.files.addAll(files); } @Override public Question getQuestion(String questionStr) { return getAndAnswerQuestion(questionStr, null); } @Override public List<Question> getQuestions() { return getAndAnswerQuestions(null); } @Override public List<Question> getAndAnswerQuestions(QuestionAnsweringSystem questionAnsweringSystem) { List<Question> questions = new ArrayList<>(); for (String file : files) { BufferedReader reader = null; try { reader = new BufferedReader( new InputStreamReader(this.getClass().getResourceAsStream(file), "utf-8")); String line = reader.readLine(); while (line != null) { if (line.trim().equals("") || line.trim().startsWith("#") || line.indexOf("#") == 1 || line.length() < 3) { // line = reader.readLine(); continue; } LOG.info(" " + file + " Question:" + line.trim()); if (googleSearchTimes % GOOGLESEARCHLIMIT == (GOOGLESEARCHLIMIT - 1)) { LOG.info("?IP"); try { Thread.sleep(30000); } catch (InterruptedException e) { e.printStackTrace(); } LOG.info(""); } try { Thread.sleep(3000); } catch (InterruptedException e) { e.printStackTrace(); } String questionStr = null; String expectAnswer = null; String[] attrs = line.trim().split("[:|]"); if (attrs == null) { questionStr = line.trim(); } if (attrs != null && attrs.length == 1) { questionStr = attrs[0]; } if (attrs != null && attrs.length == 2) { questionStr = attrs[0]; expectAnswer = attrs[1]; } LOG.info("Question:" + questionStr); LOG.info("ExpectAnswer:" + expectAnswer); Question question = getQuestion(questionStr); if (question != null) { question.setExpectAnswer(expectAnswer); questions.add(question); } // if (questionAnsweringSystem != null && question != null) { questionAnsweringSystem.answerQuestion(question); } // line = reader.readLine(); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } LOG.info("Question" + file + "Questiongoogle " + questions.size() + " Question"); } return questions; } @Override public Question getAndAnswerQuestion(String questionStr, QuestionAnsweringSystem questionAnsweringSystem) { //1?? Question question = MySQLUtils.getQuestionFromDatabase("google:", questionStr); if (question != null) { //? LOG.info("?Question" + question.getQuestion()); // if (questionAnsweringSystem != null) { questionAnsweringSystem.answerQuestion(question); } return question; } //2???google question = new Question(); question.setQuestion(questionStr); String query = ""; try { query = URLEncoder.encode(question.getQuestion(), "UTF-8"); } catch (UnsupportedEncodingException e) { LOG.error("url", e); return null; } for (int i = 0; i < PAGE; i++) { query = "http://ajax.googleapis.com/ajax/services/search/web?start=" + i * PAGESIZE + "&rsz=large&v=1.0&q=" + query; List<Evidence> evidences = search(query); if (evidences.size() > 0) { question.addEvidences(evidences); } else { LOG.error(" " + (i + 1) + " ?"); break; } } LOG.info("Question" + question.getQuestion() + " ?Evidence " + question.getEvidences().size() + " ?"); if (question.getEvidences().isEmpty()) { return null; } //3?google if (question.getEvidences().size() > 7) { LOG.info("Question" + question.getQuestion() + " MySQL?"); MySQLUtils.saveQuestionToDatabase("google:", question); } googleSearchTimes++; // if (questionAnsweringSystem != null) { questionAnsweringSystem.answerQuestion(question); } return question; } private List<Evidence> search(String query) { List<Evidence> evidences = new ArrayList<>(); try { HttpClient httpClient = new HttpClient(); GetMethod getMethod = new GetMethod(query); httpClient.executeMethod(getMethod); getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler()); int statusCode = httpClient.executeMethod(getMethod); if (statusCode != HttpStatus.SC_OK) { LOG.error("Method failed: " + getMethod.getStatusLine()); } byte[] responseBody = getMethod.getResponseBody(); String response = new String(responseBody, "UTF-8"); LOG.debug("??" + response); JSONObject json = new JSONObject(response); String totalResult = json.getJSONObject("responseData").getJSONObject("cursor") .getString("estimatedResultCount"); int totalResultCount = Integer.parseInt(totalResult); LOG.info("? " + totalResultCount); JSONArray results = json.getJSONObject("responseData").getJSONArray("results"); LOG.debug(" Results:"); for (int i = 0; i < results.length(); i++) { Evidence evidence = new Evidence(); JSONObject result = results.getJSONObject(i); String title = result.getString("titleNoFormatting"); LOG.debug(title); evidence.setTitle(title); if (SUMMARY) { String content = result.get("content").toString(); content = content.replaceAll("<b>", ""); content = content.replaceAll("</b>", ""); content = content.replaceAll("\\.\\.\\.", ""); LOG.debug(content); evidence.setSnippet(content); } else { //URL??? String url = result.get("url").toString(); String content = Tools.getHTMLContent(url); if (content == null) { content = result.get("content").toString(); content = content.replaceAll("<b>", ""); content = content.replaceAll("</b>", ""); content = content.replaceAll("\\.\\.\\.", ""); } evidence.setSnippet(content); LOG.debug(content); } evidences.add(evidence); } } catch (Exception e) { LOG.error("?", e); } return evidences; } public static void main(String args[]) { Question question = new GoogleDataSource(FilesConfig.personNameQuestions) .getQuestion("APDPlat??"); LOG.info(question.toString()); } }