Java tutorial
/** * * APDPlat - Application Product Development Platform * Copyright (c) 2013, ??, yang-shangchuan@qq.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package org.apdplat.search; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.params.HttpMethodParams; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class GoogleAjaxSearcher implements GoogleSearcher { private static final Logger LOG = LoggerFactory.getLogger(GoogleAjaxSearcher.class); @Override public SearchResult search(String keyword) { return search(keyword, 1); } @Override public SearchResult search(String keyword, int page) { int pageSize = 8; //???8start? //?start=0start=10start=20?(page-1)*pageSize String url = "http://ajax.googleapis.com/ajax/services/search/web?start=" + (page - 1) * pageSize + "&rsz=large&v=1.0&q=" + keyword; SearchResult searchResult = new SearchResult(); searchResult.setPage(page); List<Webpage> webpages = new ArrayList<>(); try { HttpClient httpClient = new HttpClient(); GetMethod getMethod = new GetMethod(url); httpClient.executeMethod(getMethod); getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler()); int statusCode = httpClient.executeMethod(getMethod); if (statusCode != HttpStatus.SC_OK) { LOG.error("?: " + getMethod.getStatusLine()); return null; } InputStream in = getMethod.getResponseBodyAsStream(); byte[] responseBody = Tools.readAll(in); String response = new String(responseBody, "UTF-8"); LOG.debug("??" + response); JSONObject json = new JSONObject(response); String totalResult = json.getJSONObject("responseData").getJSONObject("cursor") .getString("estimatedResultCount"); int totalResultCount = Integer.parseInt(totalResult); LOG.info("? " + totalResultCount); searchResult.setTotal(totalResultCount); JSONArray results = json.getJSONObject("responseData").getJSONArray("results"); LOG.debug("?:"); for (int i = 0; i < results.length(); i++) { Webpage webpage = new Webpage(); JSONObject result = results.getJSONObject(i); //??? String title = result.getString("titleNoFormatting"); LOG.debug("" + title); webpage.setTitle(title); //???? String summary = result.get("content").toString(); summary = summary.replaceAll("<b>", ""); summary = summary.replaceAll("</b>", ""); summary = summary.replaceAll("\\.\\.\\.", ""); LOG.debug("?" + summary); webpage.setSummary(summary); //URL??? String _url = result.get("url").toString(); webpage.setUrl(_url); String content = Tools.getHTMLContent(_url); LOG.debug("" + content); webpage.setContent(content); webpages.add(webpage); } } catch (IOException | JSONException | NumberFormatException e) { LOG.error("?", e); } searchResult.setWebpages(webpages); return searchResult; } public static void main(String args[]) { String keyword = "??"; try { keyword = URLEncoder.encode(keyword, "UTF-8"); } catch (UnsupportedEncodingException e) { LOG.error("url", e); return; } Searcher searcher = new GoogleAjaxSearcher(); SearchResult searchResult = searcher.search(keyword, 1); List<Webpage> webpages = searchResult.getWebpages(); if (webpages != null) { int i = 1; LOG.info("? ? " + searchResult.getPage() + " ??" + searchResult.getPageSize() + " " + searchResult.getTotal()); for (Webpage webpage : webpages) { LOG.info("? " + (i++) + " "); LOG.info("" + webpage.getTitle()); LOG.info("URL" + webpage.getUrl()); LOG.info("?" + webpage.getSummary()); LOG.info("" + webpage.getContent()); LOG.info(""); } } else { LOG.error("?"); } } }