org.apdplat.search.GoogleAjaxSearcher.java Source code

Java tutorial

Introduction

Here is the source code for org.apdplat.search.GoogleAjaxSearcher.java

Source

/**
 *
 * APDPlat - Application Product Development Platform
 * Copyright (c) 2013, 杨尚川 (Yang Shangchuan), yang-shangchuan@qq.com
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package org.apdplat.search;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link GoogleSearcher} implementation that queries the Google AJAX web search
 * endpoint ({@code http://ajax.googleapis.com/ajax/services/search/web}) over HTTP
 * and converts the JSON response into a {@link SearchResult}.
 *
 * <p>For every hit the target page is additionally fetched through
 * {@code Tools.getHTMLContent(url)} and stored as the web page content.
 */
public class GoogleAjaxSearcher implements GoogleSearcher {
    private static final Logger LOG = LoggerFactory.getLogger(GoogleAjaxSearcher.class);

    /** Page size used to turn a 1-based page number into the {@code start} offset. */
    private static final int PAGE_SIZE = 8;

    /**
     * Searches for the first page of results.
     *
     * @param keyword the query string; see {@link #search(String, int)} for encoding expectations
     * @return the same result as {@code search(keyword, 1)}
     */
    @Override
    public SearchResult search(String keyword) {
        return search(keyword, 1);
    }

    /**
     * Searches for one page of results.
     *
     * <p>The keyword is appended to the request URL verbatim, so the caller is expected
     * to pass an already URL-encoded value (as {@link #main(String[])} does).
     *
     * @param keyword the (URL-encoded) query string
     * @param page    1-based page number; the request offset is {@code (page - 1) * 8}
     * @return the search result for the page; {@code null} if the server answered with a
     *         status other than 200; a result with whatever pages were collected so far
     *         (possibly none) if the request or the JSON parsing failed
     */
    @Override
    public SearchResult search(String keyword, int page) {
        // The "start" parameter is the offset of the first result to return:
        // (page - 1) * PAGE_SIZE.
        String url = "http://ajax.googleapis.com/ajax/services/search/web?start=" + (page - 1) * PAGE_SIZE
                + "&rsz=large&v=1.0&q=" + keyword;

        SearchResult searchResult = new SearchResult();
        searchResult.setPage(page);
        List<Webpage> webpages = new ArrayList<>();

        HttpClient httpClient = new HttpClient();
        GetMethod getMethod = new GetMethod(url);
        // The retry handler must be configured BEFORE the request is executed,
        // and the request must be executed exactly once.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
        try {
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                LOG.error("?: " + getMethod.getStatusLine());
                // Kept for backward compatibility: callers see null on a non-OK status.
                return null;
            }
            InputStream in = getMethod.getResponseBodyAsStream();
            byte[] responseBody = Tools.readAll(in);
            String response = new String(responseBody, "UTF-8");
            LOG.debug("??" + response);

            JSONObject responseData = new JSONObject(response).getJSONObject("responseData");

            // Estimated total number of hits, delivered by the service as a string.
            String totalResult = responseData.getJSONObject("cursor").getString("estimatedResultCount");
            int totalResultCount = Integer.parseInt(totalResult);
            LOG.info("? " + totalResultCount);
            searchResult.setTotal(totalResultCount);

            JSONArray results = responseData.getJSONArray("results");

            LOG.debug("?:");
            for (int i = 0; i < results.length(); i++) {
                Webpage webpage = new Webpage();
                JSONObject result = results.getJSONObject(i);

                // Title without HTML formatting.
                String title = result.getString("titleNoFormatting");
                LOG.debug("" + title);
                webpage.setTitle(title);

                // Summary: strip the <b> highlighting tags and the "..." ellipses.
                String summary = result.get("content").toString();
                summary = summary.replaceAll("<b>", "");
                summary = summary.replaceAll("</b>", "");
                summary = summary.replaceAll("\\.\\.\\.", "");
                LOG.debug("?" + summary);
                webpage.setSummary(summary);

                // Fetch the content of the page the result URL points to.
                String _url = result.get("url").toString();
                webpage.setUrl(_url);
                String content = Tools.getHTMLContent(_url);
                LOG.debug("" + content);
                webpage.setContent(content);

                webpages.add(webpage);
            }
        } catch (IOException | JSONException | NumberFormatException e) {
            LOG.error("?", e);
        } finally {
            // Always hand the connection back to the connection manager,
            // including on the early-return and exception paths.
            getMethod.releaseConnection();
        }
        searchResult.setWebpages(webpages);
        return searchResult;
    }

    /**
     * Small manual demo: URL-encodes a keyword, runs a search for page 1 and logs
     * every returned web page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String keyword = "??";
        try {
            keyword = URLEncoder.encode(keyword, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            LOG.error("url", e);
            return;
        }

        Searcher searcher = new GoogleAjaxSearcher();
        SearchResult searchResult = searcher.search(keyword, 1);
        // search(...) returns null on a non-OK HTTP status; treat that like "no pages".
        List<Webpage> webpages = searchResult == null ? null : searchResult.getWebpages();
        if (webpages != null) {
            int i = 1;
            LOG.info("? ? " + searchResult.getPage() + " ??"
                    + searchResult.getPageSize() + " " + searchResult.getTotal());
            for (Webpage webpage : webpages) {
                LOG.info("? " + (i++) + " ");
                LOG.info("" + webpage.getTitle());
                LOG.info("URL" + webpage.getUrl());
                LOG.info("?" + webpage.getSummary());
                LOG.info("" + webpage.getContent());
                LOG.info("");
            }
        } else {
            LOG.error("?");
        }
    }
}