sg.yeefan.searchenginewrapper.clients.GoogleCustomClient.java Source code

Java tutorial

Introduction

Here is the source code for sg.yeefan.searchenginewrapper.clients.GoogleCustomClient.java

Source

/*
 * GoogleCustomClient.java
 *
 * Copyright (C) Tan Yee Fan
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sg.yeefan.searchenginewrapper.clients;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringEscapeUtils;
import sg.yeefan.filedownloader.FileDownloader;
import sg.yeefan.filedownloader.FileDownloaderException;
import sg.yeefan.searchenginewrapper.KeyedSearchEngineClient;
import sg.yeefan.searchenginewrapper.SearchEngineException;
import sg.yeefan.searchenginewrapper.SearchEngineFatalException;
import sg.yeefan.searchenginewrapper.SearchEngineQuery;
import sg.yeefan.searchenginewrapper.DefaultSearchEngineQuery;
import sg.yeefan.searchenginewrapper.SearchEngineResult;
import sg.yeefan.searchenginewrapper.SearchEngineResults;

/**
 * A search engine client for <a href="http://www.google.com/cse/">Google Custom
 * Search</a>.
 * <p>
 * Google Custom Search requires two keys to operate:
 * <ul>
 * <li>{@code api_key}: Google API key.</li>
 * <li>{@code cx}: Custom search engine identifier.</li>
 * </ul>
 * In the query, the registration key should be supplied by concatenating these
 * keys together, using {@code "$"} as the delimiter:
 * <pre><code>
 * api_key + "$" + cx
 * </code></pre>
 *
 * @author Tan Yee Fan
 */
public class GoogleCustomClient implements KeyedSearchEngineClient {
    /** Endpoint of the Google Custom Search JSON API. */
    private static final String API_URL = "https://www.googleapis.com/customsearch/v1";

    /** User agent string sent with every request. */
    private static final String USER_AGENT =
            "Search Engine Wrapper (http://wing.comp.nus.edu.sg/~tanyeefa/downloads/searchenginewrapper/)";

    /** Character encoding used for URL parameters and for decoding the response body. */
    private static final String ENCODING = "UTF-8";

    /**
     * Number of results requested per call. A page containing fewer results
     * than this is treated as the last page, so no follow-up query is offered.
     */
    private static final int RESULTS_PER_PAGE = 10;

    /**
     * Shared JSON mapper that ignores unknown properties. It is configured
     * once here and never reconfigured afterwards, so it can be reused across
     * calls instead of being rebuilt for every query.
     */
    private static final ObjectMapper MAPPER = new ObjectMapper()
            .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /**
     * Constructor.
     */
    public GoogleCustomClient() {
    }

    // Classes for binding JSON data to Java objects. Every setter replaces a
    // null value by an empty default, so the getters never return null.

    /**
     * Top-level JSON response object.
     */
    private static class Response {
        private SearchInformation searchInformation;
        private Item[] items;

        public Response() {
            this.searchInformation = new SearchInformation();
            this.items = new Item[0];
        }

        public SearchInformation getSearchInformation() {
            return this.searchInformation;
        }

        public void setSearchInformation(SearchInformation searchInformation) {
            this.searchInformation = (searchInformation != null) ? searchInformation : new SearchInformation();
        }

        public Item[] getItems() {
            return this.items;
        }

        public void setItems(Item[] items) {
            this.items = (items != null) ? items : new Item[0];
        }
    }

    /**
     * A single search result in the JSON response.
     */
    private static class Item {
        private String link;
        private String title;
        private String htmlSnippet;

        public Item() {
            this.link = "";
            this.title = "";
            this.htmlSnippet = "";
        }

        public String getLink() {
            return this.link;
        }

        public void setLink(String link) {
            this.link = (link != null) ? link : "";
        }

        public String getTitle() {
            return this.title;
        }

        public void setTitle(String title) {
            this.title = (title != null) ? title : "";
        }

        public String getHtmlSnippet() {
            return this.htmlSnippet;
        }

        public void setHtmlSnippet(String htmlSnippet) {
            // Must test against null (not ""), otherwise a null snippet is
            // stored as-is and later breaks snippet processing.
            this.htmlSnippet = (htmlSnippet != null) ? htmlSnippet : "";
        }
    }

    /**
     * The {@code searchInformation} part of the JSON response.
     */
    private static class SearchInformation {
        private long totalResults;

        public SearchInformation() {
            this.totalResults = 0L;
        }

        public long getTotalResults() {
            return this.totalResults;
        }

        public void setTotalResults(long totalResults) {
            this.totalResults = totalResults;
        }
    }

    /**
     * Processes the snippet of the search result.
     * <p>
     * The HTML snippet is split at every bold element that consists solely of
     * dots (for example {@code <b>...</b>}). In each fragment the remaining
     * {@code <b>}, {@code </b>} and {@code <br>} tags are removed, HTML
     * entities are unescaped, and whitespace runs are collapsed to a single
     * space. Fragments that end up empty are dropped.
     *
     * @param snippet HTML snippet; {@code null} is treated as no snippet.
     * @return The cleaned snippet fragments, possibly an empty array.
     */
    private String[] processSnippet(String snippet) {
        if (snippet == null) {
            return new String[0];
        }
        String[] fragments = snippet.split("<b>\\.+</b>", 0);
        List<String> cleaned = new ArrayList<String>(fragments.length);
        for (String fragment : fragments) {
            // The tags are literal text, so plain replace() is sufficient.
            String text = fragment.replace("<b>", "").replace("</b>", "").replace("<br>", "");
            text = StringEscapeUtils.unescapeHtml4(text);
            text = text.trim().replaceAll("\\s+", " ");
            if (text.length() > 0) {
                cleaned.add(text);
            }
        }
        return cleaned.toArray(new String[cleaned.size()]);
    }

    /**
     * Builds the request URL, URL-encoding the API key, the search engine
     * identifier and the query string.
     *
     * @throws SearchEngineException if the encoding is not supported.
     */
    private String buildRequestUrl(String apiKey, String cx, String queryString, long startIndex)
            throws SearchEngineException {
        try {
            return API_URL
                    + "?key=" + URLEncoder.encode(apiKey, ENCODING)
                    + "&cx=" + URLEncoder.encode(cx, ENCODING)
                    + "&q=" + URLEncoder.encode(queryString, ENCODING)
                    + "&start=" + startIndex
                    + "&num=" + RESULTS_PER_PAGE;
        } catch (UnsupportedEncodingException e) {
            throw new SearchEngineFatalException(e);
        }
    }

    /**
     * Downloads the given URL and decodes the response body as text.
     *
     * @throws SearchEngineException if the download or the decoding fails.
     */
    private String download(String requestUrl) throws SearchEngineException {
        FileDownloader downloader = new FileDownloader();
        try {
            downloader.setUserAgent(USER_AGENT);
            byte[] bytes = downloader.download(requestUrl);
            return new String(bytes, ENCODING);
        } catch (FileDownloaderException e) {
            // TODO: Handle response code and error stream to check
            // whether quota is exceeded.
            throw new SearchEngineException(e);
        } catch (UnsupportedEncodingException e) {
            throw new SearchEngineException(e);
        }
    }

    /**
     * Binds the JSON text to a {@link Response} object.
     *
     * @throws SearchEngineException if the JSON cannot be read.
     */
    private Response parseResponse(String jsonString) throws SearchEngineException {
        try {
            return MAPPER.readValue(jsonString, Response.class);
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

    /**
     * Converts the JSON items into search engine results, preserving order.
     */
    private SearchEngineResult[] convertItems(Item[] items) {
        SearchEngineResult[] converted = new SearchEngineResult[items.length];
        for (int i = 0; i < items.length; i++) {
            Item item = items[i];
            SearchEngineResult result = new SearchEngineResult();
            result.setURL(item.getLink());
            result.setTitle(item.getTitle());
            result.setSnippet(processSnippet(item.getHtmlSnippet()));
            converted[i] = result;
        }
        return converted;
    }

    /**
     * Makes a query to Google Custom Search using a default query.
     * <p>
     * The recorded start and end times bracket the download of the response.
     * When a full page of results is returned, the results carry a follow-up
     * query whose start index points just past the last returned item.
     *
     * @param query The query; its key must be of the form
     *        {@code api_key + "$" + cx} and its start index at least 1.
     * @return The results for the requested page.
     * @throws SearchEngineException if the query is invalid (reported as a
     *         {@link SearchEngineFatalException}), or if downloading or
     *         parsing the response fails.
     */
    private SearchEngineResults getResults(DefaultSearchEngineQuery query) throws SearchEngineException {
        if (query == null) {
            throw new SearchEngineFatalException("Missing query.");
        }
        String keyString = query.getKey();
        String label = query.getLabel();
        String queryString = query.getQuery();
        long startIndex = query.getStartIndex();

        // A missing key is reported the same way as a malformed one rather
        // than surfacing as a NullPointerException.
        String[] keyStrings = (keyString != null) ? keyString.split("\\$", 0) : new String[0];
        if (keyStrings.length != 2) {
            throw new SearchEngineFatalException("Key must be of the form: api_key + \"$\" + cx");
        }
        if (startIndex < 1) {
            throw new SearchEngineFatalException("Start index must be at least 1.");
        }
        if (queryString == null) {
            throw new SearchEngineFatalException("Missing query string.");
        }

        String requestUrl = buildRequestUrl(keyStrings[0], keyStrings[1], queryString, startIndex);
        long startTime = System.currentTimeMillis();
        String jsonString = download(requestUrl);
        long endTime = System.currentTimeMillis();

        Response response = parseResponse(jsonString);
        Item[] items = response.getItems();

        SearchEngineResults results = new SearchEngineResults();
        results.setLabel(label);
        results.setQuery(queryString);
        results.setTotalResults(response.getSearchInformation().getTotalResults());
        results.setStartIndex(startIndex);
        results.setResults(convertItems(items));
        results.setStartTime(startTime);
        results.setEndTime(endTime);

        // Only a full page suggests that more results may be available.
        if (items.length >= RESULTS_PER_PAGE) {
            DefaultSearchEngineQuery nextQuery = new DefaultSearchEngineQuery();
            nextQuery.setKey(keyString);
            nextQuery.setLabel(label);
            nextQuery.setQuery(queryString);
            nextQuery.setStartIndex(startIndex + items.length);
            results.setNextQuery(nextQuery);
        }
        return results;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public SearchEngineResults getResults(SearchEngineQuery query) throws SearchEngineException {
        if (!(query instanceof DefaultSearchEngineQuery)) {
            throw new SearchEngineFatalException("Invalid query.");
        }
        return getResults((DefaultSearchEngineQuery) query);
    }
}