Example usage for org.apache.http.params HttpProtocolParamBean setUserAgent

List of usage examples for org.apache.http.params HttpProtocolParamBean setUserAgent

Introduction

On this page you can find example usage of org.apache.http.params HttpProtocolParamBean setUserAgent.

Prototype

public void setUserAgent(String str) 

Source Link

Usage

From source file: org.berlin.crawl.net.RobotsConnector.java

/**
 * Connect to robots.txt file./*from w  w  w.j av a 2s . co m*/
 * 
 * On error, close inputstream, return empty document.
 * 
 * @param builder
 * @return
 * @throws Exception
 */
protected synchronized String connect(final URIBuilder builder) throws Exception {
    this.lastURIBuilder = builder;
    InputStream instream = null;
    try {
        logger.info("Attempting request : " + builder.toString());
        final HttpParams params = new BasicHttpParams();
        final HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
        paramsBean.setUserAgent(OctaneCrawlerConstants.USER_AGENT);
        // Set this to false, or else you'll get an
        // Expectation Failed: error
        paramsBean.setUseExpectContinue(false);

        final URI uri = builder.build();
        final HttpClient httpclient = new DefaultHttpClient();
        final HttpGet httpget = new HttpGet(uri);
        httpget.setParams(params);

        // Connect //
        final HttpResponse response = httpclient.execute(httpget);
        final HttpEntity entity = response.getEntity();

        this.response = response;
        if (response != null) {
            if (response.getStatusLine() != null) {
                if (response.getStatusLine().getStatusCode() != 200) {
                    // Log the error line
                    logger.error("Invalid status code - " + response.getStatusLine().getStatusCode());
                    throw new CrawlerError("Invalid status code - " + response.getStatusLine().getStatusCode());
                }
            }
        }

        if (entity != null) {
            instream = entity.getContent();
            if (instream != null) {
                final StringBuffer document = new StringBuffer();
                final BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
                String line = "";
                while ((line = reader.readLine()) != null) {
                    document.append(line);
                    document.append(NL);
                } // End of the while //
                return document.toString();
            } // End of - instream ///
        } // End of the if /
        Thread.sleep(100);
    } catch (final Exception e) {
        logger.error("Error at robots connect", e);
        throw new CrawlerError("Error at connect", e);
    } finally {
        try {
            if (instream != null) {
                instream.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    } // End of the try - catch block //
    return null;
}

From source file: org.berlin.crawl.net.WebConnector.java

/**
 * Downloads the document addressed by {@code builder} and returns its text.
 *
 * <p>The raw response is stored in {@code response}. When the response has an entity,
 * the status line and status code are recorded on {@code blink}; once the body has
 * been read, {@code blink} is passed to {@code db.proc} and the thread sleeps for
 * {@code LINK_PROCESS_DELAY} before returning. Every line of the body is appended to
 * the result followed by {@code NL}. The content stream and the HTTP client are
 * always released before this method returns or throws.
 *
 * @param blink link record that receives the status line and status code
 * @param builder URI builder for the location to download
 * @return the document text, or {@code null} when the response carries no entity or
 *         no content stream
 * @throws Exception declared for callers; any failure raised while connecting or
 *         reading (including a status code other than 200) is logged and rethrown
 *         wrapped in {@code CrawlerError}
 */
public synchronized String connect(final BotLink blink, final URIBuilder builder) throws Exception {
    InputStream instream = null;
    // Declared outside the try so the finally block can release its connections.
    HttpClient httpclient = null;
    try {
        logger.info("!* Attempting download and connect request : " + builder.toString());
        final HttpParams params = new BasicHttpParams();
        final HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
        paramsBean.setUserAgent(USER_AGENT);
        // Expect-continue stays off, otherwise the request fails with
        // an "Expectation Failed" error.
        paramsBean.setUseExpectContinue(false);

        final URI uri = builder.build();
        httpclient = new DefaultHttpClient();
        final HttpGet httpget = new HttpGet(uri);
        httpget.setParams(params);

        // Connect //
        final HttpResponse response = httpclient.execute(httpget);
        this.response = response;
        final HttpEntity entity = response.getEntity();

        if (response.getStatusLine() != null) {
            final int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                final String message = "Invalid status code - " + statusCode;
                logger.error(message);
                throw new CrawlerError(message);
            }
        }

        if (entity != null) {
            blink.setStatusline(String.valueOf(response.getStatusLine()));
            blink.setCode(response.getStatusLine().getStatusCode());
            instream = entity.getContent();
            if (instream != null) {
                final StringBuilder document = new StringBuilder();
                // NOTE(review): decodes with the platform default charset; the charset
                // declared by the response is not consulted - confirm this is intended.
                final BufferedReader reader = new BufferedReader(new InputStreamReader(instream));
                String line;
                while ((line = reader.readLine()) != null) {
                    document.append(line).append(NL);
                }

                db.proc(blink);
                Thread.sleep(LINK_PROCESS_DELAY);

                return document.toString();
            }
        }

    } catch (final Throwable e) {
        if (e instanceof InterruptedException) {
            // Wrapping alone would hide the interrupt from the calling thread.
            Thread.currentThread().interrupt();
        }
        logger.error("Error at connect to LINK", e);
        throw new CrawlerError("Error at connect to LINK", e);
    } finally {
        try {
            if (instream != null) {
                instream.close();
            }
        } catch (IOException e) {
            logger.error("Error closing LINK content stream", e);
        }
        if (httpclient != null) {
            // A fresh client is created per call; shut it down so its pooled
            // connections are not leaked.
            httpclient.getConnectionManager().shutdown();
        }
    }
    return null;
}