List of usage examples for org.apache.http.params.HttpProtocolParamBean#setVersion
public void setVersion(HttpVersion httpVersion)
From source file:SandBox.testing.PageFetcher.java
/**
 * Builds the fetcher's HTTP client from the crawl configuration.
 *
 * <p>Sets the protocol parameters (HTTP/1.1, UTF-8 content charset, no expect-continue),
 * cookie policy, user agent and timeouts, turns off automatic redirect handling, registers
 * the "http" and "https" schemes, creates the pooled connection manager and the client,
 * optionally configures a proxy, installs a gzip-decoding response interceptor and starts
 * the idle-connection monitor thread.
 *
 * @param config crawl configuration supplying the user agent, timeouts, connection limits,
 *               HTTPS inclusion flag and proxy settings
 * @throws IllegalStateException if the SSL context cannot be obtained or initialised
 */
public PageFetcher(CrawlConfig config) {
    super(config);

    HttpParams params = new BasicHttpParams();
    HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params);
    paramsBean.setVersion(HttpVersion.HTTP_1_1);
    paramsBean.setContentCharset("UTF-8");
    paramsBean.setUseExpectContinue(false);

    params.setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BROWSER_COMPATIBILITY);
    params.setParameter(CoreProtocolPNames.USER_AGENT, config.getUserAgentString());
    params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout());
    params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout());
    params.setBooleanParameter("http.protocol.handle-redirects", false);

    // SECURITY NOTE(review): the trust manager below accepts every certificate chain, so
    // the "https" scheme built from it performs no server authentication. This is kept
    // as written in the original sandbox code; do not reuse it outside of testing.
    SSLContext sslContext;
    try {
        sslContext = SSLContext.getInstance("SSL");
        sslContext.init(null, new TrustManager[] { new X509TrustManager() {
            @Override
            public X509Certificate[] getAcceptedIssuers() {
                System.out.println("getAcceptedIssuers =============");
                // The X509TrustManager contract requires a non-null (possibly empty) array.
                return new X509Certificate[0];
            }

            @Override
            public void checkClientTrusted(X509Certificate[] certs, String authType) {
                System.out.println("checkClientTrusted =============");
            }

            @Override
            public void checkServerTrusted(X509Certificate[] certs, String authType) {
                System.out.println("checkServerTrusted =============");
            }
        } }, new SecureRandom());
    } catch (NoSuchAlgorithmException e) {
        // Fail fast with the cause instead of continuing with a null context.
        throw new IllegalStateException("SSL protocol \"SSL\" is not available", e);
    } catch (KeyManagementException e) {
        throw new IllegalStateException("Unable to initialise the SSL context", e);
    }

    SSLSocketFactory sf = new SSLSocketFactory(sslContext);
    Scheme httpsScheme = new Scheme("https", 443, sf);
    SchemeRegistry schemeRegistry = new SchemeRegistry();
    schemeRegistry.register(httpsScheme);
    schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory()));
    if (config.isIncludeHttpsPages()) {
        // NOTE(review): this registers "https" a second time, now with the default socket
        // factory; presumably it replaces the trust-all scheme registered above. Confirm
        // which of the two registrations is intended.
        schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory()));
    }

    connectionManager = new PoolingClientConnectionManager(schemeRegistry);
    connectionManager.setMaxTotal(config.getMaxTotalConnections());
    connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost());
    httpClient = new DefaultHttpClient(connectionManager, params);

    if (config.getProxyHost() != null) {
        if (config.getProxyUsername() != null) {
            httpClient.getCredentialsProvider().setCredentials(
                    new AuthScope(config.getProxyHost(), config.getProxyPort()),
                    new UsernamePasswordCredentials(config.getProxyUsername(), config.getProxyPassword()));
        }
        HttpHost proxy = new HttpHost(config.getProxyHost(), config.getProxyPort());
        httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
    }

    // Transparently unwrap gzip-encoded response bodies.
    httpClient.addResponseInterceptor(new HttpResponseInterceptor() {
        @Override
        public void process(final HttpResponse response, final HttpContext context)
                throws HttpException, IOException {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // Responses without a body (e.g. 204, 304, replies to HEAD) carry no entity.
                return;
            }
            Header contentEncoding = entity.getContentEncoding();
            if (contentEncoding != null) {
                HeaderElement[] codecs = contentEncoding.getElements();
                for (HeaderElement codec : codecs) {
                    if (codec.getName().equalsIgnoreCase("gzip")) {
                        response.setEntity(new GzipDecompressingEntity(entity));
                        return;
                    }
                }
            }
        }
    });

    if (connectionMonitorThread == null) {
        connectionMonitorThread = new IdleConnectionMonitorThread(connectionManager);
    }
    connectionMonitorThread.start();
}
From source file:edu.uci.ics.crawler4j.crawler.fetcher.PageFetcher.java
public PageFetcher(ICrawlerSettings config) { politenessDelay = config.getPolitenessDelay(); maxDownloadSize = config.getMaxDownloadSize(); show404Pages = config.getShow404Pages(); ignoreBinary = !config.getIncludeBinaryContent(); cache = config.getCacheProvider();//from w w w. j a v a 2 s . c o m HttpParams params = new BasicHttpParams(); HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params); paramsBean.setVersion(HttpVersion.HTTP_1_1); paramsBean.setContentCharset("UTF-8"); paramsBean.setUseExpectContinue(false); params.setParameter("http.useragent", config.getUserAgent()); params.setIntParameter("http.socket.timeout", config.getSocketTimeout()); params.setIntParameter("http.connection.timeout", config.getConnectionTimeout()); params.setBooleanParameter("http.protocol.handle-redirects", false); SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); if (config.getAllowHttps()) { schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); } connectionManager = new ThreadSafeClientConnManager(schemeRegistry); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); connectionManager.setMaxTotal(config.getMaxTotalConnections()); logger.setLevel(Level.INFO); httpclient = new DefaultHttpClient(connectionManager, params); }
From source file:com.tiaoin.crawl.plugin.util.PageFetcherImpl.java
/** * client?Header?Cookie//from w ww .jav a 2 s. c o m * @param aconfig * @param cookies */ public void init(Site site) { //System.out.println(site.toString()); if (null != site.getHeaders() && site.getHeaders().getHeader() != null) { for (com.tiaoin.crawl.core.xml.Header header : site.getHeaders().getHeader()) { this.addHeader(header.getName(), header.getValue()); } } if (null != site.getCookies() && site.getCookies().getCookie() != null) { for (com.tiaoin.crawl.core.xml.Cookie cookie : site.getCookies().getCookie()) { this.addCookie(cookie.getName(), cookie.getValue(), cookie.getHost(), cookie.getPath()); } } //HTTP? HttpParams params = new BasicHttpParams(); params.setParameter(CoreProtocolPNames.USER_AGENT, config.getUserAgentString()); params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout()); params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout()); HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params); paramsBean.setVersion(HttpVersion.HTTP_1_1); paramsBean.setContentCharset("UTF-8"); paramsBean.setUseExpectContinue(false); SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); if (config.isIncludeHttpsPages()) schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); connectionManager = new ThreadSafeClientConnManager(schemeRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); httpClient = new DefaultHttpClient(connectionManager, params); httpClient.getParams().setIntParameter("http.socket.timeout", 15000); httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH); //? 
httpClient.addResponseInterceptor(new HttpResponseInterceptor() { public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException { HttpEntity entity = response.getEntity(); Header contentEncoding = entity.getContentEncoding(); if (contentEncoding != null) { HeaderElement[] codecs = contentEncoding.getElements(); for (HeaderElement codec : codecs) { //?GZIP if (codec.getName().equalsIgnoreCase("gzip")) { response.setEntity(new GzipDecompressingEntity(response.getEntity())); return; } } } } }); }
From source file:org.eweb4j.spiderman.plugin.util.PageFetcherImpl.java
/** * client?Header?Cookie/* w ww .java2s . co m*/ * @param aconfig * @param cookies */ public void init(Site _site) { //HTTP? HttpParams params = new BasicHttpParams(); params.setParameter(CoreProtocolPNames.USER_AGENT, config.getUserAgentString()); params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getSocketTimeout()); params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getConnectionTimeout()); HttpProtocolParamBean paramsBean = new HttpProtocolParamBean(params); paramsBean.setVersion(HttpVersion.HTTP_1_1); paramsBean.setContentCharset("UTF-8"); paramsBean.setUseExpectContinue(false); SchemeRegistry schemeRegistry = new SchemeRegistry(); schemeRegistry.register(new Scheme("http", 80, PlainSocketFactory.getSocketFactory())); if (config.isIncludeHttpsPages()) schemeRegistry.register(new Scheme("https", 443, SSLSocketFactory.getSocketFactory())); connectionManager = new ThreadSafeClientConnManager(schemeRegistry); connectionManager.setMaxTotal(config.getMaxTotalConnections()); connectionManager.setDefaultMaxPerRoute(config.getMaxConnectionsPerHost()); httpClient = new DefaultHttpClient(connectionManager, params); httpClient.getParams().setIntParameter("http.socket.timeout", 60000); httpClient.getParams().setParameter(ClientPNames.COOKIE_POLICY, CookiePolicy.BEST_MATCH); httpClient.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, config.isFollowRedirects()); // HttpClientParams.setCookiePolicy(httpClient.getParams(),CookiePolicy.BEST_MATCH); //? 
httpClient.addResponseInterceptor(new HttpResponseInterceptor() { public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException { HttpEntity entity = response.getEntity(); Header contentEncoding = entity.getContentEncoding(); if (contentEncoding != null) { HeaderElement[] codecs = contentEncoding.getElements(); for (HeaderElement codec : codecs) { //?GZIP if (codec.getName().equalsIgnoreCase("gzip")) { response.setEntity(new GzipDecompressingEntity(response.getEntity())); return; } } } } }); if (_site != null) { this.site = _site; if (this.site.getHeaders() != null && this.site.getHeaders().getHeader() != null) { for (org.eweb4j.spiderman.xml.Header header : this.site.getHeaders().getHeader()) { this.addHeader(header.getName(), header.getValue()); } } if (this.site.getCookies() != null && this.site.getCookies().getCookie() != null) { for (org.eweb4j.spiderman.xml.Cookie cookie : this.site.getCookies().getCookie()) { this.addCookie(cookie.getName(), cookie.getValue(), cookie.getHost(), cookie.getPath()); } } } }