// Java tutorial
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.senseidb.dataprovider.http;

import java.io.IOException;
import java.io.InputStream;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpVersion;
import org.apache.http.StatusLine;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.SingleClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Logger;

import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.impl.indexing.StreamDataProvider;

/**
 * A {@link StreamDataProvider} that pulls batches of events over HTTP GET.
 *
 * <p>Each call to {@link #next()} hands out one event from the current batch. When the batch is
 * exhausted a new one is requested from the URL produced by {@link #buildGetString(String)} and
 * decoded by {@link #parse(InputStream)}. After every event handed out, the current offset is
 * advanced to that event's version.
 *
 * <p>Requests advertise {@code Accept-Encoding: gzip} and gzip-encoded responses are
 * transparently decompressed.
 *
 * @param <D> payload type of the events produced by {@link #parse(InputStream)}
 */
public abstract class HttpStreamDataProvider<D> extends StreamDataProvider<D> implements
    HttpDataProviderAdminMBean {
  private static final Logger logger = Logger.getLogger(HttpStreamDataProvider.class);

  public static final int DEFAULT_TIMEOUT_MS = 10000;
  public static final int DEFAULT_RETRYTIME_MS = 5000;
  public static final String DEFAULT_OFFSET_PARAM = "offset";
  /** Correctly spelled name of the default data parameter. */
  public static final String DEFAULT_DATA_PARAM = "data";
  /**
   * Misspelled alias kept so existing callers keep compiling.
   *
   * @deprecated use {@link #DEFAULT_DATA_PARAM}
   */
  @Deprecated
  public static final String DFEAULT_DATA_PARAM = DEFAULT_DATA_PARAM;

  protected final String _baseUrl;
  protected final int _fetchSize;
  protected final String _password;
  /** Offset passed to {@link #buildGetString(String)} for the next fetch. */
  protected String _offset;
  /** Offset restored by {@link #reset()}; {@code null} means {@code reset()} is a no-op. */
  protected String _initialOffset;

  private final ClientConnectionManager _httpClientManager;
  private final DefaultHttpClient _httpclient;
  private final boolean _disableHttps;

  private Iterator<DataEvent<D>> _currentDataIter;
  private volatile boolean _stopped;
  private int _retryTime;

  private volatile long _httpGetLatency;
  private volatile long _responseParseLatency;

  /**
   * Creates a provider in the stopped state; {@link #start()} must be called before
   * {@link #next()} returns any data.
   *
   * @param versionComparator comparator handed to the {@link StreamDataProvider} constructor
   * @param baseUrl base URL, stored for subclasses
   * @param pw password, stored for subclasses
   * @param fetchSize batch size, stored for subclasses
   * @param startingOffset offset used for the first fetch
   * @param disableHttps when {@code false} the client is passed through
   *        {@code HttpsClientDecorator.decorate}; when {@code true} only plain HTTP is registered
   */
  public HttpStreamDataProvider(Comparator<String> versionComparator, String baseUrl, String pw,
      int fetchSize, String startingOffset, boolean disableHttps) {
    super(versionComparator);
    _baseUrl = baseUrl;
    _password = pw;
    _fetchSize = fetchSize;
    _offset = startingOffset;
    _disableHttps = disableHttps;
    _initialOffset = null;
    _currentDataIter = null;
    _stopped = true;
    _httpGetLatency = 0L;
    _responseParseLatency = 0L;

    Scheme http = new Scheme("http", 80, PlainSocketFactory.getSocketFactory());
    SchemeRegistry sr = new SchemeRegistry();
    sr.register(http);

    HttpParams params = new BasicHttpParams();
    params.setParameter(HttpProtocolParams.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
    params.setParameter(HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
    params.setIntParameter(HttpConnectionParams.CONNECTION_TIMEOUT, 5000); // 5s conn timeout
    params.setIntParameter(HttpConnectionParams.SO_LINGER, 0); // no socket linger
    params.setBooleanParameter(HttpConnectionParams.TCP_NODELAY, true); // tcp no delay
    params.setIntParameter(HttpConnectionParams.SO_TIMEOUT, 5000); // 5s sock timeout
    params.setIntParameter(HttpConnectionParams.SOCKET_BUFFER_SIZE, 1024 * 1024); // 1mb socket buffer
    params.setBooleanParameter(HttpConnectionParams.SO_REUSEADDR, true); // allow address reuse

    _httpClientManager = new SingleClientConnManager(sr);

    DefaultHttpClient client = new DefaultHttpClient(_httpClientManager, params);
    if (!_disableHttps) {
      client = HttpsClientDecorator.decorate(client);
    }
    client.addRequestInterceptor(new GzipRequestInterceptor());
    client.addResponseInterceptor(new GzipResponseInterceptor());
    _httpclient = client;

    _retryTime = DEFAULT_RETRYTIME_MS; // default retry after 5 seconds
  }

  /** Adds {@code Accept-Encoding: gzip} to requests that do not already carry that header. */
  private static final class GzipRequestInterceptor implements HttpRequestInterceptor {
    @Override
    public void process(final HttpRequest request, final HttpContext context)
        throws HttpException, IOException {
      if (!request.containsHeader("Accept-Encoding")) {
        request.addHeader("Accept-Encoding", "gzip");
      }
    }
  }

  /** Wraps gzip-encoded response entities so that callers read decompressed content. */
  private static final class GzipResponseInterceptor implements HttpResponseInterceptor {
    @Override
    public void process(final HttpResponse response, final HttpContext context)
        throws HttpException, IOException {
      HttpEntity entity = response.getEntity();
      if (entity == null) {
        // Responses without a body (e.g. 204, 304) carry no entity; nothing to decode.
        return;
      }
      Header ceheader = entity.getContentEncoding();
      if (ceheader == null) {
        return;
      }
      for (HeaderElement codec : ceheader.getElements()) {
        if (codec.getName().equalsIgnoreCase("gzip")) {
          response.setEntity(new GzipDecompressingEntity(entity));
          return;
        }
      }
    }
  }

  /**
   * Sets how long {@link #next()} waits before retrying after an empty or failed fetch.
   *
   * @param retryTime wait in milliseconds; passed unchanged to {@link Object#wait(long)}
   */
  public void setRetryTime(int retryTime) {
    _retryTime = retryTime;
  }

  /** @return the retry wait in milliseconds */
  public int getRetryTime() {
    return _retryTime;
  }

  /**
   * Records the offset that {@link #reset()} will restore. The current offset is not changed.
   *
   * @param initialOffset offset to restore on reset, or {@code null} to make reset a no-op
   */
  @Override
  public void setStartingOffset(String initialOffset) {
    _initialOffset = initialOffset;
  }

  /**
   * Builds the full URL to GET for the batch starting at the given offset.
   *
   * @param offset current offset (the version of the last event handed out, or the starting offset)
   * @return the URL string passed to {@link HttpGet}
   */
  protected abstract String buildGetString(String offset);

  /**
   * Decodes one response body into events.
   *
   * <p>The stream is closed as soon as this method returns, so the returned iterator must not
   * read from it lazily.
   *
   * @param is response body (already gzip-decoded where applicable)
   * @return iterator over the decoded events; {@code null} or empty means no data
   * @throws Exception on any decoding failure; reported to the caller as an {@link HttpException}
   */
  protected abstract Iterator<DataEvent<D>> parse(InputStream is) throws Exception;

  /**
   * Performs one GET at the current offset and parses the response, recording both latencies.
   *
   * @return the parsed events, or {@code null} when a successful response carries no body
   * @throws HttpException on a status code of 400 or above, on I/O failure, or when parsing fails
   */
  private Iterator<DataEvent<D>> fetchBatch() throws HttpException {
    InputStream stream = null;
    try {
      HttpGet httpget = new HttpGet(buildGetString(_offset));

      long getStart = System.currentTimeMillis();
      HttpResponse response = _httpclient.execute(httpget);
      _httpGetLatency = System.currentTimeMillis() - getStart;

      HttpEntity entity = response.getEntity();
      StatusLine status = response.getStatusLine();
      int statusCode = status.getStatusCode();

      if (statusCode >= 400) {
        // Drain/close the error body so the single managed connection can be reused.
        if (entity != null) {
          try {
            IOUtils.closeQuietly(entity.getContent());
          } catch (Exception e) {
            logger.error(e.getMessage(), e);
          }
        }
        throw new HttpException(status.getReasonPhrase());
      }

      if (entity == null) {
        // Successful response without a body: treated by next() as "no data".
        return null;
      }

      try {
        stream = entity.getContent();
        long parseStart = System.currentTimeMillis();
        Iterator<DataEvent<D>> iter = parse(stream);
        _responseParseLatency = System.currentTimeMillis() - parseStart;
        return iter;
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        httpget.abort();
        throw new HttpException(e.getMessage(), e);
      }
    } catch (IOException ioe) {
      throw new HttpException(ioe.getMessage(), ioe);
    } finally {
      if (stream != null) {
        IOUtils.closeQuietly(stream);
      }
    }
  }

  /**
   * Waits up to the retry time on this object's monitor, unless the provider is already stopped.
   *
   * <p>The stopped flag is re-checked while holding the monitor so that a {@link #stop()} issued
   * just before the wait is not missed.
   *
   * @return {@code true} if the wait finished normally, {@code false} if it was interrupted
   */
  private boolean waitForRetry() {
    synchronized (this) {
      if (_stopped) {
        return true;
      }
      try {
        this.wait(_retryTime);
        return true;
      } catch (InterruptedException e) {
        // NOTE(review): the interrupt status is not restored, matching prior behavior;
        // confirm whether the consuming thread relies on it before changing this.
        return false;
      }
    }
  }

  /**
   * Returns the next event, fetching a new batch when the current one is exhausted.
   *
   * <p>If a fetch yields no data, this waits for the retry time and returns {@code null}. If a
   * fetch fails, the error is logged and the fetch is retried after the retry time until it
   * succeeds or the provider is stopped.
   *
   * @return the next event, or {@code null} when stopped, interrupted, or no data is available
   */
  @Override
  public DataEvent<D> next() {
    if (_stopped) {
      return null;
    }

    if (_currentDataIter == null || !_currentDataIter.hasNext()) {
      while (!_stopped) {
        Iterator<DataEvent<D>> batch;
        try {
          batch = fetchBatch();
        } catch (HttpException e) {
          logger.error(e.getMessage(), e);
          logger.error("retrying in " + _retryTime + "ms");
          if (!waitForRetry()) {
            return null;
          }
          continue;
        }

        if (batch == null || !batch.hasNext()) {
          if (logger.isDebugEnabled()) {
            logger.debug("no more data");
          }
          waitForRetry();
          return null;
        }

        _currentDataIter = batch;
        break;
      }
    }

    DataEvent<D> data = null;
    if (_currentDataIter != null && _currentDataIter.hasNext()) {
      data = _currentDataIter.next();
      if (data != null) {
        // Remember how far we got so the next fetch resumes after this event.
        _offset = data.getVersion();
      }
    }
    return data;
  }

  /** Restores the offset set via {@link #setStartingOffset(String)}, if one was set. */
  @Override
  public void reset() {
    if (_initialOffset != null) {
      _offset = _initialOffset;
    }
  }

  /** @return duration in milliseconds of the most recent HTTP GET */
  @Override
  public long getHttpGetLatency() {
    return _httpGetLatency;
  }

  /** @return duration in milliseconds of the most recent {@link #parse(InputStream)} call */
  @Override
  public long getResponseParseLatency() {
    return _responseParseLatency;
  }

  /** Starts the underlying provider and then clears the stopped flag. */
  @Override
  public void start() {
    super.start();
    _stopped = false;
  }

  /**
   * Marks the provider stopped, wakes any thread waiting in {@link #next()}, stops the
   * underlying provider and finally shuts down the HTTP connection manager.
   */
  @Override
  public void stop() {
    synchronized (this) {
      _stopped = true;
      this.notifyAll();
    }
    try {
      super.stop();
    } finally {
      _httpClientManager.shutdown();
    }
  }
}