com.senseidb.dataprovider.http.HttpStreamDataProvider.java Source code

Java tutorial

Introduction

Here is the source code for com.senseidb.dataprovider.http.HttpStreamDataProvider.java

Source

/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * (c) 2012 LinkedIn Corp. All Rights Reserved.
 */
package com.senseidb.dataprovider.http;

import java.io.IOException;
import java.io.InputStream;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.HttpVersion;
import org.apache.http.StatusLine;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.PlainSocketFactory;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.SingleClientConnManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.HttpContext;
import org.apache.log4j.Logger;

import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.impl.indexing.StreamDataProvider;

/**
 * Stream data provider that pulls batches of events from an HTTP endpoint.
 *
 * <p>Each batch is fetched with a GET request whose URL is produced by
 * {@link #buildGetString(String)} from the current offset, and the response
 * body is turned into events by {@link #parse(InputStream)}. Every event
 * handed out by {@link #next()} advances the current offset to that event's
 * version. When a fetch fails, or yields no events, the provider waits for
 * the configured retry time before the next attempt.
 *
 * @param <D> payload type of the produced data events
 */
public abstract class HttpStreamDataProvider<D> extends StreamDataProvider<D>
        implements HttpDataProviderAdminMBean {

    private static final Logger logger = Logger.getLogger(HttpStreamDataProvider.class);

    /** Base URL handed in at construction time; available to subclasses when building request URLs. */
    protected final String _baseUrl;

    private final ClientConnectionManager _httpClientManager;
    private DefaultHttpClient _httpclient;

    // NOTE(review): DEFAULT_TIMEOUT_MS is not referenced inside this class; the
    // constructor hard-codes 5000 ms for both connect and socket timeouts.
    public static final int DEFAULT_TIMEOUT_MS = 10000;
    public static final int DEFAULT_RETRYTIME_MS = 5000;

    public static final String DEFAULT_OFFSET_PARAM = "offset";
    /** Correctly spelled name of the data parameter constant. */
    public static final String DEFAULT_DATA_PARAM = "data";
    /**
     * Misspelled original name, kept so existing references keep compiling.
     *
     * @deprecated use {@link #DEFAULT_DATA_PARAM}
     */
    @Deprecated
    public static final String DFEAULT_DATA_PARAM = DEFAULT_DATA_PARAM;

    protected final int _fetchSize;
    /** Stored for subclasses; this class itself never reads it. */
    protected final String _password;
    /** Offset passed to {@link #buildGetString(String)} for the next fetch. */
    protected String _offset;
    /** Offset restored by {@link #reset()}, or {@code null} if none was set. */
    protected String _initialOffset;
    private final boolean _disableHttps;
    private Iterator<DataEvent<D>> _currentDataIter;
    private volatile boolean _stopped;
    private int _retryTime;

    private volatile long _httpGetLatency;
    private volatile long _responseParseLatency;

    /**
     * Creates a provider in the stopped state and sets up its HTTP client.
     *
     * @param versionComparator comparator forwarded to the superclass
     * @param baseUrl           base URL stored in {@link #_baseUrl}
     * @param pw                password stored in {@link #_password}
     * @param fetchSize         fetch size stored in {@link #_fetchSize}
     * @param startingOffset    offset used for the first fetch
     * @param disableHttps      when {@code false}, the client is passed through
     *                          {@code HttpsClientDecorator.decorate}
     */
    public HttpStreamDataProvider(Comparator<String> versionComparator, String baseUrl, String pw, int fetchSize,
            String startingOffset, boolean disableHttps) {
        super(versionComparator);
        _baseUrl = baseUrl;
        _password = pw;
        _fetchSize = fetchSize;
        _offset = startingOffset;
        _disableHttps = disableHttps;
        _initialOffset = null;
        _currentDataIter = null;
        _stopped = true;

        _httpGetLatency = 0L;
        _responseParseLatency = 0L;

        // Only plain http is registered here; https is left to the decorator below.
        Scheme http = new Scheme("http", 80, PlainSocketFactory.getSocketFactory());
        SchemeRegistry sr = new SchemeRegistry();
        sr.register(http);

        HttpParams params = new BasicHttpParams();
        params.setParameter(HttpProtocolParams.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
        params.setParameter(HttpProtocolParams.HTTP_CONTENT_CHARSET, "UTF-8");
        params.setIntParameter(HttpConnectionParams.CONNECTION_TIMEOUT, 5000); // 5s conn timeout
        params.setIntParameter(HttpConnectionParams.SO_LINGER, 0); // no socket linger
        params.setBooleanParameter(HttpConnectionParams.TCP_NODELAY, true); // tcp no delay
        params.setIntParameter(HttpConnectionParams.SO_TIMEOUT, 5000); // 5s sock timeout
        params.setIntParameter(HttpConnectionParams.SOCKET_BUFFER_SIZE, 1024 * 1024); // 1mb socket buffer
        params.setBooleanParameter(HttpConnectionParams.SO_REUSEADDR, true); // allow socket address reuse

        _httpClientManager = new SingleClientConnManager(sr);
        _httpclient = new DefaultHttpClient(_httpClientManager, params);

        if (!_disableHttps) {
            _httpclient = HttpsClientDecorator.decorate(_httpclient);
        }

        // Ask the server for gzip unless the request already states a preference.
        _httpclient.addRequestInterceptor(new HttpRequestInterceptor() {
            @Override
            public void process(final HttpRequest request, final HttpContext context)
                    throws HttpException, IOException {
                if (!request.containsHeader("Accept-Encoding")) {
                    request.addHeader("Accept-Encoding", "gzip");
                }
            }
        });

        // Transparently unwrap gzip-encoded response bodies.
        _httpclient.addResponseInterceptor(new HttpResponseInterceptor() {
            @Override
            public void process(final HttpResponse response, final HttpContext context)
                    throws HttpException, IOException {
                HttpEntity entity = response.getEntity();
                if (entity == null) {
                    // Responses without a body have nothing to decompress.
                    return;
                }
                Header ceheader = entity.getContentEncoding();
                if (ceheader == null) {
                    return;
                }
                for (HeaderElement codec : ceheader.getElements()) {
                    if (codec.getName().equalsIgnoreCase("gzip")) {
                        response.setEntity(new GzipDecompressingEntity(entity));
                        return;
                    }
                }
            }
        });

        _retryTime = DEFAULT_RETRYTIME_MS; // default retry after 5 seconds
    }

    /**
     * Sets how long {@link #next()} waits after a failed or empty fetch.
     *
     * @param retryTime wait time in milliseconds; values {@code <= 0} disable the wait
     */
    public void setRetryTime(int retryTime) {
        _retryTime = retryTime;
    }

    /**
     * @return the wait time, in milliseconds, applied after a failed or empty fetch
     */
    public int getRetryTime() {
        return _retryTime;
    }

    /**
     * Records the offset that {@link #reset()} will restore.
     *
     * @param initialOffset offset to fall back to on reset
     */
    @Override
    public void setStartingOffset(String initialOffset) {
        _initialOffset = initialOffset;
    }

    /**
     * Builds the URL of the GET request used to fetch the next batch.
     *
     * @param offset the current offset
     * @return the request URL
     */
    protected abstract String buildGetString(String offset);

    /**
     * Converts a response body into data events.
     *
     * <p>The stream is closed as soon as this method returns.
     *
     * @param is the (already gzip-decoded, if applicable) response body
     * @return iterator over the parsed events; may be {@code null} or empty
     * @throws Exception on any parse failure; reported to the caller of the
     *                   fetch as an {@link HttpException}
     */
    protected abstract Iterator<DataEvent<D>> parse(InputStream is) throws Exception;

    /**
     * Executes one GET for the current offset and parses the response.
     *
     * <p>Updates the HTTP and parse latency figures as a side effect.
     *
     * @return the parsed events, as returned by {@link #parse(InputStream)}
     * @throws HttpException if the request fails, the status code is 400 or
     *                       above, the response has no body, or parsing fails
     */
    private Iterator<DataEvent<D>> fetchBatch() throws HttpException {
        InputStream stream = null;
        try {
            HttpGet httpget = new HttpGet(buildGetString(_offset));
            long getStart = System.currentTimeMillis();
            HttpResponse response = _httpclient.execute(httpget);
            _httpGetLatency = System.currentTimeMillis() - getStart;

            HttpEntity entity = response.getEntity();
            StatusLine status = response.getStatusLine();
            int statusCode = status.getStatusCode();

            if (statusCode >= 400) {
                // Release the body of the error response, if there is one.
                if (entity != null) {
                    try {
                        IOUtils.closeQuietly(entity.getContent());
                    } catch (Exception e) {
                        logger.error(e.getMessage(), e);
                    }
                }
                throw new HttpException(status.getReasonPhrase());
            }

            if (entity == null) {
                // Previously surfaced as a NullPointerException wrapped in an
                // HttpException with a null message; report it explicitly.
                httpget.abort();
                throw new HttpException("response for offset " + _offset + " contained no entity");
            }

            try {
                stream = entity.getContent();

                long parseStart = System.currentTimeMillis();
                Iterator<DataEvent<D>> iter = parse(stream);
                _responseParseLatency = System.currentTimeMillis() - parseStart;
                return iter;
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
                httpget.abort();
                throw new HttpException(e.getMessage(), e);
            }
        } catch (IOException ioe) {
            throw new HttpException(ioe.getMessage(), ioe);
        } finally {
            if (stream != null) {
                IOUtils.closeQuietly(stream);
            }
        }
    }

    /**
     * Waits for the retry time, or until {@link #stop()} notifies this monitor.
     *
     * <p>The stop flag is re-checked while holding the monitor so that a
     * {@code stop()} issued just before the wait is not missed. A retry time
     * of zero or less skips the wait entirely: {@code Object.wait(0)} would
     * block until notified and a negative timeout would throw.
     *
     * @return {@code false} if the thread was interrupted while waiting,
     *         {@code true} otherwise
     */
    private boolean pauseBeforeRetry() {
        synchronized (this) {
            int delay = _retryTime;
            if (_stopped || delay <= 0) {
                return true;
            }
            try {
                this.wait(delay);
                return true;
            } catch (InterruptedException e) {
                // NOTE(review): the interrupt is consumed here, as in the
                // original code; confirm whether the owning thread needs the
                // interrupt status restored.
                return false;
            }
        }
    }

    /**
     * Returns the next event, fetching a new batch when the current one is used up.
     *
     * @return the next event, or {@code null} if the provider is stopped, the
     *         last fetch produced no events, or the thread was interrupted
     *         while waiting to retry
     */
    @Override
    public DataEvent<D> next() {
        if (_stopped) {
            return null;
        }
        if (_currentDataIter == null || !_currentDataIter.hasNext()) {
            while (!_stopped) {
                try {
                    Iterator<DataEvent<D>> data = fetchBatch();

                    if (data == null || !data.hasNext()) {
                        if (logger.isDebugEnabled()) {
                            logger.debug("no more data");
                        }
                        // Back off, then report "nothing available" to the caller.
                        pauseBeforeRetry();
                        return null;
                    }
                    _currentDataIter = data;
                    break;
                } catch (HttpException e) {
                    logger.error(e.getMessage(), e);
                    logger.error("retrying in " + _retryTime + "ms");
                    if (!pauseBeforeRetry()) {
                        return null;
                    }
                }
            }
        }

        DataEvent<D> data = null;
        if (_currentDataIter != null && _currentDataIter.hasNext()) {
            data = _currentDataIter.next();
            if (data != null) {
                // The next fetch resumes from the version of the last event handed out.
                _offset = data.getVersion();
            }
        }
        return data;
    }

    /**
     * Rewinds the current offset to the one given to
     * {@link #setStartingOffset(String)}; does nothing if none was set.
     */
    @Override
    public void reset() {
        if (_initialOffset != null) {
            _offset = _initialOffset;
        }
    }

    /**
     * @return duration, in milliseconds, of the most recent HTTP GET execution
     */
    @Override
    public long getHttpGetLatency() {
        return _httpGetLatency;
    }

    /**
     * @return duration, in milliseconds, of the most recent response parse
     */
    @Override
    public long getResponseParseLatency() {
        return _responseParseLatency;
    }

    /**
     * Starts the superclass and then clears the stopped flag so that
     * {@link #next()} begins fetching.
     */
    @Override
    public void start() {
        super.start();
        _stopped = false;
    }

    /**
     * Marks the provider stopped, wakes any thread waiting to retry, stops the
     * superclass and shuts down the HTTP connection manager.
     */
    @Override
    public void stop() {
        synchronized (this) {
            _stopped = true;
            this.notifyAll();
        }
        try {
            super.stop();
        } finally {

            if (_httpClientManager != null) {
                _httpClientManager.shutdown();
            }
        }
    }
}