org.apache.streams.elasticsearch.ElasticsearchQuery.java Source code

Introduction

Here is the source code for org.apache.streams.elasticsearch.ElasticsearchQuery.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.elasticsearch;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.Lists;
import com.google.common.base.Objects;
import com.typesafe.config.Config;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.FilterBuilders;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

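/**
 * Executes an Elasticsearch scan/scroll query against the configured indexes and types and
 * exposes the matching documents as an Iterable/Iterator of SearchHit.
 */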
public class ElasticsearchQuery implements Iterable<SearchHit>, Iterator<SearchHit>, Serializable {

    private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchQuery.class);
    private static final int SCROLL_POSITION_NOT_INITIALIZED = -3;

    private ElasticsearchClientManager elasticsearchClientManager;
    private ElasticsearchReaderConfiguration config;
    private List<String> indexes = Lists.newArrayList();
    private List<String> types = Lists.newArrayList();
    private int limit = 1000 * 1000 * 1000; // the default limit is deliberately very high (one billion)
    private boolean random = false;
    private int batchSize = 100;
    private String scrollTimeout = "5m";
    private QueryBuilder queryBuilder;
    private FilterBuilder filterBuilder;

    // These are private to help us manage the scroll
    private SearchRequestBuilder search;
    private SearchResponse scrollResp;
    private int scrollPositionInScroll = SCROLL_POSITION_NOT_INITIALIZED;
    private SearchHit next = null;
    private long totalHits = 0;
    private long totalRead = 0;

    private StreamsJacksonMapper mapper = StreamsJacksonMapper.getInstance();

    public ElasticsearchQuery() {
        this(ElasticsearchConfigurator
                .detectReaderConfiguration(StreamsConfigurator.config.getConfig("elasticsearch")));
    }

    public ElasticsearchQuery(ElasticsearchReaderConfiguration config) {
        this.config = config;
        this.elasticsearchClientManager = new ElasticsearchClientManager(config);
        this.indexes.addAll(config.getIndexes());
        this.types.addAll(config.getTypes());
        this.scrollTimeout = config.getScrollTimeout();
    }

    public long getHitCount() {
        return this.search == null ? 0 : this.totalHits;
    }

    public long getReadCount() {
        return this.totalRead;
    }

    public double getReadPercent() {
        return (double) this.getReadCount() / (double) this.getHitCount();
    }

    public long getRemainingCount() {
        return this.totalHits - this.totalRead;
    }

    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    public void setScrollTimeout(String scrollTimeout) {
        this.scrollTimeout = scrollTimeout;
    }

    public void setQueryBuilder(QueryBuilder queryBuilder) {
        this.queryBuilder = queryBuilder;
    }

    public void setFilterBuilder(FilterBuilder filterBuilder) {
        this.filterBuilder = filterBuilder;
    }

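    /**
     * Lazily builds the scan-type search request (indexes, types, optional raw search source,
     * query and random sort), then executes it once to open the scroll.
     */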
    public void execute(Object o) {

        // If we haven't already set up the search, then set up the search.
        if (search == null) {

            search = elasticsearchClientManager.getClient().prepareSearch(indexes.toArray(new String[0]))
                    .setSearchType(SearchType.SCAN).setExplain(true).addField("*").setFetchSource(true)
                    .setSize(batchSize).setScroll(scrollTimeout).addField("_timestamp");

            String searchJson;
            if (config.getSearch() != null) {
                LOGGER.debug("Have config in Reader: {}", config.getSearch());

                try {
                    searchJson = mapper.writeValueAsString(config.getSearch());
                    LOGGER.debug("Setting source: {}", searchJson);
                    search = search.setExtraSource(searchJson);
                } catch (JsonProcessingException e) {
                    LOGGER.warn("Could not apply _search supplied by config: {}", e.getMessage());
                }

                LOGGER.debug("Search source is now {}", search);
            }

            if (this.queryBuilder != null)
                search = search.setQuery(this.queryBuilder);

            // If the types are null, then don't specify a type
            if (this.types != null && this.types.size() > 0)
                search = search.setTypes(types.toArray(new String[0]));

            // TODO: Replace when all clusters are upgraded past 0.90.4 so we can implement a RANDOM scroll.
            if (this.random)
                search = search.addSort(SortBuilders.scriptSort("random()", "number"));
        }

        // We don't have a scroll yet, so execute the search and open one
        if (scrollResp == null) {
            scrollResp = search.execute().actionGet();
            LOGGER.trace(search.toString());
        }
    }

    //Iterable methods
    @Override
    public Iterator<SearchHit> iterator() {
        return this;
    }

    //Iterator methods
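    // Note: hasNext() performs the actual advance via calcNext(); next() simply returns
    // the hit that the most recent hasNext() call fetched.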
    @Override
    public SearchHit next() {
        return this.next;
    }

    @Override
    public boolean hasNext() {
        calcNext();
        return hasRecords();
    }

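    /**
     * Advances to the next hit. When the current scroll page is exhausted (or none has been
     * fetched yet), the next page is requested using the scroll id; a scroll position of -1
     * marks the end of the results.
     */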
    public void calcNext() {
        try {
            // We have exhausted the current scroll page (or have not fetched one yet); get the next page.
            if (scrollPositionInScroll == SCROLL_POSITION_NOT_INITIALIZED
                    || scrollPositionInScroll >= scrollResp.getHits().getHits().length) {
                // reset the scroll position
                scrollPositionInScroll = 0;

                // get the next hits of the scroll
                scrollResp = elasticsearchClientManager.getClient().prepareSearchScroll(scrollResp.getScrollId())
                        .setScroll(scrollTimeout).execute().actionGet();

                this.totalHits = scrollResp.getHits().getTotalHits();
            }

            // If this scroll has 0 items then we set the scroll position to -1
            // letting the iterator know that we are done.
            if (scrollResp.getHits().getTotalHits() == 0 || scrollResp.getHits().getHits().length == 0)
                scrollPositionInScroll = -1;
            else {
                // get the next record
                next = scrollResp.getHits().getAt(scrollPositionInScroll);

                // Increment our counters
                scrollPositionInScroll += 1;
                totalRead += 1;
            }
        } catch (Exception e) {
            LOGGER.error("Unexpected scrolling error: {}", e.getMessage());
            scrollPositionInScroll = -1;
            next = null;
        }
    }

    @Override
    public void remove() {
        // Removal is not supported; calls are silently ignored.
    }

    public void cleanUp() {
        // No explicit cleanup is currently performed.
    }

    protected boolean isCompleted() {
        return totalRead >= this.limit && hasRecords();
    }

    protected boolean hasRecords() {
        return scrollPositionInScroll != -1 && this.totalRead <= this.limit;
    }

}
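
For context, here is a minimal usage sketch. It assumes a populated ElasticsearchReaderConfiguration named readerConfig is already available (how it is built is deployment-specific and not shown); the query, batch size and loop below are illustrative only and are not part of the class above.

    // Hypothetical usage -- readerConfig is assumed to point at a live cluster.
    ElasticsearchQuery query = new ElasticsearchQuery(readerConfig);
    query.setBatchSize(500);                               // hits per scroll page
    query.setScrollTimeout("5m");                          // keep the scroll context alive between pages
    query.setQueryBuilder(QueryBuilders.matchAllQuery());  // any QueryBuilder can be supplied
    query.execute(null);                                   // the argument is unused; this opens the scroll

    for (SearchHit hit : query) {                          // the class is its own Iterable/Iterator
        System.out.println(hit.getId());
    }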