org.tallison.solr.search.concordance.KWICRequestHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.tallison.solr.search.concordance.KWICRequestHandler.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tallison.solr.search.concordance;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.concordance.classic.AbstractConcordanceWindowCollector;
import org.apache.lucene.search.concordance.classic.ConcordanceSearcher;
import org.apache.lucene.search.concordance.classic.ConcordanceSortOrder;
import org.apache.lucene.search.concordance.classic.DocMetadataExtractor;
import org.apache.lucene.search.concordance.classic.WindowBuilder;
import org.apache.lucene.search.concordance.classic.impl.ConcordanceWindowCollector;
import org.apache.lucene.search.concordance.classic.impl.DefaultSortKeyBuilder;
import org.apache.lucene.search.concordance.classic.impl.FieldBasedDocIdBuilder;
import org.apache.lucene.search.concordance.classic.impl.SimpleDocMetadataExtractor;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.cloud.RequestThreads;
import org.apache.solr.cloud.RequestWorker;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;

/**
 * Request handler that returns concordance results for a query.
 * <p>
 * Example {@code solrconfig.xml} registration:
 * <pre>{@code
 * <requestHandler name="/kwic" class="org.tallison.solr.search.concordance.KWICRequestHandler">
 *   <lst name="defaults">
 *     <str name="echoParams">explicit</str>
 *     <str name="f">content_txt</str>
 *     <str name="df">content_txt</str>
 *     <str name="maxWindows">500</str>
 *     <str name="wt">xml</str>
 *
 *     <!--  other parameters:
 *
 *     <str name="debug">false</str>
 *     <str name="fl">metadata field1,metadata field2,metadata field3</str>
 *     <str name="targetOverlaps">true</str>
 *     <str name="contentDisplaySize">42</str>
 *     <str name="targetDisplaySize">42</str>
 *     <str name="tokensAfter">42</str>
 *     <str name="tokensBefore">42</str>
 *     <str name="sortOrder">TARGET_PRE</str> //TODO: add options here: TARGET_POST, PRE, POST
 *
 *     -->
 *   </lst>
 * </requestHandler>
 * }</pre>
 *
 * @author JRROBINSON
 */

//TODO: refactor to extend SearchHandler, and move the concordance logic into a
//ConcordanceSearchComponent, as planned???

public class KWICRequestHandler extends SolrConcordanceBase {
    /**
     * Handler name handed to {@code getHandlerName(req, DefaultName, ...)} when resolving the
     * path that worker requests are sent to (presumably used as the fallback name -- confirm
     * against {@code SolrConcordanceBase}).
     */
    public static final String DefaultName = "/concordance";
    /** Key under which the concordance results are added to the response and looked up in worker responses. */
    public static final String NODE = "contextWindows";
    /**
     * Max number of request threads to spawn.  Since this service wasn't intended to return
     * ALL possible results, it seems reasonable to cap this at something.
     * Used as the default {@code maxThreads} of {@code initRequestPump}.
     */
    public final static int MAX_THREADS = 25;

    ;

    /**
     * Convenience overload: runs the local concordance search without an explicit filter query.
     *
     * @param req the current Solr request
     * @return whatever {@link #doLocalSearch(Query, SolrQueryRequest)} returns for a {@code null} filter
     * @throws Exception propagated from the two-argument overload
     */
    public static NamedList doLocalSearch(SolrQueryRequest req) throws Exception {
        final Query noExplicitFilter = null;
        return doLocalSearch(noExplicitFilter, req);
    }

    /**
     * Runs a concordance search against the index reader of the searcher attached to {@code req}.
     * <p>
     * Request parameters read directly here:
     * <ul>
     * <li>{@code q} -- the query string, parsed with {@code QParser}</li>
     * <li>{@code fl} -- comma-separated metadata field names; whitespace around each name is
     * ignored. When absent or blank the no-arg {@code SimpleDocMetadataExtractor} is used.</li>
     * <li>{@code anType} -- {@code "query"} (default, case-insensitive) selects the field type's
     * query analyzer; any other value selects its index analyzer.</li>
     * </ul>
     * The remaining window/sort parameters are read by {@code buildConcordanceConfig}.
     *
     * @param filter currently NOT used by this method; the filter actually applied is the one
     *               returned by {@code getFilterQuery(req)}
     * @param req    the current Solr request
     * @return the collected windows converted by {@code convertToList}
     * @throws IllegalStateException if the schema declares no uniqueKey field
     * @throws RuntimeException      if no field type (and therefore no analyzer) is available for the field
     * @throws Exception             anything raised by query parsing or the concordance search
     */
    public static NamedList doLocalSearch(Query filter, SolrQueryRequest req) throws Exception {
        SolrParams params = req.getParams();
        IndexSchema schema = req.getSchema();
        String field = getField(params, schema.getDefaultSearchFieldName());

        String q = params.get(CommonParams.Q);

        // Fail with a readable message instead of a bare NullPointerException.
        SchemaField uniqueKey = schema.getUniqueKeyField();
        if (uniqueKey == null) {
            throw new IllegalStateException("Concordance search requires a uniqueKey field in the schema");
        }
        String solrUniqueKeyField = uniqueKey.getName();

        // "id, title" must yield "id" and "title", not "id" and " title".
        String fl = params.get(CommonParams.FL);
        String trimmedFl = (fl == null) ? "" : fl.trim();
        DocMetadataExtractor metadataExtractor = trimmedFl.isEmpty()
                ? new SimpleDocMetadataExtractor()
                : new SimpleDocMetadataExtractor(trimmedFl.split("\\s*,\\s*"));

        Filter queryFilter = getFilterQuery(req);

        SchemaField sf = schema.getField(field);
        if (sf == null || sf.getType() == null) {
            throw new RuntimeException("No analyzer found for field " + field);
        }

        //TODO remove and only use index
        // equalsIgnoreCase avoids the default-locale dependence of String.toLowerCase().
        String anType = params.get("anType", "query");
        Analyzer analyzer = "query".equalsIgnoreCase(anType)
                ? sf.getType().getQueryAnalyzer()
                : sf.getType().getIndexAnalyzer();

        Query query = QParser.getParser(q, null, req).parse();

        IndexReader reader = req.getSearcher().getIndexReader();
        ConcordanceConfig config = buildConcordanceConfig(field, solrUniqueKeyField, params);

        // NOTE(review): the third WindowBuilder argument is the original hard-coded 100;
        // its meaning is not visible from this file -- confirm against WindowBuilder.
        WindowBuilder windowBuilder = new WindowBuilder(config.getTokensBefore(), config.getTokensAfter(), 100,
                new DefaultSortKeyBuilder(config.getSortOrder()), metadataExtractor,
                new FieldBasedDocIdBuilder(solrUniqueKeyField));

        ConcordanceSearcher searcher = new ConcordanceSearcher(windowBuilder);

        AbstractConcordanceWindowCollector collector = new ConcordanceWindowCollector(config.getMaxWindows());

        searcher.search(reader, field, query, queryFilter, analyzer, collector);

        return convertToList(solrUniqueKeyField, collector);
    }

    /**
     * Collects worker results into a fresh {@code Results} whose window limit comes from the
     * configuration stored as the pool's metadata.
     *
     * @param threads the worker pool to drain; its metadata is read before any null check
     * @return the accumulated results
     */
    public static Results spinWait(RequestThreads<ConcordanceConfig> threads) {
        return spinWait(threads, new Results(threads.getMetadata()));
    }

    ;

    /**
     * Polls the worker pool and merges the results of finished workers into {@code results}
     * until the pool is terminated, no workers are left, or {@code results.hitMax} is set.
     * The pool is then shut down and any finished workers that were not yet collected are
     * merged as well (unless the maximum was already hit).
     * <p>
     * Every pass through the final drain loop removes one entry, so the method always returns.
     *
     * @param threads the worker pool; {@code null} or an empty pool returns {@code results} untouched
     * @param results accumulator to merge worker responses into
     * @return {@code results}
     */
    public static Results spinWait(RequestThreads<ConcordanceConfig> threads, Results results) {
        if (threads == null || threads.empty()) {
            return results;
        }

        while (!threads.isTerminated() && !threads.empty() && !results.hitMax) {
            //TODO: should iterate completed and not last inserted (!Stack)
            RequestWorker req = threads.next();
            if (req == null) {
                // Same null guard as the drain loop below: nothing to collect, drop the entry.
                threads.removeLast();
            } else if (!req.isRunning()) {
                collectResults(req, results);
                threads.removeLast();
            } else {
                // Worker still running: hint the scheduler instead of hammering the CPU.
                Thread.yield();
            }
        }

        //force complete shutdown
        threads.shutdownNow();

        //if not enough hits, check any remaining threads that haven't been collected
        while (!threads.empty() && !results.hitMax) {
            RequestWorker req = threads.next();
            if (req != null && !req.isRunning()) {
                collectResults(req, results);
            }
            // Always drop the entry. The original kept null / still-running entries in place,
            // which made this loop spin forever on them.
            threads.removeLast();
        }

        threads.clear();

        return results;
    }

    /** Merges a finished worker's response, if it produced one, into {@code results} tagged with the worker name. */
    private static void collectResults(RequestWorker worker, Results results) {
        NamedList nl = worker.getResults();
        if (nl != null) {
            results.add(nl, worker.getName());
        }
    }

    /**
     * Starts worker requests for the given shards using {@link #MAX_THREADS} as the thread cap.
     *
     * @param shards shard addresses to query
     * @param req    the current Solr request
     * @return the sealed worker pool
     */
    public static RequestThreads<ConcordanceConfig> initRequestPump(List<String> shards, SolrQueryRequest req) {
        final int threadCap = MAX_THREADS;
        return initRequestPump(shards, req, threadCap);
    }

    /**
     * Builds the worker pool for a distributed concordance request: one {@code RequestWorker}
     * per shard, at most {@code maxThreads} of them, each sent to {@code "http://" + shard}
     * with the parameters produced by {@code getWorkerParams}. The pool carries the
     * {@code ConcordanceConfig} built from the request as its metadata and is sealed before
     * it is returned.
     *
     * @param shards     shard addresses (host/path without scheme); must not be {@code null}
     * @param req        the current Solr request
     * @param maxThreads upper bound on the number of workers started; values &lt;= 0 start none
     * @return the sealed worker pool
     */
    static public RequestThreads<ConcordanceConfig> initRequestPump(List<String> shards, SolrQueryRequest req,
            int maxThreads) {
        SolrParams params = req.getParams();
        String field = SolrConcordanceBase.getField(params, req.getSchema().getDefaultSearchFieldName());
        String q = params.get(CommonParams.Q);
        ConcordanceConfig config = buildConcordanceConfig(field, req.getSchema().getUniqueKeyField().getName(),
                params);

        // Keep the pool size at 1 or more: an empty shard list would otherwise request a pool
        // of size 0 (java.util.concurrent fixed pools reject sizes <= 0; presumably
        // RequestThreads delegates to one -- confirm).
        int poolSize = Math.max(1, Math.min(shards.size(), maxThreads));
        RequestThreads<ConcordanceConfig> threads = RequestThreads
                .<ConcordanceConfig>newFixedThreadPool(poolSize).setMetadata(config);

        String handler = getHandlerName(req, DefaultName, KWICRequestHandler.class);
        int windowsForEach = config.getMaxWindows();//Math.round(config.getMaxWindows() / (float)shards.size()) ;

        ModifiableSolrParams p = getWorkerParams(field, q, params, windowsForEach);

        int started = 0;
        for (String node : shards) {
            // ">=" so that exactly maxThreads workers are started; the previous
            // "i++ > maxThreads" test let maxThreads + 1 through.
            if (started >= maxThreads) {
                break;
            }

            //could be https, no?
            String url = "http://" + node;

            RequestWorker worker = new RequestWorker(url, handler, p).setName(node);
            threads.addExecute(worker);
            started++;
        }
        threads.seal(); //disallow future requests (& execute

        return threads;
    }

    /**
     * Assembles the parameter set sent to each shard worker.
     * <p>
     * {@code f}, {@code q} and {@code maxWindows} are set from the arguments, {@code lq} is
     * forced to {@code true}, {@code rows} to {@code 0}, and a fixed list of request
     * parameters is forwarded from {@code parent} through {@code setParam}.
     *
     * @param field      value for the {@code f} parameter
     * @param q          value for the {@code q} parameter
     * @param parent     parameters of the incoming request
     * @param maxWindows value for the {@code maxWindows} parameter
     * @return the new worker parameter set
     */
    private static ModifiableSolrParams getWorkerParams(String field, String q, SolrParams parent,
            Integer maxWindows) {
        ModifiableSolrParams workerParams = new ModifiableSolrParams();

        workerParams.set("f", field);
        workerParams.set("q", q);
        workerParams.set("maxWindows", maxWindows);
        //TODO false if distrib
        //flag to disallow recursive zoo queries
        workerParams.set("lq", true);

        //don't need rows of docs if SearchComponent is already returning them
        workerParams.set("rows", 0);

        // Forwarded in the same order as before.
        String[] forwardedNames = {
            "anType",
            "fq",
            "echoParams",
            "defType",
            "wt",
            "debug",
            "fl",
            "targetOverlaps",
            "contentDisplaySize",
            "targetDisplaySize",
            "tokensAfter",
            "tokensBefore",
            "sortOrder"
        };
        for (String name : forwardedNames) {
            setParam(name, workerParams, parent);
        }
        return workerParams;
    }

    /**
     * Creates a {@code ConcordanceConfig} for {@code field} and overrides its settings from the
     * request parameters {@code targetOverlaps}, {@code contentDisplaySize},
     * {@code targetDisplaySize}, {@code maxWindows}, {@code tokensAfter}, {@code tokensBefore}
     * and {@code sortOrder}.
     * <p>
     * Parsing is best-effort: a parameter that is missing, empty, not a valid integer, or not a
     * {@code ConcordanceSortOrder} constant name is skipped, leaving the config's existing value.
     * Only the parse step is guarded; an exception thrown by a config setter is no longer hidden.
     *
     * @param field   field the config is created for
     * @param idField not used by this method
     * @param params  request parameters to read overrides from
     * @return the populated config
     */
    private static ConcordanceConfig buildConcordanceConfig(String field, String idField, SolrParams params) {
        ConcordanceConfig config = new ConcordanceConfig(field);

        // Boolean.parseBoolean never throws, so no guard is needed here.
        String overlaps = params.get("targetOverlaps");
        if (overlaps != null && overlaps.length() > 0) {
            config.setAllowTargetOverlaps(Boolean.parseBoolean(overlaps));
        }

        Integer parsed = parseIntOrNull(params.get("contentDisplaySize"));
        if (parsed != null) {
            config.setMaxContextDisplaySizeChars(parsed.intValue());
        }

        parsed = parseIntOrNull(params.get("targetDisplaySize"));
        if (parsed != null) {
            config.setMaxTargetDisplaySizeChars(parsed.intValue());
        }

        parsed = parseIntOrNull(params.get("maxWindows"));
        if (parsed != null) {
            config.setMaxWindows(parsed.intValue());
        }

        parsed = parseIntOrNull(params.get("tokensAfter"));
        if (parsed != null) {
            config.setTokensAfter(parsed.intValue());
        }

        parsed = parseIntOrNull(params.get("tokensBefore"));
        if (parsed != null) {
            config.setTokensBefore(parsed.intValue());
        }

        String sortOrderName = params.get("sortOrder");
        if (sortOrderName != null && sortOrderName.length() > 0) {
            ConcordanceSortOrder sortOrder = null;
            try {
                sortOrder = ConcordanceSortOrder.valueOf(sortOrderName);
            } catch (IllegalArgumentException ignored) {
                // Unknown constant name: deliberately keep the config's current sort order.
            }
            if (sortOrder != null) {
                config.setSortOrder(sortOrder);
            }
        }
        return config;
    }

    /** Parses {@code raw} as a decimal int; returns {@code null} when it is null, empty or malformed. */
    private static Integer parseIntOrNull(String raw) {
        if (raw == null || raw.length() == 0) {
            return null;
        }
        try {
            return Integer.valueOf(Integer.parseInt(raw));
        } catch (NumberFormatException ignored) {
            // Best-effort parameter handling: a malformed number means "use the default".
            return null;
        }
    }

    /**
     * Initializes the handler by delegating straight to the superclass; no handler-specific
     * setup is performed here.
     *
     * @param args initialization arguments, passed through unchanged
     */
    @Override
    public void init(@SuppressWarnings("rawtypes") NamedList args) {
        super.init(args);
    }

    /**
     * @return a fixed, human-readable one-line description of this handler
     */
    @Override
    public String getDescription() {
        return "Returns concordance results for your query";
    }

    ;

    /**
     * @return the literal string {@code "$Source$"} (looks like an unexpanded version-control
     *         keyword placeholder -- NOTE(review): confirm whether anything consumes it)
     */
    @Override
    public String getSource() {
        return "$Source$";
    }

    /**
     * Resolves this handler's name for the given request via the three-argument
     * {@code getHandlerName}, passing {@link #DefaultName} and this instance's runtime class.
     *
     * @param req the current Solr request
     * @return the resolved handler name
     */
    @Override
    protected String getHandlerName(SolrQueryRequest req) {
        final Class<?> handlerClass = getClass();
        return getHandlerName(req, DefaultName, handlerClass);
    }

    /**
     * Entry point for a request: runs the local-only search unless {@code isDistributed(req)}
     * reports a distributed request, in which case the shard fan-out path is taken.
     *
     * @param req the current Solr request
     * @param rsp the response to populate
     * @throws Exception propagated from either search path
     */
    @SuppressWarnings("unchecked")
    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        if (!isDistributed(req)) {
            doQuery(req, rsp);
            return;
        }
        doZooQuery(req, rsp);
    }

    /**
     * Non-distributed path: runs the local search and adds its result to the response
     * under {@link #NODE}.
     *
     * @param req the current Solr request
     * @param rsp the response to populate
     * @throws Exception propagated from {@code doLocalSearch}
     */
    private void doQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        rsp.add(NODE, doLocalSearch(req));
    }

    /**
     * Distributed path: starts one worker request per shard, runs the local search while they
     * are in flight, merges local and worker results, and adds the merged result to the
     * response under {@link #NODE}.
     * <p>
     * If the local search or the merge fails after the workers were started, the worker pool
     * is shut down and cleared before the exception propagates, so no worker threads are left
     * behind.
     *
     * @param req the current Solr request
     * @param rsp the response to populate
     * @throws SolrServerException declared for callers; see also {@code Exception}
     * @throws Exception           anything raised by the local search or result merging
     */
    @SuppressWarnings("unchecked")
    private void doZooQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws SolrServerException, Exception {

        List<String> shards = getShards(req, false);

        RequestThreads<ConcordanceConfig> threads = initRequestPump(shards, req);

        Results results = new Results(threads.getMetadata());

        boolean drained = false;
        try {
            NamedList nl = doLocalSearch(req);
            results.add(nl, "local");

            // spinWait shuts the pool down itself on the normal path.
            results = spinWait(threads, results);
            drained = true;
        } finally {
            if (!drained) {
                // Failure path: spinWait never ran (or aborted), so stop the workers here.
                threads.shutdownNow();
                threads.clear();
            }
        }

        rsp.add(NODE, results.toNamedList());

    }

    /**
     * Accumulator that merges concordance responses (the local one and those of shard workers)
     * into a single result: the counters are summed and all {@code window} entries are appended
     * in arrival order. {@code hitMax} is set once the summed {@code numWindows} reaches
     * {@code maxWindows}; windows are not truncated to that limit here.
     */
    static class Results {
        int maxWindows = -1;
        boolean hitMax = false;
        long numDocs = 0;
        int totalDocs = 0;
        int totalWindows = 0;
        int numWindows = 0;
        NamedList windows = new SimpleOrderedMap<Object>();

        /** @param maxWindows number of windows at which {@code hitMax} becomes true */
        Results(int maxWindows) {
            this.maxWindows = maxWindows;
        }

        /** @param config configuration whose {@code getMaxWindows()} is used as the limit */
        Results(ConcordanceConfig config) {
            this.maxWindows = config.getMaxWindows();
        }

        /**
         * Merges one response into this accumulator.
         *
         * @param nl    either a full response containing a {@link #NODE} entry or the result list
         *              itself; {@code null} is ignored
         * @param extra when non-empty, added to every merged window under the key {@code "source"}
         */
        void add(NamedList nl, String extra) {
            if (nl == null) {
                return;
            }

            // Accept both the wrapped form (response with a NODE child) and the bare result list.
            Object nested = nl.get(NODE);
            NamedList nlRS = (nested == null) ? nl : (NamedList) nested;

            numDocs += getLong("numDocs", nlRS);
            totalDocs += getInt("totalDocs", nlRS);
            totalWindows += getInt("totalWindows", nlRS);
            numWindows += getInt("numWindows", nlRS);

            hitMax = numWindows >= maxWindows;

            Object o = nlRS.get("windows");
            if (o != null) {
                NamedList nlWindows = (NamedList) o;

                List<NamedList> wins = nlWindows.getAll("window");

                for (NamedList nlWin : wins) {
                    if (extra != null && extra.length() > 0)
                        nlWin.add("source", extra);

                    //TODO: if one wanted to sort this, they'd have to convert it to a class and then sort
                    //before returning
                    windows.add("window", nlWin);
                }
            }

        }

        /**
         * Reads a numeric entry as an {@code int}; a missing entry counts as 0.
         * Any {@code Number} is accepted (the former {@code (int) o} cast only worked for
         * {@code Integer} and threw {@code ClassCastException} for e.g. {@code Long}).
         */
        int getInt(String name, NamedList nl) {
            Object o = nl.get(name);
            if (o != null)
                return ((Number) o).intValue();
            return 0;
        }

        /**
         * Reads a numeric entry as a {@code long}; a missing entry counts as 0.
         * Any {@code Number} is accepted (the former {@code (long) o} cast only worked for
         * {@code Long} and threw {@code ClassCastException} for e.g. {@code Integer}).
         */
        long getLong(String name, NamedList nl) {
            Object o = nl.get(name);
            if (o != null)
                return ((Number) o).longValue();
            return 0;
        }

        /** @return a new list holding {@code hitMax}, the four counters and the merged {@code windows} */
        NamedList toNamedList() {
            NamedList nl = new SimpleOrderedMap<>();
            nl.add("hitMax", hitMax);
            nl.add("numDocs", numDocs);
            nl.add("totalDocs", totalDocs);
            nl.add("totalWindows", totalWindows);
            nl.add("numWindows", numWindows);

            nl.add("windows", windows);
            return nl;
        }
    }

}