org.dice.solrenhancements.morelikethis.DiceMoreLikeThisHandler.java Source code

Java tutorial

Introduction

Here is the source code for org.dice.solrenhancements.morelikethis.DiceMoreLikeThisHandler.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dice.solrenhancements.morelikethis;

import com.google.common.base.Strings;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.payloads.PayloadTermQuery;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.*;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.*;
import org.apache.solr.util.SolrPluginUtils;

import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

/**
 * Solr MoreLikeThis --
 *
 * Return similar documents either based on a single document or based on posted text.
 *
 * @since solr 1.3
 */
public class DiceMoreLikeThisHandler extends RequestHandlerBase {
    private final static String EDISMAX = ExtendedDismaxQParserPlugin.NAME;
    private String version = null;

    /**
     * Initializes the handler by delegating to the parent class with the supplied arguments;
     * no handler-specific setup is performed here.
     *
     * @param args initialization arguments, passed through unchanged to {@code super.init}
     */
    @Override
    public void init(NamedList args) {
        super.init(args);
    }

    /**
     * Serves a MoreLikeThis request. The seed is either the documents matched by {@code q} or,
     * when {@code q} is missing or blank, the text of a single posted content stream. The similar
     * documents are written to the response under {@code "response"}, optionally followed by the
     * interesting terms, facet counts and debug information.
     *
     * @param req the incoming request; its parameters are replaced by an adjusted copy
     * @param rsp the response being populated
     * @throws SolrException with {@code BAD_REQUEST} when the query or a filter cannot be parsed,
     *                       or when neither a query nor a content stream is supplied
     * @throws Exception     any other failure raised while searching or reading the stream
     */
    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        SolrIndexSearcher indexSearcher = req.getSearcher();
        SchemaField keyField = indexSearcher.getSchema().getUniqueKeyField();

        // Work on a mutable copy of the parameters; configureSolrParameters adjusts the copy
        // and installs it on the request, so this must happen before anything reads req's params.
        ModifiableSolrParams params = new ModifiableSolrParams(req.getParams());
        configureSolrParameters(req, params, keyField.getName());

        // Ask the searcher for scores only when the requested field list wants them.
        ReturnFields fieldsToReturn = new SolrReturnFields(req);
        rsp.setReturnFields(fieldsToReturn);
        int flags = fieldsToReturn.wantsScore() ? SolrIndexSearcher.GET_SCORES : 0;

        // defType is always present at this point (set in configureSolrParameters).
        String defType = params.get(QueryParsing.DEFTYPE, EDISMAX);
        String q = params.get(CommonParams.Q);

        Query parsedQuery = null;
        SortSpec sortSpec = null;
        QParser parser = null;
        List<Query> targetFqFilters = null;
        List<Query> mltFqFilters = null;
        try {
            // A parser is created even without a query so that the sort spec is still available.
            parser = QParser.getParser(q, defType, req);
            if (q != null) {
                parsedQuery = parser.getQuery();
            }
            sortSpec = parser.getSort(true);

            targetFqFilters = getFilters(req, CommonParams.FQ);
            mltFqFilters = getFilters(req, MoreLikeThisParams.FQ);
        } catch (SyntaxError e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        }

        MoreLikeThisHelper mlt = new MoreLikeThisHelper(params, indexSearcher, keyField, parser);

        // Decide up front whether (and how) interesting terms should be reported.
        MoreLikeThisParams.TermStyle termStyle = MoreLikeThisParams.TermStyle
                .get(params.get(MoreLikeThisParams.INTERESTING_TERMS));

        MLTResult mltResult = null;
        DocListAndSet similarDocs = null;
        Reader streamReader = null;
        try {
            int start = params.getInt(CommonParams.START, 0);
            int rows = params.getInt(CommonParams.ROWS, 10);

            // Only look at posted content when there is no usable query string.
            boolean blankQuery = (q == null) || q.trim().isEmpty();
            if (blankQuery) {
                streamReader = getContentStreamReader(req, streamReader);
            }

            if (streamReader != null) {
                // Seed from the posted text.
                mltResult = mlt.getMoreLikeThisFromContentSteam(streamReader, start, rows, mltFqFilters, flags,
                        sortSpec.getSort());
            } else if (q != null) {
                // Seed from the documents matching the query.
                mltResult = getMoreLikeTheseFromQuery(rsp, params, flags, q, parsedQuery, sortSpec,
                        targetFqFilters, mltFqFilters, indexSearcher, mlt, start, rows);
            } else {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "MoreLikeThis requires either a query (?q=) or text to find similar documents.");
            }

            if (mltResult != null) {
                similarDocs = mltResult.getDoclist();
            }
        } finally {
            if (streamReader != null) {
                streamReader.close();
            }
        }

        // Fall back to an empty holder so the accesses below cannot hit a null reference.
        if (similarDocs == null) {
            similarDocs = new DocListAndSet();
        }
        rsp.add("response", similarDocs.docList);

        boolean wantsTerms = termStyle != MoreLikeThisParams.TermStyle.NONE;
        if (mltResult != null && wantsTerms) {
            addInterestingTerms(rsp, termStyle, mltResult);
        }

        // Facet over the similar documents when requested.
        if (params.getBool(FacetParams.FACET, false)) {
            addFacet(req, rsp, params, similarDocs);
        }

        addDebugInfo(req, rsp, q, mltFqFilters, mlt, mltResult);
    }

    /**
     * Applies the handler's parameter overrides and installs the adjusted parameters on the request.
     * <p>
     * The query parser type defaults to edismax. When the effective parser name contains
     * "edismax" (case-insensitively), {@code mm} is forced to 0 and the default field is set to the
     * unique key field.
     *
     * @param req            the request whose parameters are replaced with {@code params}
     * @param params         mutable copy of the request parameters; modified in place
     * @param uniqueKeyField name of the schema's unique key field, used as the default field
     */
    private void configureSolrParameters(SolrQueryRequest req, ModifiableSolrParams params, String uniqueKeyField) {

        // default to the edismax parser
        String defType = params.get(QueryParsing.DEFTYPE, EDISMAX);
        // allow usage of custom edismax implementations, such as our own.
        // Locale.ROOT keeps the comparison independent of the JVM's default locale (e.g. the
        // Turkish dotless-i rule would otherwise stop "EDISMAX" from matching "edismax").
        if (defType.toLowerCase(Locale.ROOT).contains(EDISMAX.toLowerCase(Locale.ROOT))) {
            params.set(DisMaxParams.MM, 0);
            // edismax blows up without df field, even if you specify the field to match on in the query
            params.set(CommonParams.DF, uniqueKeyField);
        }
        params.set(QueryParsing.DEFTYPE, defType);
        req.setParams(params);
    }

    /**
     * Opens a reader over the request's single posted content stream, if there is one.
     * <p>
     * The number of streams is validated before any reader is opened, so rejecting a request
     * with several streams does not leave an unclosed reader behind.
     *
     * @param req    the request whose content streams are inspected
     * @param reader value returned unchanged when the request carries no content stream
     * @return a reader over the only content stream, or {@code reader} when there is none;
     *         the caller is responsible for closing a newly opened reader
     * @throws IOException   if the stream's reader cannot be opened
     * @throws SolrException with {@code BAD_REQUEST} when more than one content stream was posted
     */
    private Reader getContentStreamReader(SolrQueryRequest req, Reader reader) throws IOException {
        Iterable<ContentStream> streams = req.getContentStreams();
        if (streams == null) {
            return reader;
        }
        Iterator<ContentStream> iter = streams.iterator();
        if (!iter.hasNext()) {
            return reader;
        }
        ContentStream onlyStream = iter.next();
        // Reject multiple streams BEFORE opening anything: a reader opened here and then
        // abandoned by the exception would never reach the caller's finally block.
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "MoreLikeThis does not support multiple ContentStreams");
        }
        return onlyStream.getReader();
    }

    /**
     * Runs the seed query and asks the helper for documents similar to the documents it matched.
     *
     * @param rsp             response; receives the seed documents under {@code "match"} unless
     *                        {@code mlt.match.include} is false
     * @param params          request parameters (match include flag and match offset are read)
     * @param flags           searcher flags, e.g. whether scores are requested
     * @param q               raw query string, used only in the error message
     * @param query           parsed seed query
     * @param sortSpec        sort specification for the similar documents
     * @param targetFqFilters filters applied to the seed query
     * @param mltFqFilters    filters applied to the MoreLikeThis query
     * @param searcher        searcher used to run the seed query
     * @param mlt             helper that builds and runs the MoreLikeThis query
     * @param start           offset into the similar documents
     * @param rows            number of similar documents to return
     * @return the MoreLikeThis result, or {@code null} when the seed page contains no documents
     * @throws SolrException with {@code BAD_REQUEST} when the seed query matches nothing
     */
    private MLTResult getMoreLikeTheseFromQuery(SolrQueryResponse rsp, SolrParams params, int flags, String q,
            Query query, SortSpec sortSpec, List<Query> targetFqFilters, List<Query> mltFqFilters,
            SolrIndexSearcher searcher, MoreLikeThisHelper mlt, int start, int rows)
            throws IOException, SyntaxError {

        final boolean echoMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
        final int seedOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);

        // Fetch the seed documents: up to 10000 of them, starting at the requested offset.
        DocList match = searcher.getDocList(query, targetFqFilters, null, seedOffset, 10000, flags);
        if (match.matches() == 0) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    String.format("MoreLikeThis was unable to find any documents matching the query: '%s'.", q));
        }

        if (echoMatch) {
            rsp.add("match", match);
        }

        // The query may have matches overall while this page of seeds is empty
        // (for example when the offset is past the last match).
        DocIterator seeds = match.iterator();
        if (!seeds.hasNext()) {
            return null;
        }

        // The whole iterator is handed over, so every seed document on the page contributes.
        return mlt.getMoreLikeTheseFromDocs(seeds, start, rows, mltFqFilters, flags, sortSpec.getSort());
    }

    /**
     * Collects the term and boost of every term clause of a boolean MoreLikeThis query, sorted
     * with {@code InterestingTerm.BOOST_ORDER}.
     * <p>
     * Only {@link TermQuery} and {@link PayloadTermQuery} clauses carry a term; any other clause
     * is skipped, because an entry without a term cannot be rendered by the caller. A
     * {@code null} query, or one that is not a {@link BooleanQuery}, yields an empty list instead
     * of failing on the cast.
     *
     * @param query the raw MoreLikeThis query; may be {@code null}
     * @return the interesting terms found, never {@code null}
     */
    private List<InterestingTerm> extractInterestingTerms(Query query) {
        List<InterestingTerm> terms = new ArrayList<InterestingTerm>();
        if (!(query instanceof BooleanQuery)) {
            // Covers null as well: nothing to extract from a non-boolean query.
            return terms;
        }
        for (BooleanClause clause : ((BooleanQuery) query).clauses()) {
            Query q = clause.getQuery();
            InterestingTerm it = new InterestingTerm();
            it.boost = q.getBoost();
            if (q instanceof TermQuery) {
                it.term = ((TermQuery) q).getTerm();
            } else if (q instanceof PayloadTermQuery) {
                it.term = ((PayloadTermQuery) q).getTerm();
            } else {
                // No term to report for this clause type; a null term would break the caller.
                continue;
            }
            terms.add(it);
        }
        Collections.sort(terms, InterestingTerm.BOOST_ORDER);

        return terms;
    }

    /**
     * Adds the {@code "interestingTerms"} section to the response.
     * <p>
     * For the {@code DETAILS} style the section maps each term of the raw MoreLikeThis query
     * (right-padded to a common width) to its boost; for any other style it is the plain list of
     * term words. The result's term list is sorted in place by
     * {@code MLTTerm.FLD_BOOST_X_SCORE_ORDER} in both cases.
     *
     * @param rsp       response being populated
     * @param termStyle requested presentation of the interesting terms
     * @param mltResult result holding the MoreLikeThis terms and the raw query
     */
    private void addInterestingTerms(SolrQueryResponse rsp, MoreLikeThisParams.TermStyle termStyle,
            MLTResult mltResult) {

        List<MLTTerm> rankedTerms = mltResult.mltTerms;
        Collections.sort(rankedTerms, MLTTerm.FLD_BOOST_X_SCORE_ORDER);

        if (termStyle != MoreLikeThisParams.TermStyle.DETAILS) {
            // Simple style: just the words, in ranked order.
            List<String> words = new ArrayList<String>(rankedTerms.size());
            for (MLTTerm ranked : rankedTerms) {
                words.add(ranked.getWord());
            }
            rsp.add("interestingTerms", words);
            return;
        }

        // Detailed style: term -> boost, with the term names padded so the values line up.
        List<InterestingTerm> detailed = extractInterestingTerms(mltResult.rawMLTQuery);

        int padWidth = 0;
        for (InterestingTerm entry : detailed) {
            padWidth = Math.max(padWidth, entry.term.toString().length());
        }

        NamedList<Float> boostByTerm = new NamedList<Float>();
        for (InterestingTerm entry : detailed) {
            String paddedTerm = Strings.padEnd(entry.term.toString(), padWidth, ' ');
            boostByTerm.add(paddedTerm, entry.boost);
        }
        rsp.add("interestingTerms", boostByTerm);
    }

    /**
     * Adds {@code "facet_counts"} to the response, computed over the set of similar documents.
     * When no document set is available the entry is still added, with a {@code null} value.
     *
     * @param req     current request
     * @param rsp     response being populated
     * @param params  parameters driving the faceting
     * @param mltDocs similar documents; only its {@code docSet} is used
     */
    private void addFacet(SolrQueryRequest req, SolrQueryResponse rsp, SolrParams params, DocListAndSet mltDocs) {
        Object facetCounts = null;
        if (mltDocs.docSet != null) {
            SimpleFacets facets = new SimpleFacets(req, mltDocs.docSet, params);
            facetCounts = facets.getFacetCounts();
        }
        rsp.add("facet_counts", facetCounts);
    }

    /**
     * Adds a {@code "debug"} section to the response when {@code debugQuery} is enabled.
     * <p>
     * The section contains the MoreLikeThis terms, the standard Solr debug output for the real
     * MoreLikeThis query and, when filters are present, both the raw {@code fq} parameters and
     * the parsed MoreLikeThis filter queries. Any failure while building the section is logged
     * and reported under {@code "exception_during_debug"} rather than failing the request.
     * <p>
     * {@code mltResult} may be {@code null} (no MoreLikeThis query was run); nothing is
     * dereferenced unless debugging is on, and in that case an empty term list is reported and
     * result explanations are skipped.
     *
     * @param req          current request
     * @param rsp          response being populated
     * @param q            raw user query string, may be {@code null}
     * @param mltFqFilters parsed MoreLikeThis filter queries, may be {@code null}
     * @param mlt          helper exposing the real MoreLikeThis query
     * @param mltResult    MoreLikeThis result, may be {@code null}
     */
    private void addDebugInfo(SolrQueryRequest req, SolrQueryResponse rsp, String q, List<Query> mltFqFilters,
            MoreLikeThisHelper mlt, MLTResult mltResult) {

        boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);
        boolean dbgQuery = false, dbgResults = false;
        if (!dbg) {//if it's true, we are doing everything anyway.
            String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
            if (dbgParams != null) {
                for (int i = 0; i < dbgParams.length; i++) {
                    if (dbgParams[i].equals(CommonParams.QUERY)) {
                        dbgQuery = true;
                    } else if (dbgParams[i].equals(CommonParams.RESULTS)) {
                        dbgResults = true;
                    }
                }
            }
        } else {
            dbgQuery = true;
            dbgResults = true;
        }

        // NOTE(review): output is gated on debugQuery only, so the individual debug=query /
        // debug=results flags computed above currently have no effect on their own. Kept as-is
        // to preserve existing behavior.
        // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
        if (!dbg) {
            return;
        }

        try {
            // Resolve the optional pieces defensively: there may be no result at all.
            DocListAndSet mltDocs = (mltResult != null) ? mltResult.getDoclist() : null;
            DocList docList = (mltDocs != null) ? mltDocs.docList : null;
            NamedList<String> it = (mltResult != null)
                    ? getMltTermsForDebug(mltResult)
                    : new NamedList<String>();

            NamedList<Object> dbgInfo = new NamedList<Object>();
            // Result explanations need an actual doc list; skip them when there is none.
            NamedList<Object> stdDbg = SolrPluginUtils.doStandardDebug(req, q, mlt.getRealMLTQuery(),
                    docList, dbgQuery, dbgResults && docList != null);

            rsp.add("debug", dbgInfo);
            dbgInfo.add("mltTerms", it);
            if (stdDbg != null) {
                dbgInfo.addAll(stdDbg);
            }

            if (null != mltFqFilters) {
                dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
                List<String> fqs = new ArrayList<String>(mltFqFilters.size());
                for (Query fq : mltFqFilters) {
                    fqs.add(QueryParsing.toString(fq, req.getSchema()));
                }
                dbgInfo.add("mlt_filter_queries", fqs);
            }
        } catch (Exception e) {
            SolrException.log(SolrCore.log, "Exception during debug", e);
            rsp.add("exception_during_debug", SolrException.toStr(e));
        }
    }

    /**
     * Formats the MoreLikeThis terms for the debug output: one entry per term, keyed by the
     * field name and valued {@code "<word> - <values>"}, with field names and words right-padded
     * to common widths. The result's term list is sorted in place by its natural order first.
     *
     * @param mltResult result holding the terms to format
     * @return the formatted terms, in sorted order
     */
    private NamedList<String> getMltTermsForDebug(MLTResult mltResult) {
        List<MLTTerm> sortedTerms = mltResult.mltTerms;
        Collections.sort(sortedTerms);

        // First pass: find the column widths.
        int wordWidth = 0;
        int fieldWidth = 0;
        for (MLTTerm term : sortedTerms) {
            wordWidth = Math.max(wordWidth, term.getWord().length());
            fieldWidth = Math.max(fieldWidth, term.getFieldName().length());
        }

        // Second pass: emit the aligned entries.
        NamedList<String> formatted = new NamedList<String>();
        for (MLTTerm term : sortedTerms) {
            String fieldColumn = Strings.padEnd(term.getFieldName(), fieldWidth, ' ');
            String wordColumn = Strings.padEnd(term.getWord(), wordWidth, ' ');
            formatted.add(fieldColumn, wordColumn + " - " + term.valuesToString());
        }
        return formatted;
    }

    /**
     * Parses every non-blank value of the given request parameter into a filter query.
     *
     * @param req   request supplying the parameter values and the parsing context
     * @param param name of the (possibly multi-valued) filter parameter
     * @return the parsed filters in parameter order; empty, never {@code null}, when the
     *         parameter is absent or has only blank values
     * @throws SyntaxError if one of the values cannot be parsed
     */
    private List<Query> getFilters(SolrQueryRequest req, String param) throws SyntaxError {
        List<Query> parsed = new ArrayList<Query>();
        String[] rawFilters = req.getParams().getParams(param);
        if (rawFilters == null) {
            return parsed;
        }
        for (String raw : rawFilters) {
            boolean blank = (raw == null) || raw.trim().length() == 0;
            if (blank) {
                continue;
            }
            parsed.add(QParser.getParser(raw, null, req).getQuery());
        }
        return parsed;
    }

    //////////////////////// SolrInfoMBeans methods //////////////////////

    /**
     * Returns a fixed, human-readable description of this handler.
     *
     * @return the constant description string
     */
    @Override
    public String getDescription() {
        return "Dice custom MoreLikeThis handler";
    }

    /**
     * Returns the source location reported for this handler.
     *
     * @return the literal string {@code "$URL$"} (presumably an unexpanded version-control
     *         keyword placeholder -- TODO confirm)
     */
    @Override
    public String getSource() {
        return "$URL$";
    }

    /**
     * Returns version information read from the manifest of the DiceSolrEnhancements jar.
     * <p>
     * All {@code META-INF/MANIFEST.MF} resources on the class loader are scanned, but only the
     * one whose URL contains {@code DiceSolrEnhancements-1.0.jar} is read. Lines are discarded
     * until one containing {@code Built-Date} is seen; from that line on, the rest of the
     * manifest is kept. The result is cached after the first successful read.
     *
     * @return the cached or freshly read version text (empty when the jar or the
     *         {@code Built-Date} entry is not found), or {@code "Error reading manifest!"} if
     *         reading fails; a failure is not cached
     */
    @Override
    public String getVersion() {

        if (version != null) {
            return version;
        }
        // Loop-invariant: look the separator up once rather than once per byte.
        final String lineSeparator = System.getProperty("line.separator");
        StringBuilder stringBuilder = new StringBuilder();
        try {
            Enumeration<URL> resources = getClass().getClassLoader().getResources("META-INF/MANIFEST.MF");
            while (resources.hasMoreElements()) {
                URL url = resources.nextElement();
                /* let's not read other jar's manifests */
                if (!url.toString().contains("DiceSolrEnhancements-1.0.jar")) {
                    continue;
                }
                InputStream reader = url.openStream();
                try {
                    while (reader.available() > 0) {
                        char c = (char) reader.read();
                        stringBuilder.append(c);
                        /* skip lines that don't contain the built-date */
                        String soFar = stringBuilder.toString();
                        if (soFar.contains(lineSeparator) && !soFar.contains("Built-Date")) {
                            stringBuilder.setLength(0);
                        }
                    }
                } finally {
                    // Always release the manifest stream, even if reading fails part-way.
                    reader.close();
                }
            }
        } catch (Exception e) {
            return "Error reading manifest!";
        }
        version = stringBuilder.toString();
        return version;
    }

    /**
     * Returns the documentation links for this handler.
     *
     * @return a one-element array pointing at the Solr MoreLikeThis wiki page, or {@code null}
     *         if that URL cannot be constructed
     */
    @Override
    public URL[] getDocs() {
        final URL wikiPage;
        try {
            wikiPage = new URL("http://wiki.apache.org/solr/MoreLikeThis");
        } catch (MalformedURLException ex) {
            return null;
        }
        return new URL[] { wikiPage };
    }
}