// com.github.healthonnet.search.SynonymExpandingExtendedDismaxQParserPlugin.java
//
// NOTE: the original listing carried a scraped web-page header here
// ("Java tutorial / Introduction / Here is the source code for ... / Source").
// It has been preserved as this comment so the file remains valid Java.

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.healthonnet.search;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.queries.function.BoostedQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.ExtendedDismaxQParser;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SyntaxError;

import com.github.healthonnet.search.SynonymExpandingExtendedDismaxQParserPlugin.Const;
import com.github.healthonnet.search.SynonymExpandingExtendedDismaxQParserPlugin.Params;
import com.github.healthonnet.synonyms.AlternateQuery;
import com.github.healthonnet.synonyms.NoBoostSolrParams;
import com.github.healthonnet.synonyms.ReasonForNotExpandingSynonyms;
import com.github.healthonnet.synonyms.TextInQuery;
import com.google.common.collect.Ordering;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;

/**
 * Main implementation of the synonym-expanding ExtendedDismaxQParser plugin for Solr. This parser was originally derived from ExtendedDismaxQParser, which itself was derived from the DismaxQParser from Solr.
 * 
 * @see <a href="https://github.com/healthonnet/hon-lucene-synonyms">https://github.com/healthonnet/hon-lucene-synonyms</a>
 */
/**
 * Main implementation of the synonym-expanding ExtendedDismaxQParser plugin for Solr. This parser was originally derived from ExtendedDismaxQParser, which itself was derived from the DismaxQParser from Solr.
 * 
 * @see <a href="https://github.com/healthonnet/hon-lucene-synonyms">https://github.com/healthonnet/hon-lucene-synonyms</a>
 */
public class SynonymExpandingExtendedDismaxQParserPlugin extends QParserPlugin implements ResourceLoaderAware {

    /** Query-parser name under which this plugin is registered (e.g. in solrconfig.xml). */
    public static final String name = "synonym_edismax";

    /**
     * Convenience class holding the request-parameter names understood by this parser.
     */
    public static class Params {

        /**
         * Multiplicative boost parameter of the underlying edismax parser.
         *
         * @see <a href="https://cwiki.apache.org/confluence/display/solr/The+Extended+DisMax+Query+Parser">The Extended DisMax Query Parser</a>
         */
        public static final String MULT_BOOST = "boost";

        public static final String SYNONYMS = "synonyms";
        public static final String SYNONYMS_ANALYZER = "synonyms.analyzer";
        public static final String SYNONYMS_DEFAULT_ANALYZER = "synonyms.default";
        public static final String SYNONYMS_ORIGINAL_BOOST = "synonyms.originalBoost";
        public static final String SYNONYMS_SYNONYM_BOOST = "synonyms.synonymBoost";
        public static final String SYNONYMS_DISABLE_PHRASE_QUERIES = "synonyms.disablePhraseQueries";
        public static final String SYNONYMS_CONSTRUCT_PHRASES = "synonyms.constructPhrases";
        public static final String SYNONYMS_IGNORE_QUERY_OPERATORS = "synonyms.ignoreQueryOperators";

        public static final String MAIN_PREANALYZIS = "synonyms.preanalyzis";
        public static final String MAIN_ANALYZER = "synonyms.preanalyzer";
        public static final String MAIN_DEFAULT_ANALYZER = "synonyms.defaultPreanalyzer";

        /**
         * instead of splicing synonyms into the original query string, ie dog bite canine familiaris bite dog chomp canine familiaris chomp do this: dog bite "canine familiaris" chomp with phrases off: dog bite canine familiaris chomp
         */
        public static final String SYNONYMS_BAG = "synonyms.bag";

        /**
         * if true, ignore mm param for the synonym query and use it only for the main query
         * 
         * @see org.apache.solr.common.params.DisMaxParams#MM
         */
        public static final String SYNONYMS_IGNORE_MM = "synonyms.ignoreMM";
    }

    /**
     * Convenience class for calling constants.
     * 
     * @author nolan
     */
    public static class Const {
        /**
         * A field we can't ever find in any schema, so we can safely tell DisjunctionMaxQueryParser to use it as our defaultField, and map aliases from it to any field in our schema.
         */
        static final String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";

        /** Matches wildcard '*', leading '-' (NOT), and the OR/AND/'+' boolean operators. */
        static final Pattern COMPLEX_QUERY_OPERATORS_PATTERN = Pattern
                .compile("(?:\\*|\\s-\\b|\\b(?:OR|AND|\\+)\\b)");
    }

    // raw plugin configuration from solrconfig.xml, captured in init()
    private NamedList<?> args;
    // analyzer name -> analyzer, built lazily from the "synonymAnalyzers" config section
    private Map<String, Analyzer> synonymAnalyzers;
    // analyzer name -> analyzer, built lazily from the "queryAnalyzers" config section
    private Map<String, Analyzer> mainAnalyzers;
    // null until the first request; doubles as the "analyzers initialized" flag
    private Version luceneMatchVersion = null;
    private SolrResourceLoader loader;

    @Override
    @SuppressWarnings("rawtypes")
    // TODO it would be nice if the user didn't have to encode tokenizers/filters
    // as a NamedList. But for now this is the hack I'm using
    public void init(NamedList args) {
        this.args = args;
    }

    @Override
    public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
        // Analyzer construction is deferred to the first request because the core's
        // luceneMatchVersion is not available at init()/inform() time.
        ensureAnalyzersInitialized(req);
        return new SynonymExpandingExtendedDismaxQParser(qstr, localParams, params, req, synonymAnalyzers,
                mainAnalyzers);
    }

    /**
     * Lazily builds the configured analyzers exactly once. Synchronized because
     * createParser() may be invoked concurrently by multiple request threads and the
     * previous unsynchronized lazy initialization was racy.
     *
     * @param req the current request, used to obtain the core's luceneMatchVersion
     */
    private synchronized void ensureAnalyzersInitialized(SolrQueryRequest req) {
        if (luceneMatchVersion != null) {
            return; // already initialized by an earlier request
        }
        // must be set before parseConfig(), which passes it to the factories
        this.luceneMatchVersion = req.getCore().getSolrConfig().luceneMatchVersion;
        synonymAnalyzers = new HashMap<>();
        parseConfig(synonymAnalyzers, "synonymAnalyzers");
        mainAnalyzers = new HashMap<>();
        parseConfig(mainAnalyzers, "queryAnalyzers");
    }

    /**
     * Flattens a NamedList into a String-to-String map, silently dropping any
     * entry whose value is not a String.
     *
     * @param namedList the list to convert; must not be null
     * @return a mutable map of the String-valued entries
     */
    private Map<String, String> convertNamedListToMap(NamedList<?> namedList) {
        Map<String, String> result = new HashMap<>();

        for (Entry<String, ?> entry : namedList) {
            if (entry.getValue() instanceof String) {
                result.put(entry.getKey(), (String) entry.getValue());
            }
        }

        return result;
    }

    @Override
    public void inform(ResourceLoader loader) throws IOException {
        // TODO: Can we assume that loader always is a sub type of SolrResourceLoader?
        this.loader = (SolrResourceLoader) loader;
    }

    /*
     * Expected call pattern: init(), inform(loader), createParser(), so we should now have config, loader and luceneMatchVersion needed for creating analyzer components
     */
    private void parseConfig(Map<String, Analyzer> analyzers, String argName) {
        try {

            Object xmlAnalyzers = args.get(argName);

            // instanceof is null-safe, so no separate null check is needed
            if (xmlAnalyzers instanceof NamedList) {
                NamedList<?> analyzersList = (NamedList<?>) xmlAnalyzers;
                for (Entry<String, ?> entry : analyzersList) {
                    String analyzerName = entry.getKey();
                    if (!(entry.getValue() instanceof NamedList)) {
                        continue;
                    }
                    NamedList<?> analyzerAsNamedList = (NamedList<?>) entry.getValue();

                    TokenizerFactory tokenizerFactory = null;
                    TokenFilterFactory filterFactory;
                    List<TokenFilterFactory> filterFactories = new LinkedList<>();

                    for (Entry<String, ?> analyzerEntry : analyzerAsNamedList) {
                        String key = analyzerEntry.getKey();
                        // BUGFIX: previously checked entry.getValue() (always a NamedList here),
                        // so a non-NamedList sub-entry caused a ClassCastException below.
                        if (!(analyzerEntry.getValue() instanceof NamedList)) {
                            continue;
                        }
                        Map<String, String> params = convertNamedListToMap((NamedList<?>) analyzerEntry.getValue());

                        String className = params.get("class");
                        if (className == null) {
                            continue;
                        }

                        params.put("luceneMatchVersion", luceneMatchVersion.toString());

                        if (key.equals("tokenizer")) {
                            try {
                                // first try the SPI short name (e.g. "standard")
                                tokenizerFactory = TokenizerFactory.forName(className, params);
                            } catch (IllegalArgumentException iae) {
                                if (!className.contains(".")) {
                                    iae.printStackTrace();
                                }
                                // Now try by classname instead of SPI keyword
                                tokenizerFactory = loader.newInstance(className, TokenizerFactory.class,
                                        new String[] {}, new Class[] { Map.class }, new Object[] { params });
                            }
                            if (tokenizerFactory instanceof ResourceLoaderAware) {
                                ((ResourceLoaderAware) tokenizerFactory).inform(loader);
                            }
                        } else if (key.equals("filter")) {
                            try {
                                // first try the SPI short name (e.g. "lowercase")
                                filterFactory = TokenFilterFactory.forName(className, params);
                            } catch (IllegalArgumentException iae) {
                                if (!className.contains(".")) {
                                    iae.printStackTrace();
                                }
                                // Now try by classname instead of SPI keyword
                                filterFactory = loader.newInstance(className, TokenFilterFactory.class,
                                        new String[] {}, new Class[] { Map.class }, new Object[] { params });
                            }
                            if (filterFactory instanceof ResourceLoaderAware) {
                                ((ResourceLoaderAware) filterFactory).inform(loader);
                            }
                            filterFactories.add(filterFactory);
                        }
                    }
                    if (tokenizerFactory == null) {
                        throw new SolrException(ErrorCode.SERVER_ERROR,
                                "tokenizer must not be null for analyzer: " + analyzerName);
                    } else if (filterFactories.isEmpty()) {
                        throw new SolrException(ErrorCode.SERVER_ERROR,
                                "filter factories must be defined for analyzer: " + analyzerName);
                    }

                    TokenizerChain analyzer = new TokenizerChain(tokenizerFactory,
                            filterFactories.toArray(new TokenFilterFactory[0]));

                    analyzers.put(analyzerName, analyzer);
                }
            }
        } catch (IOException e) {
            // thrown by ResourceLoaderAware.inform() on the factories
            throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to create parser. Check your config.", e);
        }
    }
}

/**
 * The actual QParser implementation. Parses the user query twice with edismax
 * (once as-is, once with boosts stripped for the synonym side), expands synonyms
 * through a configured analyzer chain, and combines the results into a single
 * boolean query: +((mainQuery)^originalBoost (synonym1)^synonymBoost ...).
 */
class SynonymExpandingExtendedDismaxQParser extends QParser {

    // delegate all our parsing to these two parsers - one for the "synonym" query and the other for the main query
    private ExtendedDismaxQParser synonymQueryParser;
    private ExtendedDismaxQParser mainQueryParser;

    // analyzer name -> analyzer maps, built once by the plugin and shared across requests
    private Map<String, Analyzer> synonymAnalyzers;
    private Map<String, Analyzer> mainAnalyzers;
    // optional pre-analyzer applied to the raw query string before edismax parsing;
    // stays null unless synonyms.preanalyzis=true resolves to a configured analyzer
    private Analyzer mainAnalyzer = null;
    // set when synonyms were applied, so highlighting covers the expanded query
    private Query queryToHighlight;

    /**
     * variables used purely for debugging
     */
    private List<String> expandedSynonyms;
    private ReasonForNotExpandingSynonyms reasonForNotExpandingSynonyms;
    private String parsedQuery = null;
    private String originalQuery = null;

    public SynonymExpandingExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params,
            SolrQueryRequest req, Map<String, Analyzer> synonymAnalyzers, Map<String, Analyzer> mainAnalyzers) {
        super(qstr, localParams, params, req);
        mainQueryParser = new ExtendedDismaxQParser(qstr, localParams, params, req);
        originalQuery = qstr;
        // ensure the synonyms aren't artificially boosted
        synonymQueryParser = new ExtendedDismaxQParser(qstr, NoBoostSolrParams.wrap(localParams),
                NoBoostSolrParams.wrap(params), req);
        this.synonymAnalyzers = synonymAnalyzers;
        this.mainAnalyzers = mainAnalyzers;
    }

    @Override
    public String[] getDefaultHighlightFields() {
        return mainQueryParser.getDefaultHighlightFields();
    }

    @Override
    public Query getHighlightQuery() throws SyntaxError {
        // prefer the synonym-expanded query when one was built, so all variants highlight
        return queryToHighlight != null ? queryToHighlight : mainQueryParser.getHighlightQuery();
    }

    @Override
    public void addDebugInfo(NamedList<Object> debugInfo) {
        if (queryToHighlight != null) {
            debugInfo.add("queryToHighlight", queryToHighlight);
        }
        if (expandedSynonyms != null) {
            debugInfo.add("expandedSynonyms", Ordering.natural().nullsFirst().sortedCopy(expandedSynonyms));
        }
        if (reasonForNotExpandingSynonyms != null) {
            debugInfo.add("reasonForNotExpandingSynonyms", reasonForNotExpandingSynonyms.toNamedList());
        }
        debugInfo.add("originalQuery", originalQuery);
        if (parsedQuery != null)
            debugInfo.add("originalPreparsedQuery", parsedQuery);
        debugInfo.add("mainQueryParser", createDebugInfo(mainQueryParser));
        debugInfo.add("synonymQueryParser", createDebugInfo(synonymQueryParser));
    }

    /**
     * Parses the main query, then — unless one of several bail-out conditions holds
     * (plugin disabled, no/unknown synonym analyzer, phrase query with
     * disablePhraseQueries, complex operators, no synonyms found, IO error) — wraps
     * it together with the generated synonym queries. Each bail-out records its
     * reason in {@link #reasonForNotExpandingSynonyms} for debug output.
     */
    @Override
    public Query parse() throws SyntaxError {

        SolrParams localParams = getLocalParams();
        SolrParams params = getParams();
        SolrParams solrParams = localParams == null ? params : SolrParams.wrapDefaults(localParams, params);

        String defmainAnalyser = solrParams.get(Params.MAIN_DEFAULT_ANALYZER, null);
        String defSynonymsAnalyser = solrParams.get(Params.SYNONYMS_DEFAULT_ANALYZER, null);

        Boolean preanalyzis = solrParams.getBool(Params.MAIN_PREANALYZIS, false);

        // check to make sure the analyzer exists
        if (preanalyzis) {
            String preAnalyzerName = solrParams.get(Params.MAIN_ANALYZER, null);
            if (preAnalyzerName == null) { // no query analyzer specified
                if (defmainAnalyser != null && defmainAnalyser.length() > 0) {
                    preAnalyzerName = defmainAnalyser;
                } else {
                    if (mainAnalyzers.size() >= 1) {
                        // fall back to an arbitrary configured analyzer (the map's first key);
                        // deterministic only when exactly one analyzer is defined
                        preAnalyzerName = mainAnalyzers.keySet().iterator().next();
                    }
                }
            }

            if (preAnalyzerName != null) {

                // NOTE(review): get() may return null for an unknown name, in which case
                // analyzeMainQuery() becomes a no-op (analyzeQuery handles a null analyzer)
                mainAnalyzer = mainAnalyzers.get(preAnalyzerName);

                analyzeMainQuery(mainAnalyzer);
            } else
                mainAnalyzer = null;
        } else {
            mainAnalyzer = null;
        }

        Query query = mainQueryParser.parse();

        // disable/enable synonym handling altogether
        if (!solrParams.getBool(Params.SYNONYMS, false)) {
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.PluginDisabled;
            return query;
        }

        // check to make sure the analyzer exists
        String analyzerName = solrParams.get(Params.SYNONYMS_ANALYZER, null);
        if (analyzerName == null) { // no synonym analyzer specified
            if (defSynonymsAnalyser != null && defSynonymsAnalyser.length() > 0) {
                analyzerName = defSynonymsAnalyser;
            } else {
                if (synonymAnalyzers.size() >= 1) {
                    // fall back to an arbitrary configured analyzer (the map's first key);
                    // deterministic only when exactly one analyzer is defined
                    analyzerName = synonymAnalyzers.keySet().iterator().next();
                } else {
                    reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.NoAnalyzerSpecified;
                    return query;
                }
            }
        }

        Analyzer synonymAnalyzer = synonymAnalyzers.get(analyzerName);

        if (synonymAnalyzer == null) { // couldn't find analyzer
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.AnalyzerNotFound;
            return query;
        }

        if (solrParams.getBool(Params.SYNONYMS_DISABLE_PHRASE_QUERIES, false)
                && getQueryStringFromParser().indexOf('"') != -1) {
            // disable if a phrase query is detected, i.e. there's a '"'
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.IgnoringPhrases;
            return query;
        }

        try {
            query = attemptToApplySynonymsToQuery(query, solrParams, synonymAnalyzer);
        } catch (IOException e) {
            // TODO: better error handling - for now just bail out
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.UnhandledException;
            e.printStackTrace(System.err);
        }

        return query;
    }

    /**
     * Pre-analyzes the raw query string and replaces it in this parser and in both
     * delegate edismax parsers, recording the result in {@link #parsedQuery} for
     * debug output.
     *
     * @param analyzer the pre-analyzer to run; a null analyzer leaves the query unchanged
     */
    private void analyzeMainQuery(Analyzer analyzer) {

        String newQuery = analyzeQuery(getString(), analyzer);

        parsedQuery = newQuery;
        this.mainQueryParser.setString(newQuery);
        this.synonymQueryParser.setString(newQuery);
        this.setString(newQuery);

    }

    /**
     * Runs the query text through the given analyzer and re-joins the emitted
     * tokens with single spaces. Returns the input unchanged when the analyzer
     * is null or the query is null/empty.
     *
     * @param query    the raw query text; may be null or empty
     * @param analyzer the analyzer chain to apply; may be null
     * @return the space-joined analyzed tokens, or the original query
     */
    private String analyzeQuery(String query, Analyzer analyzer) {

        if (analyzer != null && query != null && query.length() > 0) {
            TokenStream tokenStream = analyzer.tokenStream(Const.IMPOSSIBLE_FIELD_NAME, new StringReader(query));

            StringBuilder newQueryB = new StringBuilder();
            try {
                // standard TokenStream consumer workflow: reset -> incrementToken* -> end -> close
                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class);
                    // OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
                    // TypeAttribute typeAttribute = tokenStream.getAttribute(TypeAttribute.class);

                    newQueryB.append(term.toString());
                    newQueryB.append(' ');

                }
                tokenStream.end();
                return newQueryB.toString().trim();

            } catch (IOException e) {
                throw new RuntimeException("uncaught exception in synonym processing", e);
            } finally {
                try {
                    tokenStream.close();
                } catch (IOException e) {
                    throw new RuntimeException("uncaught exception in synonym processing", e);
                }
            }
        }

        return query;

    }

    /**
     * Generates the synonym queries and, when expansion is applicable, merges them
     * into the original query with the configured original/synonym boosts. Bails
     * out (returning the query untouched) when complex operators are present or no
     * synonyms were found, recording the reason for debug output.
     *
     * @param query           the already-parsed main query
     * @param solrParams      merged local+global request parameters
     * @param synonymAnalyzer the analyzer that performs synonym expansion
     * @return the (possibly rewritten) query
     * @throws IOException propagated from synonym-query generation
     */
    private Query attemptToApplySynonymsToQuery(Query query, SolrParams solrParams, Analyzer synonymAnalyzer)
            throws IOException {

        List<Query> synonymQueries = generateSynonymQueries(synonymAnalyzer, solrParams);

        boolean ignoreQueryOperators = solrParams.getBool(Params.SYNONYMS_IGNORE_QUERY_OPERATORS, false);
        boolean hasComplexQueryOperators = ignoreQueryOperators ? false
                : Const.COMPLEX_QUERY_OPERATORS_PATTERN.matcher(getQueryStringFromParser()).find();

        if (hasComplexQueryOperators) { // TODO: support complex operators
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.HasComplexQueryOperators;
            return query;
        } else if (synonymQueries.isEmpty()) { // didn't find more than 0 synonyms, i.e. it's just the original phrase
            reasonForNotExpandingSynonyms = ReasonForNotExpandingSynonyms.DidntFindAnySynonyms;
            return query;
        }

        float originalBoost = solrParams.getFloat(Params.SYNONYMS_ORIGINAL_BOOST, 1.0F);
        float synonymBoost = solrParams.getFloat(Params.SYNONYMS_SYNONYM_BOOST, 1.0F);

        query = applySynonymQueries(query, synonymQueries, originalBoost, synonymBoost);
        return query;
    }

    /**
     * Find the main query and its surrounding clause, make it SHOULD instead of MUST and append a bunch of other SHOULDs to it, then wrap it in a MUST E.g. +(text:dog) becomes +((text:dog)^1.5 ((text:hound) (text:pooch))^1.2)
     * 
     * @param query the parsed main query (unwrapped recursively if boosted)
     * @param synonymQueries the parsed synonym alternatives; null entries are skipped
     * @param originalBoost boost applied to the original user query clause
     * @param synonymBoost boost applied to each synonym clause
     */
    private Query applySynonymQueries(Query query, List<Query> synonymQueries, float originalBoost,
            float synonymBoost) {
        if (query instanceof BoostedQuery) {
            // unwrap a function-boosted query and rewrite its inner query instead
            return applySynonymQueries(((BoostedQuery) query).getQuery(), synonymQueries, originalBoost,
                    synonymBoost);
        } else if (query instanceof BooleanQuery) {
            BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
            for (BooleanClause booleanClause : ((BooleanQuery) query).clauses()) {
                if (Occur.MUST == booleanClause.getOccur()) {
                    BooleanQuery.Builder combinedQueryBuilder = new BooleanQuery.Builder();
                    combinedQueryBuilder.add(new BoostQuery(booleanClause.getQuery(), originalBoost), Occur.SHOULD);
                    // standard 'must occur' clause - i.e. the main user query

                    for (Query synonymQuery : synonymQueries) {
                        if (synonymQuery != null) {
                            BooleanQuery.Builder booleanSynonymQueryBuilder = new BooleanQuery.Builder();
                            booleanSynonymQueryBuilder.add(new BoostQuery(synonymQuery, synonymBoost),
                                    Occur.SHOULD);
                            combinedQueryBuilder.add(booleanSynonymQueryBuilder.build(), Occur.SHOULD);
                        }
                    }
                    booleanQueryBuilder.add(combinedQueryBuilder.build(), Occur.MUST);
                } else {
                    // non-MUST clauses (e.g. filters, negations) are passed through untouched
                    booleanQueryBuilder.add(booleanClause);
                }
            }
            query = booleanQueryBuilder.build();
            queryToHighlight = query;
        }
        // any other query type (e.g. a bare term query) is returned unchanged
        return query;
    }

    /**
     * Given the synonymAnalyzer, returns a list of all alternate queries expanded from the original user query.
     * 
     * @param synonymAnalyzer the analyzer chain that emits original tokens and SYNONYM-typed tokens
     * @param solrParams merged request parameters (constructPhrases/bag flags are read here)
     * @return the parsed alternate queries, excluding the original query itself
     */
    private List<Query> generateSynonymQueries(Analyzer synonymAnalyzer, SolrParams solrParams) {

        String origQuery = getQueryStringFromParser();
        int queryLen = origQuery.length();

        // TODO: make the token stream reusable?
        TokenStream tokenStream = synonymAnalyzer.tokenStream(Const.IMPOSSIBLE_FIELD_NAME,
                new StringReader(origQuery));

        // start offset in the original query -> texts (original + synonyms) found at that offset
        SortedSetMultimap<Integer, TextInQuery> startPosToTextsInQuery = TreeMultimap.create();

        boolean constructPhraseQueries = solrParams.getBool(Params.SYNONYMS_CONSTRUCT_PHRASES, false);

        boolean bag = solrParams.getBool(Params.SYNONYMS_BAG, false);
        List<String> synonymBag = new ArrayList<>();

        try {
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class);
                OffsetAttribute offsetAttribute = tokenStream.getAttribute(OffsetAttribute.class);
                TypeAttribute typeAttribute = tokenStream.getAttribute(TypeAttribute.class);

                if (!typeAttribute.type().equals("shingle")) {
                    // ignore shingles; we only care about synonyms and the original text
                    // TODO: filter other types as well

                    String termToAdd = term.toString();

                    if (typeAttribute.type().equals("SYNONYM")) {
                        synonymBag.add(termToAdd);
                    }

                    // Don't quote single-term synonyms
                    if (constructPhraseQueries && typeAttribute.type().equals("SYNONYM")
                            && termToAdd.contains(" ")) {
                        // Don't Quote when original is already surrounded by quotes
                        if (offsetAttribute.startOffset() == 0 || offsetAttribute.endOffset() == queryLen
                                || origQuery.charAt(offsetAttribute.startOffset() - 1) != '"'
                                || origQuery.charAt(offsetAttribute.endOffset()) != '"') {
                            // make a phrase out of the synonym
                            termToAdd = new StringBuilder(termToAdd).insert(0, '"').append('"').toString();
                        }
                    }
                    if (!bag) {
                        // create a graph of all possible synonym combinations,
                        // e.g. dog bite, hound bite, dog nibble, hound nibble, etc.
                        TextInQuery textInQuery = new TextInQuery(termToAdd, offsetAttribute.startOffset(),
                                offsetAttribute.endOffset());

                        startPosToTextsInQuery.put(offsetAttribute.startOffset(), textInQuery);
                    }
                }
            }
            tokenStream.end();
        } catch (IOException e) {
            throw new RuntimeException("uncaught exception in synonym processing", e);
        } finally {
            try {
                tokenStream.close();
            } catch (IOException e) {
                throw new RuntimeException("uncaught exception in synonym processing", e);
            }
        }

        List<String> alternateQueries = synonymBag;

        if (!bag) {
            // use a graph rather than a bag
            List<List<TextInQuery>> sortedTextsInQuery = new ArrayList<>(startPosToTextsInQuery.values().size());
            sortedTextsInQuery.addAll(startPosToTextsInQuery.asMap().values().stream().map(ArrayList::new)
                    .collect(Collectors.toList()));

            // have to use the start positions and end positions to figure out all possible combinations
            alternateQueries = buildUpAlternateQueries(solrParams, sortedTextsInQuery);
        }

        // save for debugging purposes
        expandedSynonyms = alternateQueries;

        return createSynonymQueries(solrParams, alternateQueries);
    }

    /**
     * From a list of texts in the original query that were deemed to be interested (i.e. synonyms or the original text itself), build up all possible alternate queries as strings. For instance, if the query is "dog bite" and the synonyms are dog -> [dog,hound,pooch] and bite -> [bite,nibble], then the result will be: dog bite hound bite pooch bite dog nibble hound nibble pooch nibble
     * 
     * @param solrParams merged request parameters (currently unused here)
     * @param textsInQueryLists token alternatives grouped by start position, in query order
     * @return all distinct alternate query strings (insertion-ordered)
     */
    private List<String> buildUpAlternateQueries(SolrParams solrParams, List<List<TextInQuery>> textsInQueryLists) {

        String originalUserQuery = getQueryStringFromParser();

        if (textsInQueryLists.isEmpty()) {
            return Collections.emptyList();
        }

        // initialize results
        List<AlternateQuery> alternateQueries = new ArrayList<>();
        for (TextInQuery textInQuery : textsInQueryLists.get(0)) {
            // add the text before the first user query token, e.g. a space or a "
            StringBuilder stringBuilder = new StringBuilder(
                    originalUserQuery.subSequence(0, textInQuery.getStartPosition())).append(textInQuery.getText());
            alternateQueries.add(new AlternateQuery(stringBuilder, textInQuery.getEndPosition()));
        }

        for (int i = 1; i < textsInQueryLists.size(); i++) {
            List<TextInQuery> textsInQuery = textsInQueryLists.get(i);

            // compute the length in advance, because we'll be adding new ones as we go
            int alternateQueriesLength = alternateQueries.size();

            for (int j = 0; j < alternateQueriesLength; j++) {

                // When we're working with a lattice, assuming there's only one path to take in the next column,
                // we can (and MUST) use all the original objects in the current column.
                // It's only when we have >1 paths in the next column that we need to start taking copies.
                // So if a lot of this logic seems tortured, it's only because I'm trying to minimize object
                // creation.
                AlternateQuery originalAlternateQuery = alternateQueries.get(j);

                boolean usedFirst = false;

                for (int k = 0; k < textsInQuery.size(); k++) {

                    TextInQuery textInQuery = textsInQuery.get(k);
                    if (originalAlternateQuery.getEndPosition() > textInQuery.getStartPosition()) {
                        // cannot be appended, e.g. "canis" token in "canis familiaris"
                        continue;
                    }

                    AlternateQuery currentAlternateQuery;

                    if (!usedFirst) {
                        // re-use the existing object
                        usedFirst = true;
                        currentAlternateQuery = originalAlternateQuery;

                        if (k < textsInQuery.size() - 1) {
                            // make a defensive clone for future usage
                            originalAlternateQuery = (AlternateQuery) currentAlternateQuery.clone();
                        }
                    } else if (k == textsInQuery.size() - 1) {
                        // we're sure we're the last one to use it, so we can just use the original clone
                        currentAlternateQuery = originalAlternateQuery;
                        alternateQueries.add(currentAlternateQuery);
                    } else {
                        // need to clone to a new object
                        currentAlternateQuery = (AlternateQuery) originalAlternateQuery.clone();
                        alternateQueries.add(currentAlternateQuery);
                    }
                    // text in the original query between the two tokens, usually a space, comma, etc.
                    CharSequence betweenTokens = originalUserQuery
                            .subSequence(currentAlternateQuery.getEndPosition(), textInQuery.getStartPosition());
                    currentAlternateQuery.getStringBuilder().append(betweenTokens).append(textInQuery.getText());
                    currentAlternateQuery.setEndPosition(textInQuery.getEndPosition());
                }
            }
        }

        // Make sure result is unique
        // LinkedHashSet preserves the generation order while de-duplicating
        HashSet<String> result = new LinkedHashSet<>();

        for (AlternateQuery alternateQuery : alternateQueries) {

            StringBuilder sb = alternateQuery.getStringBuilder();

            // append whatever text followed the last token, e.g. '"'
            sb.append(originalUserQuery.subSequence(alternateQuery.getEndPosition(), originalUserQuery.length()));

            result.add(sb.toString());
        }
        return new ArrayList<>(result);
    }

    /**
     * From a list of alternate queries in text format, parse them using the default ExtendedSolrQueryParser and return the queries.
     * 
     * @param solrParams merged request parameters (currently unused here)
     * @param alternateQueryTexts the candidate alternate query strings
     * @return the successfully parsed queries; unparseable ones are skipped
     */
    private List<Query> createSynonymQueries(SolrParams solrParams, List<String> alternateQueryTexts) {

        String nullsafeOriginalString = getQueryStringFromParser();

        List<Query> result = new ArrayList<>();
        for (String alternateQueryText : alternateQueryTexts) {
            if (alternateQueryText.equalsIgnoreCase(nullsafeOriginalString)) {
                // alternate query is the same as what the user entered
                continue;
            }

            // apply the same pre-analysis as the main query (no-op when mainAnalyzer is null)
            synonymQueryParser.setString(analyzeQuery(alternateQueryText, mainAnalyzer));
            try {
                result.add(synonymQueryParser.parse());
            } catch (SyntaxError e) {
                // TODO: better error handling - for now just bail out; ignore this synonym
                e.printStackTrace(System.err);
            }
        }

        return result;
    }

    /**
     * Ensures that we return a valid string, even if null
     * 
     * @return the entered query string fetched from QParser.getString()
     */
    private String getQueryStringFromParser() {
        return (getString() == null) ? "" : getString();
    }

    /**
     * Convenience method to simplify code
     * 
     * @param qparser the parser whose debug output should be captured
     * @return a NamedList populated by the parser's addDebugInfo()
     */
    private static NamedList<Object> createDebugInfo(QParser qparser) {
        NamedList<Object> result = new NamedList<>();
        qparser.addDebugInfo(result);
        return result;
    }

}