Example usage for org.apache.solr.spelling SpellCheckCorrection getOriginal

List of usage examples for org.apache.solr.spelling SpellCheckCorrection getOriginal

Introduction

In this page you can find the example usage for org.apache.solr.spelling SpellCheckCorrection getOriginal.

Prototype

public Token getOriginal() 

Source Link

Usage

From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java

License:Open Source License

public List<AlfrescoSpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<AlfrescoSpellCheckCollation> collations = new ArrayList<>();

    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }//from  w  w w .j a v  a  2  s  . co m
        }
    }

    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info(
                "Could not find an instance of QueryComponent. Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }
    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }

    JSONObject alfrescoJSON = (JSONObject) ultimateResponse.req.getContext().get(AbstractQParser.ALFRESCO_JSON);
    String originalAftsQuery = alfrescoJSON != null ? alfrescoJSON.getString("query")
            : ultimateResponse.getQueryString();

    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate,
            maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;
        String aftsQuery = null;

        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrQueryRequest req = ultimateResponse.req;
            SolrParams origParams = req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            // we don't set the 'q' param, as we'll pass the query via JSON.
            // params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);

            boolean useQStr = true;

            if (alfrescoJSON != null) {
                try {
                    aftsQuery = originalAftsQuery.replaceAll(Pattern.quote(originalQuery),
                            Matcher.quoteReplacement(collationQueryStr));
                    alfrescoJSON.put("query", aftsQuery);
                    req.getContext().put(AbstractQParser.ALFRESCO_JSON, alfrescoJSON);
                    useQStr = false;
                } catch (JSONException e) {
                    LOG.warn("Exception trying to get/set the query from/to ALFRESCO_JSON.]" + e);
                }
            } else {
                aftsQuery = collationQueryStr;
            }
            req.setParams(params);
            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(req, new SolrQueryResponse(),
                    Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);
            if (useQStr) {
                checkResponse.setQueryString(collationQueryStr);
            }
            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();

                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected()))
                            / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn(
                        "Exception trying to re-query to check if a spell check possibility would return any hits."
                                + e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            AlfrescoSpellCheckCollation collation = new AlfrescoSpellCheckCollation();
            collation.setCollationQuery(aftsQuery);
            collation.setCollationQueryString(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(
                    suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index) : possibility.rank);

            NamedList<String> misspellingsAndCorrections = new NamedList<>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + aftsQuery
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}

From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java

License:Open Source License

@SuppressWarnings("deprecation")
private String getCollation(String origQuery, List<SpellCheckCorrection> corrections) {
    StringBuilder collation = new StringBuilder(origQuery);
    int offset = 0;
    String corr = "";
    for (int i = 0; i < corrections.size(); i++) {
        SpellCheckCorrection correction = corrections.get(i);
        Token tok = correction.getOriginal();
        // we are replacing the query in order, but injected terms might
        // cause illegal offsets due to previous replacements.
        if (tok.getPositionIncrement() == 0)
            continue;
        corr = correction.getCorrection();
        boolean addParenthesis = false;
        Character requiredOrProhibited = null;
        int indexOfSpace = corr.indexOf(' ');
        StringBuilder corrSb = new StringBuilder(corr);
        int bump = 1;

        // If the correction contains whitespace (because it involved
        // breaking a word in 2+ words),
        // then be sure all of the new words have the same
        // optional/required/prohibited status in the query.
        while (indexOfSpace > -1 && indexOfSpace < corr.length() - 1) {
            addParenthesis = true;//from ww  w . j  a  v a  2 s. co m
            char previousChar = tok.startOffset() > 0 ? origQuery.charAt(tok.startOffset() - 1) : ' ';
            if (previousChar == '-' || previousChar == '+') {
                corrSb.insert(indexOfSpace + bump, previousChar);
                if (requiredOrProhibited == null) {
                    requiredOrProhibited = previousChar;
                }
                bump++;
            } else if ((tok.getFlags()
                    & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) {
                corrSb.insert(indexOfSpace + bump, "AND ");
                bump += 4;
            }
            indexOfSpace = correction.getCorrection().indexOf(' ', indexOfSpace + bump);
        }

        int oneForReqOrProhib = 0;
        if (addParenthesis) {
            if (requiredOrProhibited != null) {
                corrSb.insert(0, requiredOrProhibited);
                oneForReqOrProhib++;
            }
            corrSb.insert(0, '(');
            corrSb.append(')');
        }
        corr = corrSb.toString();
        int startIndex = tok.startOffset() + offset - oneForReqOrProhib;
        int endIndex = tok.endOffset() + offset;
        collation.replace(startIndex, endIndex, corr);
        offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset());
    }
    return collation.toString();
}