List of usage examples for org.apache.solr.spelling SpellingResult getSuggestions
public Map<Token, LinkedHashMap<String, Integer>> getSuggestions()
From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java
License:Open Source License
public List<AlfrescoSpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse) { List<AlfrescoSpellCheckCollation> collations = new ArrayList<>(); QueryComponent queryComponent = null; if (ultimateResponse.components != null) { for (SearchComponent sc : ultimateResponse.components) { if (sc instanceof QueryComponent) { queryComponent = (QueryComponent) sc; break; }//from w w w . ja va 2 s. c o m } } boolean verifyCandidateWithQuery = true; int maxTries = maxCollationTries; int maxNumberToIterate = maxTries; if (maxTries < 1) { maxTries = 1; maxNumberToIterate = maxCollations; verifyCandidateWithQuery = false; } if (queryComponent == null && verifyCandidateWithQuery) { LOG.info( "Could not find an instance of QueryComponent. Disabling collation verification against the index."); maxTries = 1; verifyCandidateWithQuery = false; } docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0; int maxDocId = -1; if (verifyCandidateWithQuery && docCollectionLimit > 0) { IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader(); maxDocId = reader.maxDoc(); } JSONObject alfrescoJSON = (JSONObject) ultimateResponse.req.getContext().get(AbstractQParser.ALFRESCO_JSON); String originalAftsQuery = alfrescoJSON != null ? alfrescoJSON.getString("query") : ultimateResponse.getQueryString(); int tryNo = 0; int collNo = 0; PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap); while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) { PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next(); String collationQueryStr = getCollation(originalQuery, possibility.corrections); int hits = 0; String aftsQuery = null; if (verifyCandidateWithQuery) { tryNo++; SolrQueryRequest req = ultimateResponse.req; SolrParams origParams = req.getParams(); ModifiableSolrParams params = new ModifiableSolrParams(origParams); Iterator<String> origParamIterator = origParams.getParameterNamesIterator(); int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length(); while (origParamIterator.hasNext()) { String origParamName = origParamIterator.next(); if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE) && origParamName.length() > pl) { String[] val = origParams.getParams(origParamName); if (val.length == 1 && val[0].length() == 0) { params.set(origParamName.substring(pl), (String[]) null); } else { params.set(origParamName.substring(pl), val); } } } // we don't set the 'q' param, as we'll pass the query via JSON. // params.set(CommonParams.Q, collationQueryStr); params.remove(CommonParams.START); params.set(CommonParams.ROWS, "" + docCollectionLimit); // we don't want any stored fields params.set(CommonParams.FL, "id"); // we'll sort by doc id to ensure no scoring is done. params.set(CommonParams.SORT, "_docid_ asc"); // If a dismax query, don't add unnecessary clauses for scoring params.remove(DisMaxParams.TIE); params.remove(DisMaxParams.PF); params.remove(DisMaxParams.PF2); params.remove(DisMaxParams.PF3); params.remove(DisMaxParams.BQ); params.remove(DisMaxParams.BF); // Collate testing does not support Grouping (see SOLR-2577) params.remove(GroupParams.GROUP); boolean useQStr = true; if (alfrescoJSON != null) { try { aftsQuery = originalAftsQuery.replaceAll(Pattern.quote(originalQuery), Matcher.quoteReplacement(collationQueryStr)); alfrescoJSON.put("query", aftsQuery); req.getContext().put(AbstractQParser.ALFRESCO_JSON, alfrescoJSON); useQStr = false; } catch (JSONException e) { LOG.warn("Exception trying to get/set the query from/to ALFRESCO_JSON.]" + e); } } else { aftsQuery = collationQueryStr; } req.setParams(params); // creating a request here... make sure to close it! ResponseBuilder checkResponse = new ResponseBuilder(req, new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent)); checkResponse.setQparser(ultimateResponse.getQparser()); checkResponse.setFilters(ultimateResponse.getFilters()); checkResponse.components = Arrays.<SearchComponent>asList(queryComponent); if (useQStr) { checkResponse.setQueryString(collationQueryStr); } try { queryComponent.prepare(checkResponse); if (docCollectionLimit > 0) { int f = checkResponse.getFieldFlags(); checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY); } queryComponent.process(checkResponse); hits = (Integer) checkResponse.rsp.getToLog().get("hits"); } catch (EarlyTerminatingCollectorException etce) { assert (docCollectionLimit > 0); assert 0 < etce.getNumberScanned(); assert 0 < etce.getNumberCollected(); if (etce.getNumberScanned() == maxDocId) { hits = etce.getNumberCollected(); } else { hits = (int) (((float) (maxDocId * etce.getNumberCollected())) / (float) etce.getNumberScanned()); } } catch (Exception e) { LOG.warn( "Exception trying to re-query to check if a spell check possibility would return any hits." + e); } finally { checkResponse.req.close(); } } if (hits > 0 || !verifyCandidateWithQuery) { collNo++; AlfrescoSpellCheckCollation collation = new AlfrescoSpellCheckCollation(); collation.setCollationQuery(aftsQuery); collation.setCollationQueryString(collationQueryStr); collation.setHits(hits); collation.setInternalRank( suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index) : possibility.rank); NamedList<String> misspellingsAndCorrections = new NamedList<>(); for (SpellCheckCorrection corr : possibility.corrections) { misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection()); } collation.setMisspellingsAndCorrections(misspellingsAndCorrections); collations.add(collation); } if (LOG.isDebugEnabled()) { LOG.debug("Collation: " + aftsQuery + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : "")); } } return collations; }
From source file:org.dice.solrenhancements.spellchecker.DiceSpellCheckComponent.java
License:Apache License
protected NamedList toNamedList(boolean shardRequest, SpellingResult spellingResult, String origQuery, boolean extendedResults, boolean collate, boolean correctlySpelled) { NamedList result = new NamedList(); Map<Token, LinkedHashMap<String, Integer>> suggestions = spellingResult.getSuggestions(); boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo(); boolean hasSuggestions = false; boolean hasZeroFrequencyToken = false; for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) { Token inputToken = entry.getKey(); String tokenString = new String(inputToken.buffer(), 0, inputToken.length()); Map<String, Integer> theSuggestions = new LinkedHashMap<String, Integer>(entry.getValue()); Iterator<String> sugIter = theSuggestions.keySet().iterator(); while (sugIter.hasNext()) { String sug = sugIter.next(); if (sug.equals(tokenString)) { sugIter.remove();/*from w ww . jav a2 s . c o m*/ } } if (theSuggestions.size() > 0) { hasSuggestions = true; } if (theSuggestions != null && (theSuggestions.size() > 0 || shardRequest)) { SimpleOrderedMap suggestionList = new SimpleOrderedMap(); suggestionList.add("numFound", theSuggestions.size()); suggestionList.add("startOffset", inputToken.startOffset()); suggestionList.add("endOffset", inputToken.endOffset()); // Logical structure of normal (non-extended) results: // "suggestion":["alt1","alt2"] // // Logical structure of the extended results: // "suggestion":[ // {"word":"alt1","freq":7}, // {"word":"alt2","freq":4} // ] if (extendedResults && hasFreqInfo) { suggestionList.add("origFreq", spellingResult.getTokenFrequency(inputToken)); ArrayList<SimpleOrderedMap> sugs = new ArrayList<SimpleOrderedMap>(); suggestionList.add("suggestion", sugs); for (Map.Entry<String, Integer> suggEntry : theSuggestions.entrySet()) { SimpleOrderedMap sugEntry = new SimpleOrderedMap(); sugEntry.add("word", suggEntry.getKey()); sugEntry.add("freq", suggEntry.getValue()); sugs.add(sugEntry); } } else { suggestionList.add("suggestion", theSuggestions.keySet()); } if (hasFreqInfo) { int tokenFrequency = spellingResult.getTokenFrequency(inputToken); if (tokenFrequency == 0) { hasZeroFrequencyToken = true; } } result.add(tokenString, suggestionList); } } if (extendedResults) { result.add("correctlySpelled", correctlySpelled); } return result; }