Usage examples for org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.getRewriteMethod()
public final SpanRewriteMethod getRewriteMethod()
From source file: de.mirkosertic.desktopsearch.SearchPhraseSuggester.java
License: Open Source License
public List<Suggestion> suggestSearchPhrase(String aFieldName, String aPhrase) throws IOException { LOGGER.info("Trying to find suggestions for phrase " + aPhrase); long theStartTime = System.currentTimeMillis(); try {/*from w w w .j a va 2 s . com*/ List<String> theTokens = toTokens(aFieldName, aPhrase); List<SpanQuery> theSpanQueries = theTokens.stream().map(s -> { if (QueryUtils.isWildCard(s)) { WildcardQuery theWildcardQuery = new WildcardQuery(new Term(aFieldName, s)); SpanMultiTermQueryWrapper theWrapper = new SpanMultiTermQueryWrapper(theWildcardQuery); try { return theWrapper.getRewriteMethod().rewrite(indexReader, theWildcardQuery); } catch (IOException e) { throw new RuntimeException(e); } } return new SpanTermQuery(new Term(aFieldName, s)); }).collect(Collectors.toList()); SpanQuery theSpanQuery = new SpanNearQuery(theSpanQueries.toArray(new SpanQuery[theSpanQueries.size()]), configuration.getSuggestionSlop(), configuration.isSuggestionInOrder()); LOGGER.info("created span query " + theSpanQuery); LeafReader theAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader); Map<Term, TermContext> theTermContexts = new HashMap<>(); Map<String, Long> theSpanFrequencies = new HashMap<>(); // These are all the matching spans over all documents Spans theMatchingSpans = theSpanQuery.getSpans(theAtomicReader.getContext(), new Bits.MatchAllBits(indexReader.numDocs()), theTermContexts); while (theMatchingSpans.next()) { // This maps the position of a term and the term string itself // the positions must be in order, so we have to use a treemap. 
Map<Integer, String> theEntries = new TreeMap<>(); Terms theAllTermsFromDocument = indexReader.getTermVector(theMatchingSpans.doc(), IndexFields.CONTENT_NOT_STEMMED); int theSpanStart = theMatchingSpans.start() - configuration.getSuggestionWindowBefore(); int theSpanEnd = theMatchingSpans.end() + configuration.getSuggestionWindowAfter(); TermsEnum theTermsEnum = theAllTermsFromDocument.iterator(null); BytesRef theTerm; while ((theTerm = theTermsEnum.next()) != null) { DocsAndPositionsEnum thePositionEnum = theTermsEnum.docsAndPositions(null, null); if (thePositionEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int i = 0; int position; while (i < thePositionEnum.freq() && (position = thePositionEnum.nextPosition()) != -1) { if (position >= theSpanStart && position <= theSpanEnd) { theEntries.put(position, theTerm.utf8ToString()); } i++; } } } StringBuilder theResultString = new StringBuilder(); theEntries.entrySet().forEach(e -> { if (theResultString.length() > 0) { theResultString.append(" "); } theResultString.append(e.getValue()); }); String theTotalSpan = theResultString.toString().trim(); Long theFrequency = theSpanFrequencies.get(theTotalSpan); if (theFrequency == null) { theSpanFrequencies.put(theTotalSpan, 1L); } else { theSpanFrequencies.put(theTotalSpan, theFrequency + 1); } } return theSpanFrequencies.entrySet().stream().filter(t -> t.getValue() > 1) .sorted((o1, o2) -> o2.getValue().compareTo(o1.getValue())) .limit(configuration.getNumberOfSuggestions()) .map(T -> new Suggestion(highlight(T.getKey(), theTokens), T.getKey())) .collect(Collectors.toList()); } finally { long theDuration = System.currentTimeMillis() - theStartTime; LOGGER.info("Took " + theDuration + "ms"); } }