Example usage for org.apache.lucene.search.spans SpanMultiTermQueryWrapper getRewriteMethod

Introduction

On this page you can find example usage of the getRewriteMethod() method of org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.

Prototype

public final SpanRewriteMethod getRewriteMethod()

Source Link

Document

Expert: returns the rewriteMethod

Usage

From source file: de/mirkosertic/desktopsearch/SearchPhraseSuggester.java

License:Open Source License

/**
 * Suggests search phrases by finding spans in the index that match the tokens of the
 * given phrase and collecting the terms that surround each match.
 *
 * <p>Each token becomes a span query (wildcard tokens are rewritten against the index
 * first); all of them are combined into one {@code SpanNearQuery} using the configured
 * slop and ordering. For every matching span, the terms of the document's
 * {@code IndexFields.CONTENT_NOT_STEMMED} term vector whose positions fall inside the
 * span, widened by the configured before/after window, are joined in position order.
 * Only joined texts seen more than once are returned, most frequent first, limited to
 * the configured number of suggestions.
 *
 * <p>Matching documents without a term vector for that field, and terms without stored
 * positions, are skipped.
 *
 * @param aFieldName the field the phrase tokens are searched in
 * @param aPhrase    the phrase to find suggestions for
 * @return the suggestions, ordered by descending frequency
 * @throws IOException if the index cannot be read
 */
public List<Suggestion> suggestSearchPhrase(String aFieldName, String aPhrase) throws IOException {

    LOGGER.info("Trying to find suggestions for phrase " + aPhrase);

    long theStartTime = System.currentTimeMillis();
    try {
        List<String> theTokens = toTokens(aFieldName, aPhrase);

        List<SpanQuery> theSpanQueries = theTokens.stream().map(s -> {
            if (QueryUtils.isWildCard(s)) {
                WildcardQuery theWildcardQuery = new WildcardQuery(new Term(aFieldName, s));
                SpanMultiTermQueryWrapper<WildcardQuery> theWrapper =
                        new SpanMultiTermQueryWrapper<>(theWildcardQuery);
                try {
                    return theWrapper.getRewriteMethod().rewrite(indexReader, theWildcardQuery);
                } catch (IOException e) {
                    // The stream lambda cannot throw checked exceptions, so wrap and keep the cause.
                    throw new RuntimeException(e);
                }
            }
            return new SpanTermQuery(new Term(aFieldName, s));
        }).collect(Collectors.toList());

        SpanQuery theSpanQuery = new SpanNearQuery(theSpanQueries.toArray(new SpanQuery[theSpanQueries.size()]),
                configuration.getSuggestionSlop(), configuration.isSuggestionInOrder());

        LOGGER.info("created span query " + theSpanQuery);

        LeafReader theAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);

        Map<Term, TermContext> theTermContexts = new HashMap<>();
        Map<String, Long> theSpanFrequencies = new HashMap<>();

        // These are all the matching spans over all documents.
        // Document ids range up to maxDoc() - 1, so the accept-bits are sized by maxDoc()
        // rather than numDocs(), which shrinks when the index contains deletions.
        Spans theMatchingSpans = theSpanQuery.getSpans(theAtomicReader.getContext(),
                new Bits.MatchAllBits(indexReader.maxDoc()), theTermContexts);

        while (theMatchingSpans.next()) {

            Terms theAllTermsFromDocument = indexReader.getTermVector(theMatchingSpans.doc(),
                    IndexFields.CONTENT_NOT_STEMMED);
            if (theAllTermsFromDocument == null) {
                // No term vector stored for this document/field, nothing to build a suggestion from.
                continue;
            }

            // This maps the position of a term to the term string itself.
            // The positions must be in order, so we have to use a TreeMap.
            Map<Integer, String> theEntries = new TreeMap<>();

            int theSpanStart = theMatchingSpans.start() - configuration.getSuggestionWindowBefore();
            int theSpanEnd = theMatchingSpans.end() + configuration.getSuggestionWindowAfter();
            TermsEnum theTermsEnum = theAllTermsFromDocument.iterator(null);
            BytesRef theTerm;
            while ((theTerm = theTermsEnum.next()) != null) {
                DocsAndPositionsEnum thePositionEnum = theTermsEnum.docsAndPositions(null, null);
                if (thePositionEnum == null) {
                    // Positions were not stored for this term, so it cannot be placed in the window.
                    continue;
                }
                if (thePositionEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    int theOccurrences = thePositionEnum.freq();
                    for (int i = 0; i < theOccurrences; i++) {
                        int thePosition = thePositionEnum.nextPosition();
                        if (thePosition == -1) {
                            break;
                        }
                        if (thePosition >= theSpanStart && thePosition <= theSpanEnd) {
                            theEntries.put(thePosition, theTerm.utf8ToString());
                        }
                    }
                }
            }

            // Join the terms in position order and count how often each joined text occurs.
            String theTotalSpan = String.join(" ", theEntries.values()).trim();
            theSpanFrequencies.merge(theTotalSpan, 1L, Long::sum);
        }

        // Only texts seen more than once are worth suggesting; most frequent first.
        return theSpanFrequencies.entrySet().stream().filter(theEntry -> theEntry.getValue() > 1)
                .sorted((o1, o2) -> o2.getValue().compareTo(o1.getValue()))
                .limit(configuration.getNumberOfSuggestions())
                .map(theEntry -> new Suggestion(highlight(theEntry.getKey(), theTokens), theEntry.getKey()))
                .collect(Collectors.toList());
    } finally {
        long theDuration = System.currentTimeMillis() - theStartTime;
        LOGGER.info("Took " + theDuration + "ms");
    }
}