Example usage for org.apache.lucene.search.spans SpanMultiTermQueryWrapper getRewriteMethod

List of usage examples for org.apache.lucene.search.spans SpanMultiTermQueryWrapper getRewriteMethod

Introduction

On this page you can find example usage for org.apache.lucene.search.spans.SpanMultiTermQueryWrapper.getRewriteMethod().

Prototype

public final SpanRewriteMethod getRewriteMethod() 

Source Link

Document

Expert: returns the rewriteMethod

Usage

From source file: de.mirkosertic.desktopsearch.SearchPhraseSuggester.java

License:Open Source License

/**
 * Computes search phrase suggestions for the given phrase.
 *
 * <p>The phrase is tokenized via {@code toTokens}, each token becomes a span query
 * (wildcard tokens are rewritten through a {@link SpanMultiTermQueryWrapper}), and
 * all tokens are combined into one {@link SpanNearQuery} using the configured slop
 * and ordering. For every matching span, the terms stored in the document's
 * {@code IndexFields.CONTENT_NOT_STEMMED} term vector whose positions fall inside
 * the span (widened by the configured before/after window) are joined, in position
 * order, into a candidate phrase. Candidates seen more than once are returned,
 * most frequent first, limited to the configured number of suggestions.
 *
 * <p>Documents without a term vector for the field, and terms without stored
 * positions, are skipped instead of failing with a {@link NullPointerException}.
 *
 * @param aFieldName the field the span queries are built against
 * @param aPhrase the phrase typed so far
 * @return the suggestions, ordered by descending frequency
 * @throws IOException if reading from the index fails
 * @throws RuntimeException wrapping an {@link IOException} raised while rewriting a wildcard token
 */
public List<Suggestion> suggestSearchPhrase(String aFieldName, String aPhrase) throws IOException {

    LOGGER.info("Trying to find suggestions for phrase " + aPhrase);

    long theStartTime = System.currentTimeMillis();
    try {
        List<String> theTokens = toTokens(aFieldName, aPhrase);

        List<SpanQuery> theSpanQueries = theTokens.stream().map(s -> {
            if (QueryUtils.isWildCard(s)) {
                WildcardQuery theWildcardQuery = new WildcardQuery(new Term(aFieldName, s));
                SpanMultiTermQueryWrapper<WildcardQuery> theWrapper =
                        new SpanMultiTermQueryWrapper<>(theWildcardQuery);
                try {
                    return theWrapper.getRewriteMethod().rewrite(indexReader, theWildcardQuery);
                } catch (IOException e) {
                    // Checked exceptions cannot cross the lambda boundary; keep the cause.
                    throw new RuntimeException(e);
                }
            }
            return new SpanTermQuery(new Term(aFieldName, s));
        }).collect(Collectors.toList());

        SpanQuery theSpanQuery = new SpanNearQuery(theSpanQueries.toArray(new SpanQuery[theSpanQueries.size()]),
                configuration.getSuggestionSlop(), configuration.isSuggestionInOrder());

        LOGGER.info("created span query " + theSpanQuery);

        LeafReader theAtomicReader = SlowCompositeReaderWrapper.wrap(indexReader);

        Map<Term, TermContext> theTermContexts = new HashMap<>();
        Map<String, Long> theSpanFrequencies = new HashMap<>();

        // These are all the matching spans over all documents.
        // NOTE(review): every document is accepted here and the bit set is sized by
        // numDocs(); confirm whether deleted documents should be filtered out instead.
        Spans theMatchingSpans = theSpanQuery.getSpans(theAtomicReader.getContext(),
                new Bits.MatchAllBits(indexReader.numDocs()), theTermContexts);

        while (theMatchingSpans.next()) {

            Terms theAllTermsFromDocument = indexReader.getTermVector(theMatchingSpans.doc(),
                    IndexFields.CONTENT_NOT_STEMMED);
            if (theAllTermsFromDocument == null) {
                // No term vector stored for this document/field, so the surrounding
                // text cannot be reconstructed; skip the match instead of failing.
                continue;
            }

            // This maps the position of a term to the term string itself.
            // The positions must be in order, so we have to use a TreeMap.
            Map<Integer, String> theEntries = new TreeMap<>();

            int theSpanStart = theMatchingSpans.start() - configuration.getSuggestionWindowBefore();
            int theSpanEnd = theMatchingSpans.end() + configuration.getSuggestionWindowAfter();
            TermsEnum theTermsEnum = theAllTermsFromDocument.iterator(null);
            BytesRef theTerm;
            while ((theTerm = theTermsEnum.next()) != null) {
                DocsAndPositionsEnum thePositionEnum = theTermsEnum.docsAndPositions(null, null);
                if (thePositionEnum == null) {
                    // Positions were not stored for this term; nothing to place in the window.
                    continue;
                }
                if (thePositionEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    int theFrequencyInDocument = thePositionEnum.freq();
                    int position;
                    for (int i = 0; i < theFrequencyInDocument
                            && (position = thePositionEnum.nextPosition()) != -1; i++) {
                        if (position >= theSpanStart && position <= theSpanEnd) {
                            theEntries.put(position, theTerm.utf8ToString());
                        }
                    }
                }
            }

            // TreeMap values iterate in ascending position order.
            String theTotalSpan = String.join(" ", theEntries.values()).trim();

            theSpanFrequencies.merge(theTotalSpan, 1L, Long::sum);
        }

        // Only phrases seen more than once are considered worth suggesting.
        return theSpanFrequencies.entrySet().stream().filter(t -> t.getValue() > 1)
                .sorted((o1, o2) -> o2.getValue().compareTo(o1.getValue()))
                .limit(configuration.getNumberOfSuggestions())
                .map(theEntry -> new Suggestion(highlight(theEntry.getKey(), theTokens), theEntry.getKey()))
                .collect(Collectors.toList());
    } finally {
        long theDuration = System.currentTimeMillis() - theStartTime;
        LOGGER.info("Took " + theDuration + "ms");
    }
}