Example usage for org.apache.lucene.search IndexSearcher getTopReaderContext

Introduction

On this page you can find example usage for org.apache.lucene.search IndexSearcher getTopReaderContext.

Prototype


public IndexReaderContext getTopReaderContext() 

Document

Returns this searcher's top-level IndexReaderContext.
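
The snippets below all follow the same pattern: fetch the top-level context once per search, then either build a TermContext against it or walk its leaves. The following is a minimal sketch of that pattern, assuming a Lucene 6.x/7.x API (where TermContext is still available); firstMatchingSegmentDoc is a hypothetical helper written for illustration, not part of Lucene or of the projects below.

Document firstMatchingSegmentDoc(IndexSearcher searcher, Term term) throws IOException {
    // Resolve per-segment term state once against the searcher's top-level context.
    final IndexReaderContext topContext = searcher.getTopReaderContext();
    final TermContext termContext = TermContext.build(topContext, term);
    // Visit each segment (leaf) of the same top-level context.
    for (LeafReaderContext leaf : topContext.leaves()) {
        TermState state = termContext.get(leaf.ord);
        if (state == null) {
            continue; // the term does not occur in this segment
        }
        TermsEnum termsEnum = leaf.reader().terms(term.field()).iterator();
        termsEnum.seekExact(term.bytes(), state); // reuse the cached term state
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
        int docId = postings.nextDoc();
        if (docId != DocIdSetIterator.NO_MORE_DOCS) {
            return leaf.reader().document(docId); // first document containing the term
        }
    }
    return null;
}

Building the TermContext against the top-level context and then reading it per leaf via leaf.ord is the same caching idea the createWeight implementations below rely on to avoid re-seeking the term in every segment.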

Usage

From source file:com.elasticsearch.custom.query.keyword.KeywordTermQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term, true); // cache term lookups!
    } else {
        // PRTS was pre-build for this IS
        termState = this.perReaderTermState;
    }

    // we must not ignore the given docFreq - if set use the given value (lie)
    if (docFreq != -1)
        termState.setDocFreq(docFreq);

    return new TermWeight(searcher, termState);
}

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
        If no terms are parsed in the query we opt for returning 0
        instead of throwing an exception that could break various
        pipelines.
         */
        float constantScore;

        if (terms.size() > 0) {
            switch (type) {
            case ("sum_classic_idf"):
                constantScore = idf_stats.getSum();
                break;
            case ("mean_classic_idf"):
                constantScore = idf_stats.getMean();
                break;
            case ("max_classic_idf"):
                constantScore = idf_stats.getMax();
                break;
            case ("min_classic_idf"):
                constantScore = idf_stats.getMin();
                break;
            case ("stddev_classic_idf"):
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;

            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
        // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms
        // problem is that we rely on extractTerms which happen too late in the process
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}

From source file:com.o19s.es.explore.PostingsExplorerQuery.java

License:Apache License

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    IndexReaderContext context = searcher.getTopReaderContext();
    assert needsScores : "Should not be used in filtering mode";
    return new PostingsExplorerWeight(this, this.term, TermContext.build(context, this.term), this.type);
}

From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhase.java

License:Apache License

void doLog(Query query, List<HitLogConsumer> loggers, IndexSearcher searcher, SearchHit[] hits)
        throws IOException {
    // Reorder hits by id so we can scan all the docs belonging to the same
    // segment by reusing the same scorer.
    SearchHit[] reordered = new SearchHit[hits.length];
    System.arraycopy(hits, 0, reordered, 0, hits.length);
    Arrays.sort(reordered, Comparator.comparingInt(SearchHit::docId));

    int hitUpto = 0;
    int readerUpto = -1;
    int endDoc = 0;
    int docBase = 0;
    Scorer scorer = null;
    Weight weight = searcher.createNormalizedWeight(query, true);
    // Loop logic borrowed from lucene QueryRescorer
    while (hitUpto < reordered.length) {
        SearchHit hit = reordered[hitUpto];
        int docID = hit.docId();
        loggers.forEach((l) -> l.nextDoc(hit));
        LeafReaderContext readerContext = null;
        while (docID >= endDoc) {
            readerUpto++;
            readerContext = searcher.getTopReaderContext().leaves().get(readerUpto);
            endDoc = readerContext.docBase + readerContext.reader().maxDoc();
        }

        if (readerContext != null) {
            // We advanced to another segment:
            docBase = readerContext.docBase;
            scorer = weight.scorer(readerContext);
        }

        if (scorer != null) {
            int targetDoc = docID - docBase;
            int actualDoc = scorer.docID();
            if (actualDoc < targetDoc) {
                actualDoc = scorer.iterator().advance(targetDoc);
            }
            if (actualDoc == targetDoc) {
                // Scoring will trigger log collection
                scorer.score();
            }
        }

        hitUpto++;
    }
}

From source file:com.shaie.annots.MultiPositionSpanTermQuery.java

License:Apache License

@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    final IndexReaderContext topContext = searcher.getTopReaderContext();
    final TermContext context;
    if (termContext == null || termContext.wasBuiltFor(topContext) == false) {
        context = TermContext.build(topContext, term);
    } else {
        context = termContext;
    }
    final Map<Term, TermContext> terms = needsScores ? Collections.singletonMap(term, context) : null;
    return new SpanTermWeight(context, searcher, terms, boost) {
        @Override
        public Spans getSpans(LeafReaderContext context, Postings requiredPostings) throws IOException {
            final TermSpans spans = (TermSpans) super.getSpans(context,
                    requiredPostings.atLeast(Postings.PAYLOADS));
            if (spans == null) { // term is not present in that reader
                assert context.reader().docFreq(term) == 0 : "no term exists in reader term=" + term;
                return null;
            }
            return new Spans() {

                private final PositionSpansCollector payloadCollector = new PositionSpansCollector();
                private int end = -1;

                @Override
                public int advance(int target) throws IOException {
                    end = -1;
                    return spans.advance(target);
                }

                @Override
                public void collect(SpanCollector collector) throws IOException {
                    spans.collect(collector);
                }

                @Override
                public long cost() {
                    return spans.cost();
                }

                @Override
                public int docID() {
                    return spans.docID();
                }

                @Override
                public int endPosition() {
                    return end;
                }

                @Override
                public int nextDoc() throws IOException {
                    end = -1;
                    return spans.nextDoc();
                }

                @Override
                public int nextStartPosition() throws IOException {
                    final int pos = spans.nextStartPosition();
                    if (pos == NO_MORE_POSITIONS) {
                        end = NO_MORE_POSITIONS;
                        return NO_MORE_POSITIONS;
                    }
                    spans.collect(payloadCollector);
                    end = payloadCollector.payloadValue + pos;
                    return pos;
                }

                @Override
                public float positionsCost() {
                    return spans.positionsCost();
                }

                @Override
                public int startPosition() {
                    return spans.startPosition();
                }

                @Override
                public int width() {
                    return spans.width();
                }
            };
        }
    };
}

From source file:com.sindicetech.siren.search.node.NodeTermQuery.java

License:Open Source License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
        // make TermQuery single-pass if we don't have a PRTS or if the context differs!
        termState = TermContext.build(context, term); // cache term lookups!
    } else {
        // PRTS was pre-build for this IS
        termState = this.perReaderTermState;
    }

    // we must not ignore the given docFreq - if set use the given value (lie)
    if (docFreq != -1)
        termState.setDocFreq(docFreq);

    return new NodeTermWeight(searcher, termState);
}

From source file:com.sindicetech.siren.search.spans.TermSpanQuery.java

License:Open Source License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState = TermContext.build(context, term);
    return new TermSpanWeight(searcher, termState);
}

From source file:com.xiaomi.linden.lucene.query.flexiblequery.FlexibleWeight.java

License:Apache License

public FlexibleWeight(FlexibleQuery query, IndexSearcher searcher) throws IOException {
    this.query = query;
    this.similarity = searcher.getSimilarity();
    final IndexReaderContext context = searcher.getTopReaderContext();

    int[] maxDocFreqs = null;
    long[] maxTotalTermFreqs = null;
    Map<Term, TermContext> builtTermMap = new HashMap<>();
    if (query.enableGlobalIDF()) {
        FlexibleQuery.FlexibleTerm[][] globalTerms = query.getGlobalTerms();
        TermContext[][] globalStates = new TermContext[globalTerms.length][];
        for (int i = 0; i < globalTerms.length; ++i) {
            globalStates[i] = new TermContext[globalTerms[i].length];
            for (int j = 0; j < globalTerms[i].length; ++j) {
                Term term = globalTerms[i][j].term;
                TermContext termContext = builtTermMap.get(term);
                if (termContext != null) {
                    globalStates[i][j] = termContext;
                } else {
                    globalStates[i][j] = TermContext.build(context, globalTerms[i][j].term);
                    builtTermMap.put(term, globalStates[i][j]);
                }
            }
        }
        maxDocFreqs = new int[globalTerms[0].length];
        maxTotalTermFreqs = new long[globalTerms[0].length];
        int fieldLength = globalTerms.length;
        int termLength = globalTerms[0].length;
        for (int i = 0; i < termLength; ++i) {
            int maxDocFreq = 0;
            long maxTotalTermFreq = 0;
            for (int j = 0; j < fieldLength; ++j) {
                maxDocFreq = Math.max(globalStates[j][i].docFreq(), maxDocFreq);
                maxTotalTermFreq = Math.max(globalStates[j][i].totalTermFreq(), maxTotalTermFreq);
            }
            maxDocFreqs[i] = maxDocFreq;
            maxTotalTermFreqs[i] = maxTotalTermFreq;
        }
    }

    FlexibleQuery.FlexibleTerm[][] terms = query.getTerms();
    TermContext[][] states = new TermContext[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        states[i] = new TermContext[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            Term term = terms[i][j].term;
            TermContext termContext = builtTermMap.get(term);
            if (termContext != null) {
                states[i][j] = termContext;
            } else {
                states[i][j] = TermContext.build(context, terms[i][j].term);
                builtTermMap.put(term, states[i][j]);
            }
        }
    }
    termStatsMatrix = new TermStats[terms.length][];
    for (int i = 0; i < terms.length; ++i) {
        termStatsMatrix[i] = new TermStats[terms[i].length];
        for (int j = 0; j < terms[i].length; ++j) {
            FlexibleQuery.FlexibleTerm term = terms[i][j];
            TermContext state = states[i][j];
            TermStatistics termStats;
            if (query.enableGlobalIDF()) {
                termStats = new TermStatistics(term.term.bytes(), maxDocFreqs[j], maxTotalTermFreqs[j]);
            } else {
                termStats = searcher.termStatistics(term.term, state);
            }
            Similarity.SimWeight stats = similarity.computeWeight(term.boost,
                    searcher.collectionStatistics(term.term.field()), termStats);
            TermStats termStatsInfo = new TermStats();
            termStatsInfo.stats = stats;
            termStatsInfo.term = term.term;
            termStatsInfo.termContext = state;
            termStatsMatrix[i][j] = termStatsInfo;
        }
    }
}

From source file:de.jetsli.lumeo.RawLucene.java

License:Apache License

public Document findById(final long id) {
    //Check cache
    IndexOp result = getCurrentRTCache(latestGen).get(id);
    if (result != null) {
        if (result.type == IndexOp.Type.DELETE)
            return null;
        return result.document;
    }

    return searchSomething(new SearchExecutor<Document>() {

        @Override
        public Document execute(IndexSearcher searcher) throws Exception {
            // TODO optimize via indexReader.termDocsEnum !?
            IndexReaderContext trc = searcher.getTopReaderContext();
            AtomicReaderContext[] arc = trc.leaves();
            for (int i = 0; i < arc.length; i++) {
                AtomicReader subreader = arc[i].reader();
                DocsEnum docs = subreader.terms(UID).iterator(null).docs(subreader.getLiveDocs(), null, false);
                if (docs != null) {
                    int docID = docs.nextDoc();
                    if (docID != DocsEnum.NO_MORE_DOCS) {
                        return subreader.document(docID);
                    }
                }
            }
            return null;
        }
    });
}

From source file:de.uop.code.disambiguation.ltr.lucene.query.TermQuery.java

License:Apache License

@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
    if ((this.perReaderTermS == null) || (this.perReaderTermS.topReaderContext != context)) {
        // make TermQuery single-pass if we don't have a PRTS or if the
        // context differs!
        termState = TermContext.build(context, this.term);
    } else {
        // PRTS was pre-build for this IS
        termState = this.perReaderTermS;
    }

    // we must not ignore the given docFreq - if set use the given value
    // (lie)
    if (this.docFreq != -1) {
        termState.setDocFreq(this.docFreq);
    }

    return new TermWeight(searcher, termState);
}