Example usage for org.apache.lucene.search Scorer iterator

List of usage examples for org.apache.lucene.search Scorer iterator

Introduction

On this page you can find example usages of the iterator() method of org.apache.lucene.search.Scorer.

Prototype

public abstract DocIdSetIterator iterator();

Source Link

Document

Return a DocIdSetIterator over matching documents.

Usage

From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java

License:Open Source License

/**
 * Wraps {@code delegate} and precomputes the field-level (FLS) and document-level (DLS)
 * restrictions exposed by this reader.
 *
 * <p>FLS is enabled when {@code includes} is non-null and non-empty; the visible
 * {@link FieldInfos} are then reduced to the fields that are either contained verbatim in
 * {@code includes} or accepted by {@code WildcardMatcher.matchAny}. DLS is enabled when
 * {@code dlsQuery} is non-null; the query is then executed against this reader and the
 * matching, non-deleted documents become the live docs of this reader.
 *
 * @param delegate the reader being wrapped
 * @param includes field names/patterns to keep visible; {@code null} or empty disables FLS
 * @param dlsQuery query selecting the visible documents; {@code null} disables DLS
 * @throws RuntimeException wrapping any exception raised while evaluating {@code dlsQuery}
 */
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) {
    super(delegate);

    flsEnabled = includes != null && !includes.isEmpty();
    dlsEnabled = dlsQuery != null;

    if (!flsEnabled) {
        this.includes = null;
        this.flsFieldInfos = null;
    } else {
        this.includes = includes.toArray(new String[0]);
        final FieldInfos allInfos = delegate.getFieldInfos();

        final List<FieldInfo> visible = new ArrayList<FieldInfo>(allInfos.size());
        for (final FieldInfo candidate : allInfos) {
            final String fieldName = candidate.name;
            // A name without wildcard characters may be listed verbatim; otherwise fall back
            // to matching it against the configured patterns.
            final boolean listedVerbatim = !WildcardMatcher.containsWildcard(fieldName)
                    && includes.contains(fieldName);
            if (listedVerbatim || WildcardMatcher.matchAny(this.includes, fieldName)) {
                visible.add(candidate);
            }
        }

        this.flsFieldInfos = new FieldInfos(visible.toArray(new FieldInfo[0]));
    }

    if (!dlsEnabled) {
        this.liveDocs = null;
        this.numDocs = -1;
    } else {
        try {

            //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF))
            final IndexSearcher searcher = new IndexSearcher(this);
            searcher.setQueryCache(null);
            // Scores are not needed, only the set of matching documents.
            final Weight dlsWeight = searcher.createNormalizedWeight(dlsQuery, false);

            final FixedBitSet matching = new FixedBitSet(in.maxDoc());
            final Scorer dlsScorer = dlsWeight.scorer(this.getContext());
            if (dlsScorer != null) {
                matching.or(dlsScorer.iterator());
            }

            if (in.hasDeletions()) {
                // Drop every matching document that is deleted in the wrapped reader.
                final Bits liveInDelegate = in.getLiveDocs();
                assert liveInDelegate != null;
                final DocIdSetIterator matchIt = new BitSetIterator(matching, 0L);
                int doc;
                while ((doc = matchIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    if (!liveInDelegate.get(doc)) {
                        matching.clear(doc);
                    }
                }
            }

            this.liveDocs = matching;
            this.numDocs = matching.cardinality();

        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

/**
 * Creates the weight for this explorer query.
 *
 * <p>When scores are not needed the wrapped query's weight is returned unchanged. Otherwise
 * the terms of the wrapped query are extracted and, depending on the configured stat type:
 * <ul>
 *   <li>collection-scoped types produce a constant score (the selected statistic) for every
 *       document of a segment;</li>
 *   <li>types ending in {@code _raw_tf} are rewritten into a boolean query of
 *       {@code PostingsExplorerQuery} clauses wrapped in an {@code ExplorerWeight}.</li>
 * </ul>
 *
 * @throws IllegalArgumentException if the type is neither collection scoped nor a
 *         {@code _raw_tf} type
 * @throws RuntimeException if a collection-scoped type is not one of the known statistics
 *         and the query yielded at least one term
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }

    final Weight innerWeight = searcher.createWeight(query, true, boost);
    final Set<Term> queryTerms = new HashSet<>();
    innerWeight.extractTerms(queryTerms);

    if (!isCollectionScoped()) {
        if (type.endsWith("_raw_tf")) {
            // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
            final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
            for (Term term : queryTerms) {
                clauses.add(new BooleanClause(new PostingsExplorerQuery(term, PostingsExplorerQuery.Type.TF),
                        BooleanClause.Occur.SHOULD));
            }
            // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
            // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms
            // problem is that we rely on extractTerms which happen too late in the process
            final Query rewritten = clauses.build().rewrite(searcher.getIndexReader());
            return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(rewritten, true, boost), type);
        }
        throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
    }

    final ClassicSimilarity similarity = new ClassicSimilarity();
    final StatisticsHelper docFreqStats = new StatisticsHelper();
    final StatisticsHelper idfStats = new StatisticsHelper();
    final StatisticsHelper totalTermFreqStats = new StatisticsHelper();

    for (Term term : queryTerms) {
        final TermContext termContext = TermContext.build(searcher.getTopReaderContext(), term);
        final TermStatistics termStats = searcher.termStatistics(term, termContext);
        docFreqStats.add(termStats.docFreq());
        idfStats.add(similarity.idf(termStats.docFreq(), searcher.getIndexReader().numDocs()));
        totalTermFreqStats.add(termStats.totalTermFreq());
    }

    // If no terms are parsed in the query we opt for returning 0 instead of throwing an
    // exception that could break various pipelines.
    final float constantScore = queryTerms.isEmpty()
            ? 0.0f
            : selectCollectionStat(docFreqStats, idfStats, totalTermFreqStats, queryTerms.size());

    return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer constantScorer = scorer(context);
            final int landedOn = constantScorer.iterator().advance(doc);
            assert landedOn == doc; // this is a DocIdSetIterator.all
            return Explanation.match(constantScorer.score(), "Stat Score: " + type);
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator everyDoc = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, constantScore, everyDoc);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return true;
        }

    };
}

/**
 * Picks the collection-level statistic named by {@code type}.
 *
 * @param docFreqStats       accumulated raw document frequencies of the query terms
 * @param idfStats           accumulated classic IDF values of the query terms
 * @param totalTermFreqStats accumulated raw total term frequencies of the query terms
 * @param termCount          number of distinct terms extracted from the query
 * @return the selected statistic
 * @throws RuntimeException if {@code type} is not a known collection statistic
 */
private float selectCollectionStat(StatisticsHelper docFreqStats, StatisticsHelper idfStats,
        StatisticsHelper totalTermFreqStats, int termCount) {
    switch (type) {
    case "sum_classic_idf":
        return idfStats.getSum();
    case "mean_classic_idf":
        return idfStats.getMean();
    case "max_classic_idf":
        return idfStats.getMax();
    case "min_classic_idf":
        return idfStats.getMin();
    case "stddev_classic_idf":
        return idfStats.getStdDev();
    case "sum_raw_df":
        return docFreqStats.getSum();
    case "min_raw_df":
        return docFreqStats.getMin();
    case "max_raw_df":
        return docFreqStats.getMax();
    case "mean_raw_df":
        return docFreqStats.getMean();
    case "stddev_raw_df":
        return docFreqStats.getStdDev();
    case "sum_raw_ttf":
        return totalTermFreqStats.getSum();
    case "min_raw_ttf":
        return totalTermFreqStats.getMin();
    case "max_raw_ttf":
        return totalTermFreqStats.getMax();
    case "mean_raw_ttf":
        return totalTermFreqStats.getMean();
    case "stddev_raw_ttf":
        return totalTermFreqStats.getStdDev();
    case "unique_terms_count":
        return termCount;
    default:
        throw new RuntimeException("Invalid stat type specified.");
    }
}

From source file:com.o19s.es.ltr.logging.LoggingFetchSubPhase.java

License:Apache License

/**
 * Scores every hit with {@code query} so that the given loggers can collect their data.
 *
 * <p>The hits are processed in ascending doc id order (on a copy; {@code hits} itself is not
 * reordered) so that all documents of one segment are handled with the same scorer.
 *
 * @param query    query whose scoring triggers the log collection
 * @param loggers  consumers notified via {@code nextDoc} before each hit is scored
 * @param searcher searcher used to create the weight and to resolve segments
 * @param hits     hits to log
 * @throws IOException if creating the weight/scorer or advancing/scoring fails
 */
void doLog(Query query, List<HitLogConsumer> loggers, IndexSearcher searcher, SearchHit[] hits)
        throws IOException {
    final SearchHit[] byDocId = Arrays.copyOf(hits, hits.length);
    Arrays.sort(byDocId, Comparator.comparingInt(SearchHit::docId));

    int leafOrd = -1;
    int leafEnd = 0;
    int leafBase = 0;
    Scorer leafScorer = null;
    final Weight weight = searcher.createNormalizedWeight(query, true);

    // Loop logic borrowed from lucene QueryRescorer
    for (final SearchHit hit : byDocId) {
        final int globalDoc = hit.docId();
        loggers.forEach(consumer -> consumer.nextDoc(hit));

        // Move forward through the segments until one contains the current doc.
        LeafReaderContext enteredLeaf = null;
        while (globalDoc >= leafEnd) {
            leafOrd++;
            enteredLeaf = searcher.getTopReaderContext().leaves().get(leafOrd);
            leafEnd = enteredLeaf.docBase + enteredLeaf.reader().maxDoc();
        }

        if (enteredLeaf != null) {
            // We advanced to another segment:
            leafBase = enteredLeaf.docBase;
            leafScorer = weight.scorer(enteredLeaf);
        }

        if (leafScorer == null) {
            continue;
        }

        final int segmentDoc = globalDoc - leafBase;
        int positionedAt = leafScorer.docID();
        if (positionedAt < segmentDoc) {
            positionedAt = leafScorer.iterator().advance(segmentDoc);
        }
        if (positionedAt == segmentDoc) {
            // Scoring will trigger log collection
            leafScorer.score();
        }
    }
}

From source file:com.o19s.es.ltr.query.LtrScorer.java

License:Apache License

/**
 * Scores the current document by evaluating the rank model over the feature values
 * produced by the sub-scorers.
 *
 * <p>Each sub-scorer lagging behind the current document is advanced to it; a sub-scorer
 * that does not land on the current document contributes a feature value of {@code 0.0f}.
 *
 * @return the rank model's evaluation of the collected features, narrowed to {@code float}
 * @throws IOException if advancing or scoring a sub-scorer fails
 */
@Override
public float score() throws IOException {
    final DataPoint features = new DenseProgramaticDataPoint(_subScorers.size());
    int featureOrdinal = 1; // RankLib is 1-based
    for (final Scorer sub : _subScorers) {
        if (sub.docID() < docID()) {
            sub.iterator().advance(docID());
        }
        final float value = sub.docID() == docID() ? sub.score() : 0.0f;
        features.setFeatureValue(featureOrdinal, value);
        featureOrdinal++;
    }
    return (float) _rankModel.eval(features);
}

From source file:io.crate.execution.engine.collect.collectors.LuceneBatchIterator.java

License:Apache License

/**
 * Ensures a current doc id iterator is available, moving on to the next leaf that yields a
 * scorer when none is set.
 *
 * <p>When a new leaf is selected, its scorer and reader context are handed to every
 * collector expression.
 *
 * @return {@code true} if an iterator is (now) available, {@code false} if the remaining
 *         leaves are exhausted without producing a scorer
 * @throws IOException if creating a scorer or updating an expression fails
 */
private boolean tryAdvanceDocIdSetIterator() throws IOException {
    if (currentDocIdSetIt != null) {
        return true;
    }
    while (leavesIt.hasNext()) {
        final LeafReaderContext candidateLeaf = leavesIt.next();
        final Scorer candidateScorer = weight.scorer(candidateLeaf);
        if (candidateScorer != null) {
            currentScorer = candidateScorer;
            currentLeaf = candidateLeaf;
            currentDocIdSetIt = candidateScorer.iterator();
            for (LuceneCollectorExpression expr : expressions) {
                expr.setScorer(currentScorer);
                expr.setNextReader(currentLeaf);
            }
            return true;
        }
        // A null scorer means nothing to iterate in this leaf; try the next one.
    }
    return false;
}

From source file:io.crate.lucene.GenericFunctionQuery.java

License:Apache License

/**
 * Creates a weight whose scorer matches documents via the two-phase iterator returned by
 * {@code getTwoPhaseIterator(context)} and always scores {@code 0}.
 *
 * @param searcher    unused by this implementation
 * @param needsScores unused by this implementation
 * @return a constant-zero-score weight for this query
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    return new Weight(this) {
        @Override
        public void extractTerms(Set<Term> terms) {
            // No terms to report.
        }

        /**
         * Explains whether {@code doc} matches in the given segment.
         *
         * @return a match explanation with value 0 when the document matches, otherwise a
         *         no-match explanation
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer s = scorer(context);
            final boolean match;
            final TwoPhaseIterator twoPhase = s.twoPhaseIterator();
            if (twoPhase == null) {
                match = s.iterator().advance(doc) == doc;
            } else {
                // The approximation may over-match, so confirm with matches().
                match = twoPhase.approximation().advance(doc) == doc && twoPhase.matches();
            }
            if (match) {
                assert s.score() == 0f : "score must be 0";
                return Explanation.match(0f, "Match on id " + doc);
            }
            // Report a genuine non-match so that Explanation#isMatch() is false.
            return Explanation.noMatch("No match on id " + doc);
        }

        @Override
        public float getValueForNormalization() throws IOException {
            return 0;
        }

        @Override
        public void normalize(float norm, float boost) {
            // Scores are constant; normalization has no effect.
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            return new ConstantScoreScorer(this, 0f, getTwoPhaseIterator(context));
        }
    };
}

From source file:org.apache.solr.ltr.TestLTRScoringQuery.java

License:Apache License

/**
 * Scores the top hit with {@code model} and returns the model weight that was created.
 *
 * @param hits     search results; only the first score doc is used
 * @param searcher searcher used to resolve the segment and to create the weight
 * @param docid    not used by this helper
 * @param model    scoring query to create the weight from
 * @return the weight created for {@code model}, asserted to be a {@code ModelWeight}
 */
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits, IndexSearcher searcher, int docid,
        LTRScoringQuery model) throws IOException, ModelException {
    final List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    final int topDoc = hits.scoreDocs[0].doc;
    final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(topDoc, leaves));
    // Translate the top-level doc id into one relative to its segment.
    final int segmentDoc = topDoc - leaf.docBase;

    final Weight weight = searcher.createNormalizedWeight(model, true);
    final Scorer modelScorer = weight.scorer(leaf);

    // rerank using the field final-score
    modelScorer.iterator().advance(segmentDoc);
    modelScorer.score();

    assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
    return (LTRScoringQuery.ModelWeight) weight;
}

From source file:org.apache.solr.ltr.TestSelectiveWeightCreation.java

License:Apache License

/**
 * Scores the top hit with {@code model} and returns the model weight that was created.
 *
 * @param hits     search results; only the first score doc is used
 * @param searcher searcher used to resolve the segment and to create the weight
 * @param docid    not used by this helper
 * @param model    scoring query to create the weight from
 * @return the weight created for {@code model}, asserted to be a {@code ModelWeight}
 */
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits, IndexSearcher searcher, int docid,
        LTRScoringQuery model) throws IOException, ModelException {
    final List<LeafReaderContext> segments = searcher.getTopReaderContext().leaves();
    final int firstHit = hits.scoreDocs[0].doc;
    final int segmentIndex = ReaderUtil.subIndex(firstHit, segments);
    final LeafReaderContext segment = segments.get(segmentIndex);
    // Doc id relative to the segment that holds the first hit.
    final int localDoc = firstHit - segment.docBase;

    final Weight weight = searcher.createNormalizedWeight(model, true);
    final Scorer segmentScorer = weight.scorer(segment);

    // rerank using the field final-score
    segmentScorer.iterator().advance(localDoc);
    segmentScorer.score();

    assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
    return (LTRScoringQuery.ModelWeight) weight;
}

From source file:org.apache.solr.search.Filter.java

License:Apache License

/**
 * Creates a weight that matches the documents of this filter's {@code DocIdSet} and always
 * scores {@code 0}.
 *
 * @param searcher    unused by this implementation
 * @param needsScores unused by this implementation
 * @param boost       unused by this implementation
 * @return a constant-zero-score weight backed by {@code getDocIdSet}
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new Weight(this) {

        @Override
        public void extractTerms(Set<Term> terms) {
            // No terms to report.
        }

        /**
         * Explains whether {@code doc} matches in the given segment.
         *
         * @return a match explanation with value 0 when the document matches, otherwise a
         *         no-match explanation
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer scorer = scorer(context);
            final boolean match = (scorer != null && scorer.iterator().advance(doc) == doc);
            if (match) {
                assert scorer.score() == 0f;
                return Explanation.match(0f, "Match on id " + doc);
            }
            // Report a genuine non-match so that Explanation#isMatch() is false.
            return Explanation.noMatch("No match on id " + doc);
        }

        /**
         * Builds the scorer for one segment.
         *
         * @return {@code null} when the filter yields no set or no iterator for the segment
         */
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSet set = getDocIdSet(context, null);
            if (set == null) {
                return null;
            }
            // Only consult the random-access view when lazy application is requested.
            final Bits bits = applyLazily ? set.bits() : null;
            if (bits != null) {
                // Approximate with every doc in the segment and verify against the bits.
                final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
                    @Override
                    public boolean matches() throws IOException {
                        return bits.get(approximation.docID());
                    }

                    @Override
                    public float matchCost() {
                        return 10; // TODO use cost of bits.get()
                    }
                };
                return new ConstantScoreScorer(this, 0f, twoPhase);
            }
            final DocIdSetIterator iterator = set.iterator();
            if (iterator == null) {
                return null;
            }
            return new ConstantScoreScorer(this, 0f, iterator);
        }

    };
}

From source file:org.apache.solr.search.QueryWrapperFilter.java

License:Apache License

/**
 * Returns the documents of the given segment that match the wrapped query, restricted to
 * {@code acceptDocs} via {@code BitsFilteredDocIdSet.wrap}.
 *
 * <p>The scorer is created each time the returned set's {@code iterator()} is called.
 *
 * @param context    segment to evaluate the query against
 * @param acceptDocs bits handed to {@code BitsFilteredDocIdSet.wrap} together with the set
 * @return the wrapped doc id set
 * @throws IOException if creating the weight fails
 */
@Override
public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) throws IOException {
    // get a private context that is used to rewrite, createWeight and score eventually
    final LeafReaderContext privateContext = context.reader().getContext();
    final IndexSearcher privateSearcher = new IndexSearcher(privateContext);
    final Weight weight = privateSearcher.createNormalizedWeight(query, false);

    final DocIdSet queryMatches = new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() throws IOException {
            final Scorer segmentScorer = weight.scorer(privateContext);
            if (segmentScorer == null) {
                return null;
            }
            return segmentScorer.iterator();
        }

        @Override
        public long ramBytesUsed() {
            return 0L;
        }
    };
    return BitsFilteredDocIdSet.wrap(queryMatches, acceptDocs);
}