Example usage for org.apache.lucene.search.DocIdSetIterator#all

Introduction

On this page you can find example usages of org.apache.lucene.search.DocIdSetIterator#all.

Prototype

public static final DocIdSetIterator all(int maxDoc)

Source Link

Document

Returns a DocIdSetIterator that matches every document ID from 0 up to and including maxDoc - 1.

Usage

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

/**
 * Creates the weight for this explorer query.
 *
 * <ul>
 *   <li>When scores are not needed, the weight of the wrapped {@code query} is returned as is.</li>
 *   <li>When {@code isCollectionScoped()} is true, a statistic selected by {@code type} is computed
 *       over the terms extracted from the wrapped query and returned as a constant score on
 *       every document of each segment.</li>
 *   <li>When {@code type} ends with {@code "_raw_tf"}, the extracted terms are turned into a
 *       boolean query of {@code PostingsExplorerQuery} clauses whose weight is wrapped in an
 *       {@code ExplorerWeight}.</li>
 * </ul>
 *
 * @param searcher    searcher used to build sub-weights and to read term statistics
 * @param needsScores whether the caller needs scores
 * @param boost       boost forwarded to the sub-weights
 * @return the weight matching the configured {@code type}
 * @throws IOException              propagated from the searcher/reader calls
 * @throws RuntimeException         if the query is collection scoped, at least one term was
 *                                  extracted, and {@code type} is not a known statistic
 * @throws IllegalArgumentException if {@code type} is neither collection scoped nor a
 *                                  {@code "_raw_tf"} type
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }
    // The wrapped query's weight is only used to discover the terms the query touches.
    final Weight subWeight = searcher.createWeight(query, true, boost);
    Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);
    if (isCollectionScoped()) {
        ClassicSimilarity sim = new ClassicSimilarity();
        StatisticsHelper df_stats = new StatisticsHelper();
        StatisticsHelper idf_stats = new StatisticsHelper();
        StatisticsHelper ttf_stats = new StatisticsHelper();

        // Collect document frequency, classic IDF and total term frequency for every term.
        for (Term term : terms) {
            TermContext ctx = TermContext.build(searcher.getTopReaderContext(), term);
            TermStatistics tStats = searcher.termStatistics(term, ctx);
            df_stats.add(tStats.docFreq());
            idf_stats.add(sim.idf(tStats.docFreq(), searcher.getIndexReader().numDocs()));
            ttf_stats.add(tStats.totalTermFreq());
        }

        /*
        If no terms are parsed in the query we opt for returning 0
        instead of throwing an exception that could break various
        pipelines.
         */
        float constantScore;

        if (terms.size() > 0) {
            // Pick the aggregate named by `type`; unknown names are rejected in the default branch.
            switch (type) {
            case ("sum_classic_idf"):
                constantScore = idf_stats.getSum();
                break;
            case ("mean_classic_idf"):
                constantScore = idf_stats.getMean();
                break;
            case ("max_classic_idf"):
                constantScore = idf_stats.getMax();
                break;
            case ("min_classic_idf"):
                constantScore = idf_stats.getMin();
                break;
            case ("stddev_classic_idf"):
                constantScore = idf_stats.getStdDev();
                break;
            case "sum_raw_df":
                constantScore = df_stats.getSum();
                break;
            case "min_raw_df":
                constantScore = df_stats.getMin();
                break;
            case "max_raw_df":
                constantScore = df_stats.getMax();
                break;
            case "mean_raw_df":
                constantScore = df_stats.getMean();
                break;
            case "stddev_raw_df":
                constantScore = df_stats.getStdDev();
                break;
            case "sum_raw_ttf":
                constantScore = ttf_stats.getSum();
                break;
            case "min_raw_ttf":
                constantScore = ttf_stats.getMin();
                break;
            case "max_raw_ttf":
                constantScore = ttf_stats.getMax();
                break;
            case "mean_raw_ttf":
                constantScore = ttf_stats.getMean();
                break;
            case "stddev_raw_ttf":
                constantScore = ttf_stats.getStdDev();
                break;
            case "unique_terms_count":
                constantScore = terms.size();
                break;

            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        } else {
            constantScore = 0.0f;
        }

        // The statistic does not depend on the document, so every doc gets the same score.
        return new ConstantScoreWeight(ExplorerQuery.this, constantScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                Scorer scorer = scorer(context);
                int newDoc = scorer.iterator().advance(doc);
                assert newDoc == doc; // this is a DocIdSetIterator.all
                return Explanation.match(scorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                // Iterate over every doc id of the segment, each with the precomputed constant.
                return new ConstantScoreScorer(this, constantScore,
                        DocIdSetIterator.all(context.reader().maxDoc()));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    } else if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        BooleanQuery.Builder qb = new BooleanQuery.Builder();
        for (Term t : terms) {
            qb.add(new BooleanClause(new PostingsExplorerQuery(t, PostingsExplorerQuery.Type.TF),
                    BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
        // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms
        // problem is that we rely on extractTerms which happen too late in the process
        Query q = qb.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(q, true, boost), type);
    }
    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}

From source file:com.o19s.es.ltr.query.DerivedExpressionQuery.java

License:Apache License

/**
 * Creates the weight for this query.
 *
 * <p>When scores are needed an {@code FVWeight} is returned. Otherwise a constant-score
 * weight is returned that matches every document of a segment with the given boost.
 *
 * @param searcher    the searcher the weight is created for (not used by this method)
 * @param needsScores whether the caller needs scores
 * @param boost       constant score used when scores are not needed
 * @return the weight for this query
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (needsScores) {
        return new FVWeight(this);
    }

    // Scores are irrelevant: match all docs with a constant score.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator everyDoc = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, score(), everyDoc);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return true;
        }
    };
}

From source file:com.o19s.es.ltr.query.LtrScorer.java

License:Apache License

/**
 * Creates a scorer that keeps the given sub-scorers and rank model and prepares an
 * iterator over every doc id of the segment.
 *
 * @param weight      the weight passed to the superclass constructor
 * @param subScorers  scorers stored in {@code _subScorers}
 * @param needsScores not used by this constructor
 * @param context     segment context; its reader's {@code maxDoc()} bounds the all-docs iterator
 * @param rankModel   ranker stored in {@code _rankModel}
 */
protected LtrScorer(Weight weight, List<Scorer> subScorers, boolean needsScores, LeafReaderContext context,
        Ranker rankModel) {
    super(weight);
    this._subScorers = subScorers;
    this._rankModel = rankModel;
    this._allDocsIter = DocIdSetIterator.all(context.reader().maxDoc());
}

From source file:com.o19s.es.ltr.query.NoopScorer.java

License:Apache License

/**
 * Constructs a scorer whose iterator walks every doc id below {@code maxDocs}.
 *
 * @param weight  the scorer's <code>Weight</code>
 * @param maxDocs bound handed to {@code DocIdSetIterator.all(int)} for the no-op iterator
 */
public NoopScorer(Weight weight, int maxDocs) {
    super(weight);
    this._noopIter = DocIdSetIterator.all(maxDocs);
}

From source file:com.o19s.es.ltr.query.RankerQuery.java

License:Apache License

/**
 * Creates the weight for this ranker query.
 *
 * <p>When scores are not needed, a constant-score weight matching every document of a
 * segment is returned. Otherwise one weight is created per entry of {@code queries} and the
 * list is wrapped in a {@code RankerWeight}.
 *
 * @param searcher    searcher used to create the per-query weights
 * @param needsScores whether the caller needs scores
 * @param boost       boost forwarded to the sub-weights, or used as the constant score
 * @return the weight for this query
 * @throws IOException if creating a sub-weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (needsScores) {
        final List<Weight> featureWeights = new ArrayList<>(queries.size());
        for (Query featureQuery : queries) {
            featureWeights.add(searcher.createWeight(featureQuery, needsScores, boost));
        }
        return new RankerWeight(featureWeights);
    }

    // Scores are irrelevant: match all docs with a constant score.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return true;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator everyDoc = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, score(), everyDoc);
        }
    };
}

From source file:org.apache.solr.search.Filter.java

License:Apache License

/**
 * Creates a weight that scores every document accepted by this filter with {@code 0}.
 *
 * <p>The doc id set comes from {@code getDocIdSet(context, null)}. When {@code applyLazily}
 * is set and the set exposes random-access bits, the scorer iterates over all doc ids of the
 * segment and checks the bits in a two-phase confirmation step; otherwise the set's own
 * iterator is used.
 *
 * @param searcher    not used by this implementation
 * @param needsScores not used by this implementation
 * @param boost       not used by this implementation; the score is always {@code 0}
 * @return a weight whose scorers return a constant score of {@code 0}
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new Weight(this) {

        @Override
        public void extractTerms(Set<Term> terms) {
            // Intentionally empty: this weight reports no terms.
        }

        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer scorer = scorer(context);
            final boolean match = (scorer != null && scorer.iterator().advance(doc) == doc);
            if (match) {
                assert scorer.score() == 0f;
                return Explanation.match(0f, "Match on id " + doc);
            } else {
                // A non-matching document must be reported as a non-match; using
                // Explanation.match here would make isMatch() return true for it.
                return Explanation.noMatch("No match on id " + doc);
            }
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSet set = getDocIdSet(context, null);
            if (set == null) {
                // No doc id set for this segment: nothing matches.
                return null;
            }
            if (applyLazily && set.bits() != null) {
                // Lazy mode: walk every doc id and confirm each one against the bits.
                final Bits bits = set.bits();
                final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
                    @Override
                    public boolean matches() throws IOException {
                        return bits.get(approximation.docID());
                    }

                    @Override
                    public float matchCost() {
                        return 10; // TODO use cost of bits.get()
                    }
                };
                return new ConstantScoreScorer(this, 0f, twoPhase);
            }
            final DocIdSetIterator iterator = set.iterator();
            if (iterator == null) {
                // The set has no iterator: nothing matches.
                return null;
            }
            return new ConstantScoreScorer(this, 0f, iterator);
        }

    };
}

From source file:org.elasticsearch.common.lucene.search.function.MinScoreScorerTests.java

License:Apache License

/**
 * Builds a test scorer over the given docs and their scores.
 *
 * <p>In two-phase mode the approximation iterates over all doc ids below {@code maxDoc} and
 * the confirmation step accepts a doc only if it is found in {@code docs} by binary search.
 * Otherwise the scorer iterates directly over {@code iterator(docs)}.
 *
 * @param maxDoc   bound for the all-docs approximation (only used in two-phase mode)
 * @param docs     doc ids to match; binary-searched, so presumably sorted ascending
 * @param scores   score for each entry of {@code docs}, at the same index
 * @param twoPhase whether the scorer exposes a two-phase iterator
 * @return the scorer
 */
private static Scorer scorer(int maxDoc, final int[] docs, final float[] scores, final boolean twoPhase) {
    final DocIdSetIterator baseIterator;
    if (twoPhase) {
        baseIterator = DocIdSetIterator.all(maxDoc);
    } else {
        baseIterator = iterator(docs);
    }

    return new Scorer(null) {
        @Override
        public DocIdSetIterator iterator() {
            return twoPhase ? TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator()) : baseIterator;
        }

        @Override
        public TwoPhaseIterator twoPhaseIterator() {
            if (!twoPhase) {
                return null;
            }
            return new TwoPhaseIterator(baseIterator) {

                @Override
                public boolean matches() throws IOException {
                    final int position = Arrays.binarySearch(docs, baseIterator.docID());
                    return position >= 0;
                }

                @Override
                public float matchCost() {
                    return 10;
                }
            };
        }

        @Override
        public int docID() {
            return baseIterator.docID();
        }

        @Override
        public float score() throws IOException {
            return scores[Arrays.binarySearch(docs, docID())];
        }

        @Override
        public int freq() throws IOException {
            return 1;
        }
    };
}

From source file:org.elasticsearch.index.search.geo.GeoDistanceRangeQuery.java

License:Apache License

/**
 * Creates a constant-score weight that matches documents having at least one geo point whose
 * distance lies within {@code [inclusiveLowerPoint, inclusiveUpperPoint]}.
 *
 * <p>The candidate documents come from {@code boundingBoxFilter} when it is set, otherwise
 * from all doc ids of the segment; each candidate is then confirmed in a two-phase step.
 *
 * @param searcher    used to create the normalized weight of the bounding box filter
 * @param needsScores not used by this implementation
 * @return the weight for this query
 * @throws IOException if creating the bounding box weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight boundingBoxWeight = boundingBoxFilter == null
            ? null
            : searcher.createNormalizedWeight(boundingBoxFilter, false);

    return new ConstantScoreWeight(this) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator approximation;
            if (boundingBoxWeight == null) {
                approximation = DocIdSetIterator.all(context.reader().maxDoc());
            } else {
                final Scorer boxScorer = boundingBoxWeight.scorer(context);
                if (boxScorer == null) {
                    // if the approximation does not match anything, we're done
                    return null;
                }
                approximation = boxScorer.iterator();
            }

            final MultiGeoPointValues values = indexFieldData.load(context).getGeoPointValues();
            final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    values.setDocument(approximation.docID());
                    final int pointCount = values.count();
                    for (int idx = 0; idx < pointCount; idx++) {
                        final GeoPoint candidate = values.valueAt(idx);
                        if (!distanceBoundingCheck.isWithin(candidate.lat(), candidate.lon())) {
                            continue;
                        }
                        final double distance = fixedSourceDistance.calculate(candidate.lat(), candidate.lon());
                        if (distance >= inclusiveLowerPoint && distance <= inclusiveUpperPoint) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    // TODO: is this right (up to 4 comparisons from GeoDistance.SimpleDistanceBoundingCheck)?
                    return distanceBoundingCheck == GeoDistance.ALWAYS_INSTANCE ? 0.0f : 4.0f;
                }
            };
            return new ConstantScoreScorer(this, score(), twoPhaseIterator);
        }
    };
}

From source file:org.elasticsearch.index.search.geo.LegacyGeoDistanceRangeQuery.java

License:Apache License

/**
 * Creates a constant-score weight that matches documents having at least one geo point whose
 * distance, as computed by {@code geoDistance} from ({@code lat}, {@code lon}), lies within
 * {@code [inclusiveLowerPoint, inclusiveUpperPoint]}.
 *
 * <p>The candidate documents come from {@code boundingBoxFilter} when it is set, otherwise
 * from all doc ids of the segment; each candidate is then confirmed in a two-phase step.
 *
 * @param searcher    used to create the normalized weight of the bounding box filter
 * @param needsScores not used by this implementation
 * @return the weight for this query
 * @throws IOException if creating the bounding box weight fails
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight boundingBoxWeight;
    if (boundingBoxFilter != null) {
        boundingBoxWeight = searcher.createNormalizedWeight(boundingBoxFilter, false);
    } else {
        boundingBoxWeight = null;
    }
    return new ConstantScoreWeight(this) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            // Cheap first phase: bounding box matches if available, else every doc id.
            final DocIdSetIterator approximation;
            if (boundingBoxWeight != null) {
                Scorer s = boundingBoxWeight.scorer(context);
                if (s == null) {
                    // if the approximation does not match anything, we're done
                    return null;
                }
                approximation = s.iterator();
            } else {
                approximation = DocIdSetIterator.all(context.reader().maxDoc());
            }
            final MultiGeoPointValues values = indexFieldData.load(context).getGeoPointValues();
            final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    final int doc = approximation.docID();
                    values.setDocument(doc);
                    final int length = values.count();
                    // A document matches as soon as one of its points is in range.
                    for (int i = 0; i < length; i++) {
                        GeoPoint point = values.valueAt(i);
                        // Without a bbox every point goes through the distance computation.
                        if (bbox == null || GeoUtils.rectangleContainsPoint(bbox, point.lat(), point.lon())) {
                            double d = geoDistance.calculate(lat, lon, point.lat(), point.lon(),
                                    DistanceUnit.DEFAULT);
                            if (d >= inclusiveLowerPoint && d <= inclusiveUpperPoint) {
                                return true;
                            }
                        }
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    // NOTE(review): matches() computes the distance for every point when
                    // bbox == null, so this condition looks inverted relative to the comment
                    // below — confirm whether `bbox == null` was intended.
                    if (bbox != null) {
                        // always within bounds so we're going to compute distance for every point
                        return values.count();
                    } else {
                        // TODO: come up with better estimate of boundary points
                        return 4.0f;
                    }
                }
            };
            return new ConstantScoreScorer(this, score(), twoPhaseIterator);
        }
    };
}

From source file:org.neo4j.kernel.api.impl.index.LucenePartitionAllDocumentsReader.java

License:Open Source License

/**
 * Returns an iterator over the doc ids of {@code reader}.
 *
 * <p>When the reader provides a live-docs bit set, only doc ids whose bit is set are
 * returned; when it provides none ({@code null}), every doc id below {@code maxDoc()} is
 * returned unfiltered.
 *
 * @return iterator over all (live) documents
 */
private DocIdSetIterator iterateAllDocs() {
    final Bits live = MultiFields.getLiveDocs(reader);
    final DocIdSetIterator everyDoc = DocIdSetIterator.all(reader.maxDoc());
    if (live != null) {
        // Skip doc ids that are not marked live.
        return new FilteredDocIdSetIterator(everyDoc) {
            @Override
            protected boolean match(int doc) {
                return live.get(doc);
            }
        };
    }
    return everyDoc;
}