Example usage for org.apache.lucene.search DocIdSetIterator all

List of usage examples for org.apache.lucene.search DocIdSetIterator all

Introduction

On this page you can find example usages of org.apache.lucene.search.DocIdSetIterator.all(int).

Prototype

public static final DocIdSetIterator all(int maxDoc) 

Source Link

Document

A DocIdSetIterator that matches all documents up to maxDoc - 1.

Usage

From source file:com.o19s.es.explore.ExplorerQuery.java

License:Apache License

/**
 * Builds the {@link Weight} for this explorer query.
 *
 * <ul>
 *   <li>When scores are not needed, the weight of the wrapped query is returned as is.</li>
 *   <li>When {@code isCollectionScoped()} is true, a constant-score weight matching every
 *       document is returned; its score is the statistic named by {@code type}, computed
 *       over the terms extracted from the wrapped query (0 when no terms were extracted).</li>
 *   <li>When {@code type} ends with {@code _raw_tf}, the extracted terms are turned into a
 *       boolean query of optional {@code PostingsExplorerQuery} clauses wrapped in an
 *       {@code ExplorerWeight}.</li>
 * </ul>
 *
 * @param searcher    searcher used to create sub-weights and to look up term statistics
 * @param needsScores whether the caller needs scores
 * @param boost       boost forwarded to the sub-weights
 * @return the weight described above
 * @throws IOException              propagated from the Lucene calls made here
 * @throws RuntimeException         if the query is collection scoped, has terms, and
 *                                  {@code type} names no known statistic
 * @throws IllegalArgumentException if the query is not collection scoped and {@code type}
 *                                  does not end with {@code _raw_tf}
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (!needsScores) {
        return searcher.createWeight(query, false, boost);
    }

    final Weight subWeight = searcher.createWeight(query, true, boost);
    final Set<Term> terms = new HashSet<>();
    subWeight.extractTerms(terms);

    if (isCollectionScoped()) {
        final ClassicSimilarity similarity = new ClassicSimilarity();
        final StatisticsHelper dfStats = new StatisticsHelper();
        final StatisticsHelper idfStats = new StatisticsHelper();
        final StatisticsHelper ttfStats = new StatisticsHelper();

        // Gather document frequency, classic idf and total term frequency for every term.
        for (Term term : terms) {
            final TermContext termContext = TermContext.build(searcher.getTopReaderContext(), term);
            final TermStatistics termStats = searcher.termStatistics(term, termContext);
            final long docFreq = termStats.docFreq();
            dfStats.add(docFreq);
            idfStats.add(similarity.idf(docFreq, searcher.getIndexReader().numDocs()));
            ttfStats.add(termStats.totalTermFreq());
        }

        // If no terms are parsed in the query we opt for returning 0 instead of
        // throwing an exception that could break various pipelines.
        final float statScore;
        if (terms.isEmpty()) {
            statScore = 0.0f;
        } else {
            switch (type) {
            case "sum_classic_idf":
                statScore = idfStats.getSum();
                break;
            case "mean_classic_idf":
                statScore = idfStats.getMean();
                break;
            case "max_classic_idf":
                statScore = idfStats.getMax();
                break;
            case "min_classic_idf":
                statScore = idfStats.getMin();
                break;
            case "stddev_classic_idf":
                statScore = idfStats.getStdDev();
                break;
            case "sum_raw_df":
                statScore = dfStats.getSum();
                break;
            case "min_raw_df":
                statScore = dfStats.getMin();
                break;
            case "max_raw_df":
                statScore = dfStats.getMax();
                break;
            case "mean_raw_df":
                statScore = dfStats.getMean();
                break;
            case "stddev_raw_df":
                statScore = dfStats.getStdDev();
                break;
            case "sum_raw_ttf":
                statScore = ttfStats.getSum();
                break;
            case "min_raw_ttf":
                statScore = ttfStats.getMin();
                break;
            case "max_raw_ttf":
                statScore = ttfStats.getMax();
                break;
            case "mean_raw_ttf":
                statScore = ttfStats.getMean();
                break;
            case "stddev_raw_ttf":
                statScore = ttfStats.getStdDev();
                break;
            case "unique_terms_count":
                statScore = terms.size();
                break;
            default:
                throw new RuntimeException("Invalid stat type specified.");
            }
        }

        return new ConstantScoreWeight(ExplorerQuery.this, statScore) {

            @Override
            public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                final Scorer allDocsScorer = scorer(context);
                final int landedOn = allDocsScorer.iterator().advance(doc);
                // The scorer iterates over DocIdSetIterator.all, so advancing lands on doc itself.
                assert landedOn == doc;
                return Explanation.match(allDocsScorer.score(), "Stat Score: " + type);
            }

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {
                final int maxDoc = context.reader().maxDoc();
                return new ConstantScoreScorer(this, statScore, DocIdSetIterator.all(maxDoc));
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                return true;
            }

        };
    }

    if (type.endsWith("_raw_tf")) {
        // Rewrite this into a boolean query where we can inject our PostingsExplorerQuery
        final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
        for (Term term : terms) {
            final Query tfQuery = new PostingsExplorerQuery(term, PostingsExplorerQuery.Type.TF);
            clauses.add(new BooleanClause(tfQuery, BooleanClause.Occur.SHOULD));
        }
        // FIXME: completely refactor this class and stop accepting a random query but a list of terms directly
        // rewriting at this point is wrong, additionally we certainly build the TermContext twice for every terms
        // problem is that we rely on extractTerms which happen too late in the process
        final Query rewritten = clauses.build().rewrite(searcher.getIndexReader());
        return new ExplorerQuery.ExplorerWeight(this, searcher.createWeight(rewritten, true, boost), type);
    }

    throw new IllegalArgumentException("Unknown ExplorerQuery type [" + type + "]");
}

From source file:com.o19s.es.ltr.query.DerivedExpressionQuery.java

License:Apache License

/**
 * Creates the weight for this query.
 *
 * @param searcher    unused by this method
 * @param needsScores when {@code true} a new {@code FVWeight} for this query is returned;
 *                    otherwise a cacheable constant-score weight matching every document
 * @param boost       constant score given to the weight when scores are not needed
 * @return the weight described above
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (needsScores) {
        return new FVWeight(this);
    }

    // Scores are not needed: match every document with the same constant score.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final float constant = score();
            final DocIdSetIterator everyDoc = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, constant, everyDoc);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return true;
        }
    };
}

From source file:com.o19s.es.ltr.query.LtrScorer.java

License:Apache License

/**
 * Creates a scorer that keeps the given sub-scorers and rank model and prepares an
 * iterator over every document id of the segment.
 *
 * @param weight      weight passed to the superclass constructor
 * @param subScorers  scorers stored for later use
 * @param needsScores not used by this constructor
 * @param context     segment context whose reader's {@code maxDoc()} bounds the iterator
 * @param rankModel   ranker stored for later use
 */
protected LtrScorer(Weight weight, List<Scorer> subScorers, boolean needsScores, LeafReaderContext context,
        Ranker rankModel) {
    super(weight);
    this._subScorers = subScorers;
    this._rankModel = rankModel;
    this._allDocsIter = DocIdSetIterator.all(context.reader().maxDoc());
}

From source file:com.o19s.es.ltr.query.NoopScorer.java

License:Apache License

/**
 * Constructs a Scorer whose iterator is {@code DocIdSetIterator.all(maxDocs)}.
 *
 * @param weight  The scorers <code>Weight</code>.
 * @param maxDocs bound handed to {@code DocIdSetIterator.all}
 */
public NoopScorer(Weight weight, int maxDocs) {
    super(weight);
    this._noopIter = DocIdSetIterator.all(maxDocs);
}

From source file:com.o19s.es.ltr.query.RankerQuery.java

License:Apache License

/**
 * Creates the weight for this ranker query.
 *
 * @param searcher    searcher used to create one weight per sub-query when scores are needed
 * @param needsScores when {@code false} a cacheable constant-score weight matching every
 *                    document is returned instead of a {@code RankerWeight}
 * @param boost       boost forwarded to the sub-weights, or used as the constant score
 * @return the weight described above
 * @throws IOException propagated from {@code searcher.createWeight}
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    if (needsScores) {
        final List<Weight> perQueryWeights = new ArrayList<>(queries.size());
        for (Query subQuery : queries) {
            perQueryWeights.add(searcher.createWeight(subQuery, needsScores, boost));
        }
        return new RankerWeight(perQueryWeights);
    }

    // Scores are not needed: match every document with the same constant score.
    return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final float constant = score();
            final DocIdSetIterator everyDoc = DocIdSetIterator.all(context.reader().maxDoc());
            return new ConstantScoreScorer(this, constant, everyDoc);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
            return true;
        }
    };
}

From source file:org.apache.solr.search.Filter.java

License:Apache License

/**
 * Wraps this filter in a {@link Weight} whose scorers always score {@code 0}.
 *
 * @param searcher    unused by this method
 * @param needsScores unused by this method
 * @param boost       unused by this method; the score is always {@code 0f}
 * @return a weight backed by {@code getDocIdSet}
 * @throws IOException declared by the overridden method
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new Weight(this) {

        /** Intentionally adds nothing to {@code terms}. */
        @Override
        public void extractTerms(Set<Term> terms) {
        }

        /**
         * Reports whether {@code doc} is accepted by the filter in this segment.
         * A document the scorer does not land on is reported as a non-match, so that
         * {@code Explanation.isMatch()} agrees with what the scorer does.
         */
        @Override
        public Explanation explain(LeafReaderContext context, int doc) throws IOException {
            final Scorer scorer = scorer(context);
            final boolean match = (scorer != null && scorer.iterator().advance(doc) == doc);
            if (match) {
                assert scorer.score() == 0f;
                return Explanation.match(0f, "Match on id " + doc);
            } else {
                return Explanation.noMatch("No match on id " + doc);
            }
        }

        /**
         * Builds a constant-score (0) scorer over the filter's doc id set, or returns
         * {@code null} when the filter yields no set or no iterator for this segment.
         */
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSet set = getDocIdSet(context, null);
            if (set == null) {
                return null;
            }
            // Only ask for random-access bits when lazy application is enabled.
            final Bits bits = applyLazily ? set.bits() : null;
            if (bits != null) {
                // Visit every doc id and check the bits in the second phase.
                final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
                final TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
                    @Override
                    public boolean matches() throws IOException {
                        return bits.get(approximation.docID());
                    }

                    @Override
                    public float matchCost() {
                        return 10; // TODO use cost of bits.get()
                    }
                };
                return new ConstantScoreScorer(this, 0f, twoPhase);
            }
            final DocIdSetIterator iterator = set.iterator();
            if (iterator == null) {
                return null;
            }
            return new ConstantScoreScorer(this, 0f, iterator);
        }

    };
}

From source file:org.elasticsearch.common.lucene.search.function.MinScoreScorerTests.java

License:Apache License

/**
 * Builds a test scorer over the given matching documents.
 *
 * @param maxDoc   bound of the all-docs approximation used in two-phase mode
 * @param docs     matching doc ids; searched with {@link Arrays#binarySearch(int[], int)},
 *                 so they must be sorted ascending
 * @param scores   score for each entry of {@code docs}, at the same index
 * @param twoPhase when {@code true} the scorer exposes a two-phase iterator whose
 *                 approximation is every doc below {@code maxDoc}; otherwise it iterates
 *                 {@code docs} directly and exposes no two-phase iterator
 * @return the scorer
 */
private static Scorer scorer(int maxDoc, final int[] docs, final float[] scores, final boolean twoPhase) {
    final DocIdSetIterator iterator = twoPhase ? DocIdSetIterator.all(maxDoc) : iterator(docs);
    return new Scorer(null) {
        @Override
        public DocIdSetIterator iterator() {
            if (twoPhase) {
                return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
            } else {
                return iterator;
            }
        }

        @Override
        public TwoPhaseIterator twoPhaseIterator() {
            if (twoPhase) {
                // Every call wraps the same shared approximation iterator.
                return new TwoPhaseIterator(iterator) {

                    @Override
                    public boolean matches() throws IOException {
                        return Arrays.binarySearch(docs, iterator.docID()) >= 0;
                    }

                    @Override
                    public float matchCost() {
                        return 10;
                    }
                };
            } else {
                return null;
            }
        }

        @Override
        public int docID() {
            return iterator.docID();
        }

        @Override
        public float score() throws IOException {
            // Only meaningful while positioned on one of the entries of docs.
            final int idx = Arrays.binarySearch(docs, docID());
            return scores[idx];
        }

        @Override
        public int freq() throws IOException {
            return 1;
        }
    };
}

From source file:org.elasticsearch.index.search.geo.GeoDistanceRangeQuery.java

License:Apache License

/**
 * Creates a constant-score weight that confirms each candidate document with a
 * distance check on its geo points.
 *
 * Candidates come from the bounding-box filter when one is set, otherwise from every
 * document in the segment.
 *
 * @param searcher    used to create the normalized weight of the bounding-box filter
 * @param needsScores unused by this method
 * @return the weight
 * @throws IOException propagated from creating the bounding-box weight
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight boundingBoxWeight = boundingBoxFilter == null
            ? null
            : searcher.createNormalizedWeight(boundingBoxFilter, false);

    return new ConstantScoreWeight(this) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator approximation;
            if (boundingBoxWeight == null) {
                approximation = DocIdSetIterator.all(context.reader().maxDoc());
            } else {
                final Scorer boxScorer = boundingBoxWeight.scorer(context);
                if (boxScorer == null) {
                    // if the approximation does not match anything, we're done
                    return null;
                }
                approximation = boxScorer.iterator();
            }

            final MultiGeoPointValues values = indexFieldData.load(context).getGeoPointValues();
            final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    values.setDocument(approximation.docID());
                    final int pointCount = values.count();
                    for (int i = 0; i < pointCount; i++) {
                        final GeoPoint point = values.valueAt(i);
                        if (!distanceBoundingCheck.isWithin(point.lat(), point.lon())) {
                            continue;
                        }
                        final double distance = fixedSourceDistance.calculate(point.lat(), point.lon());
                        if (distance >= inclusiveLowerPoint && distance <= inclusiveUpperPoint) {
                            // One point inside the range is enough for the document to match.
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    // TODO: is this right (up to 4 comparisons from GeoDistance.SimpleDistanceBoundingCheck)?
                    return distanceBoundingCheck == GeoDistance.ALWAYS_INSTANCE ? 0.0f : 4.0f;
                }
            };
            return new ConstantScoreScorer(this, score(), twoPhaseIterator);
        }
    };
}

From source file:org.elasticsearch.index.search.geo.LegacyGeoDistanceRangeQuery.java

License:Apache License

/**
 * Creates a constant-score weight that confirms each candidate document by computing
 * the distance from ({@code lat}, {@code lon}) to its geo points.
 *
 * Candidates come from the bounding-box filter when one is set, otherwise from every
 * document in the segment.
 *
 * @param searcher    used to create the normalized weight of the bounding-box filter
 * @param needsScores unused by this method
 * @return the weight
 * @throws IOException propagated from creating the bounding-box weight
 */
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
    final Weight boundingBoxWeight = boundingBoxFilter == null
            ? null
            : searcher.createNormalizedWeight(boundingBoxFilter, false);

    return new ConstantScoreWeight(this) {
        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            final DocIdSetIterator approximation;
            if (boundingBoxWeight == null) {
                approximation = DocIdSetIterator.all(context.reader().maxDoc());
            } else {
                final Scorer boxScorer = boundingBoxWeight.scorer(context);
                if (boxScorer == null) {
                    // if the approximation does not match anything, we're done
                    return null;
                }
                approximation = boxScorer.iterator();
            }

            final MultiGeoPointValues values = indexFieldData.load(context).getGeoPointValues();
            final TwoPhaseIterator twoPhaseIterator = new TwoPhaseIterator(approximation) {
                @Override
                public boolean matches() throws IOException {
                    values.setDocument(approximation.docID());
                    final int pointCount = values.count();
                    for (int i = 0; i < pointCount; i++) {
                        final GeoPoint point = values.valueAt(i);
                        // With a bbox, points outside the rectangle are skipped before the distance is computed.
                        if (bbox != null && !GeoUtils.rectangleContainsPoint(bbox, point.lat(), point.lon())) {
                            continue;
                        }
                        final double distance = geoDistance.calculate(lat, lon, point.lat(), point.lon(),
                                DistanceUnit.DEFAULT);
                        if (distance >= inclusiveLowerPoint && distance <= inclusiveUpperPoint) {
                            return true;
                        }
                    }
                    return false;
                }

                @Override
                public float matchCost() {
                    if (bbox == null) {
                        // TODO: come up with better estimate of boundary points
                        return 4.0f;
                    }
                    // always within bounds so we're going to compute distance for every point
                    // NOTE(review): matches() skips the rectangle test only when bbox is null, so the
                    // comment above seems to describe the other branch - confirm the intended estimate.
                    return values.count();
                }
            };
            return new ConstantScoreScorer(this, score(), twoPhaseIterator);
        }
    };
}

From source file:org.neo4j.kernel.api.impl.index.LucenePartitionAllDocumentsReader.java

License:Open Source License

/**
 * Returns an iterator over the reader's documents.
 *
 * When the reader reports live docs, the iterator only yields ids for which
 * {@code liveDocs.get(doc)} is true; otherwise it yields every id below {@code maxDoc()}.
 *
 * @return iterator over the documents of {@code reader}
 */
private DocIdSetIterator iterateAllDocs() {
    final Bits liveDocs = MultiFields.getLiveDocs(reader);
    final DocIdSetIterator everyDoc = DocIdSetIterator.all(reader.maxDoc());
    if (liveDocs != null) {
        // Drop the ids that the live-docs bits do not mark as live.
        return new FilteredDocIdSetIterator(everyDoc) {
            @Override
            protected boolean match(int doc) {
                return liveDocs.get(doc);
            }
        };
    }
    return everyDoc;
}