Example usage for org.apache.lucene.index IndexReader maxDoc


Introduction

This page shows example usages of org.apache.lucene.index IndexReader maxDoc, drawn from the open-source projects listed below.

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number. This may be used, for example, to size an array that holds an element for every document number in the index.
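
As a quick illustration of that contract, here is a minimal sketch (not taken from the examples below; it assumes the Lucene 4.x-era MultiFields/Bits API that several snippets on this page use) of walking the full document-ID space while skipping deleted documents:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

public class MaxDocSketch {
    // Counts live documents by walking the ID space [0, maxDoc()).
    // Deleted documents keep their IDs until segments merge, so the
    // result should match reader.numDocs(), not reader.maxDoc().
    public static int countLiveDocs(IndexReader reader) throws IOException {
        final Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions
        int live = 0;
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (liveDocs == null || liveDocs.get(docId)) {
                live++;
            }
        }
        return live;
    }
}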

Usage

From source file:org.apache.solr.search.function.ScaleFloatFunction.java

License:Apache License

@Override
public DocValues getValues(Map context, IndexReader reader) throws IOException {
    final DocValues vals = source.getValues(context, reader);
    int maxDoc = reader.maxDoc();

    // this doesn't take into account deleted docs!
    float minVal = 0.0f;
    float maxVal = 0.0f;

    if (maxDoc > 0) {
        minVal = maxVal = vals.floatVal(0);
    }

    // Traverse the complete set of values to get the min and the max.
    // Future alternatives include being able to ask a DocValues for min/max
    // Another memory-intensive option is to cache the values in
    // a float[] on this first pass.

    for (int i = 0; i < maxDoc; i++) {
        float val = vals.floatVal(i);
        if ((Float.floatToRawIntBits(val) & (0xff << 23)) == 0xff << 23) {
            // if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
            // which don't make sense to factor into the scale function
            continue;
        }
        if (val < minVal) {
            minVal = val;
        } else if (val > maxVal) {
            maxVal = val;
        }
    }

    // min and max are the configured target range, fields of the enclosing ScaleFloatFunction
    final float scale = (maxVal - minVal == 0) ? 0 : (max - min) / (maxVal - minVal);
    final float minSource = minVal;
    final float maxSource = maxVal;

    return new DocValues() {
        @Override
        public float floatVal(int doc) {
            return (vals.floatVal(doc) - minSource) * scale + min;
        }

        @Override
        public int intVal(int doc) {
            return (int) floatVal(doc);
        }

        @Override
        public long longVal(int doc) {
            return (long) floatVal(doc);
        }

        @Override
        public double doubleVal(int doc) {
            return (double) floatVal(doc);
        }

        @Override
        public String strVal(int doc) {
            return Float.toString(floatVal(doc));
        }

        @Override
        public String toString(int doc) {
            return "scale(" + vals.toString(doc) + ",toMin=" + min + ",toMax=" + max + ",fromMin=" + minSource
                    + ",fromMax=" + maxSource + ")";
        }
    };
}
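
The function above is a plain linear rescale from the observed range [minVal, maxVal] into the configured range [min, max]. As a standalone sketch of the same arithmetic (the method name and parameters here are illustrative, not Solr API):

// Maps val from [fromMin, fromMax] onto [toMin, toMax]. When the source
// range is degenerate (fromMax == fromMin), scale collapses to 0 and every
// value maps to toMin, matching the guard in getValues() above.
static float rescale(float val, float fromMin, float fromMax, float toMin, float toMax) {
    float scale = (fromMax - fromMin == 0) ? 0 : (toMax - toMin) / (fromMax - fromMin);
    return (val - fromMin) * scale + toMin;
}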

From source file:org.apache.solr.search.function.ValueSourceScorer.java

License:Apache License

protected ValueSourceScorer(IndexReader reader, DocValues values) {
    super(null, null);
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    this.values = values;
    setCheckDeletes(true);
}

From source file:org.apache.solr.search.PrefixFilter.java

License:Apache License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new PrefixGenerator(prefix) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}
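
Note that the bitset is allocated with reader.maxDoc() bits: every document ID in the reader is strictly less than maxDoc(), so any doc the generator reports can be set without resizing. The WildcardFilter example below uses the identical pattern.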

From source file:org.apache.solr.search.TestDocSet.java

License:Apache License

public void doFilterTest(IndexReader reader) throws IOException {
    IndexReaderContext topLevelContext = reader.getContext();
    OpenBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc() + 1));
    DocSet a = new BitDocSet(bs);
    DocSet b = getIntDocSet(bs);

    Filter fa = a.getTopFilter();
    Filter fb = b.getTopFilter();

    /*** top level filters are no longer supported
    // test top-level
    DocIdSet da = fa.getDocIdSet(topLevelContext);
    DocIdSet db = fb.getDocIdSet(topLevelContext);
    doTestIteratorEqual(da, db);
    ***/

    DocIdSet da;
    DocIdSet db;
    List<AtomicReaderContext> leaves = topLevelContext.leaves();

    // first test in-sequence sub readers
    for (AtomicReaderContext readerContext : leaves) {
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }

    int nReaders = leaves.size();
    // now test out-of-sequence sub readers
    for (int i = 0; i < nReaders; i++) {
        AtomicReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }
}

From source file:org.apache.solr.search.WildcardFilter.java

License:Apache License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new WildcardGenerator(term) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}

From source file:org.apache.solr.spelling.SpellCheckCollator.java

License:Apache License

public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();

    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }
        }
    }

    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info(
                "Could not find an instance of QueryComponent.  Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }
    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }

    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate,
            maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {

        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;

        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrParams origParams = ultimateResponse.req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);

            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(
                    new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params), new SolrQueryResponse(),
                    Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.setQueryString(collationQueryStr);
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);

            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f | SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();

                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected()))
                            / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn(
                        "Exception trying to re-query to check if a spell check possibility would return any hits.",
                        e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            SpellCheckCollation collation = new SpellCheckCollation();
            collation.setCollationQuery(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(
                    suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index) : possibility.rank);

            NamedList<String> misspellingsAndCorrections = new NamedList<String>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + collationQueryStr
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}
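
One detail worth isolating: when early termination stops collection before the whole index is scanned, the code above extrapolates the hit count linearly across the document-ID space. A sketch of that estimate (the method name is illustrative, not Solr API):

// If only numScanned of maxDoc documents were examined and numCollected of
// them matched, assume matches are evenly distributed and scale up.
// E.g. maxDoc = 1000000, numScanned = 10000, numCollected = 25
// gives an estimate of 1000000 * 25 / 10000 = 2500 hits.
static int estimateHits(int maxDoc, int numScanned, int numCollected) {
    if (numScanned == maxDoc) {
        return numCollected; // the whole index was scanned: exact count
    }
    return (int) (((float) maxDoc * numCollected) / (float) numScanned);
}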

From source file:org.apache.solr.uninverting.TestFieldCacheSortRandom.java

License:Apache License

private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());

    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();

        // 10% of the time, the document is missing the value:
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }

            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }

            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }

            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));

        } else {
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }

        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }

    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();

        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;

        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }

        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort, random.nextBoolean(),
                    random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }

        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount
                    + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {
            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });

        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }

        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];

                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString())
                        + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null!  So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }

            assertEquals(br, br2);
        }
    }

    r.close();
    dir.close();
}

From source file:org.archive.nutchwax.tools.LengthNormUpdater.java

License:Apache License

/**
 * Recomputes the length norm of {@code fieldName} for every ranked document,
 * scaling the original norm by log10(rank) + 1.
 */
public static void reSetNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks, Similarity sim)
        throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    // One counter slot per document ID: maxDoc() is the correct size even
    // when the index contains deletions.
    int[] termCounts = new int[reader.maxDoc()];

    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);

            String url = doc.get("url");

            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;

                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);

                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig + "\t"
                            + rankedNorm + "\t" + encodedRank);

                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}

From source file:org.archive.tnh.tools.LengthNormUpdater.java

License:Apache License

/**
 * Recomputes the length norm of {@code fieldName} for every ranked document,
 * scaling the original norm by log10(rank) + 1.
 */
public static void updateNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks, Similarity sim)
        throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    // One counter slot per document ID: maxDoc() is the correct size even
    // when the index contains deletions.
    int[] termCounts = new int[reader.maxDoc()];

    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);

            String url = doc.get("url");

            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;

                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);

                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig + "\t"
                            + rankedNorm + "\t" + encodedRank);

                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}
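
Both LengthNormUpdater variants lean on the same idiom: a per-document array sized by maxDoc(), indexed directly by document ID, with deleted slots skipped. A minimal sketch of the pattern, assuming the same pre-4.0 IndexReader.isDeleted(int) API used above:

// maxDoc() is the required length for any array indexed by document ID,
// because deleted documents keep their IDs until a merge compacts them.
int[] perDoc = new int[reader.maxDoc()];
for (int d = 0; d < perDoc.length; d++) {
    if (reader.isDeleted(d)) {
        continue; // the ID exists in [0, maxDoc) but the document is gone
    }
    perDoc[d]++; // accumulate whatever per-document statistic is needed
}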

From source file:org.codelibs.elasticsearch.index.mapper.MappedFieldType.java

License:Apache License

/**
 * @return a {@link FieldStats} instance that maps to the type of this
 * field, or {@code null} if the provided index has no stats about the
 * current field
 */
public FieldStats stats(IndexReader reader) throws IOException {
    int maxDoc = reader.maxDoc();
    FieldInfo fi = MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
    if (fi == null) {
        return null;
    }
    Terms terms = MultiFields.getTerms(reader, name());
    if (terms == null) {
        return new FieldStats.Text(maxDoc, 0, -1, -1, isSearchable(), isAggregatable());
    }
    FieldStats stats = new FieldStats.Text(maxDoc, terms.getDocCount(), terms.getSumDocFreq(),
            terms.getSumTotalTermFreq(), isSearchable(), isAggregatable(), terms.getMin(), terms.getMax());
    return stats;
}
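
Here maxDoc acts as the denominator for the field statistics: it is the size of the reader's whole document-ID space, while terms.getDocCount() counts only documents that actually contain the field, so the two together describe how densely the field is populated.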