List of usage examples for org.apache.lucene.index.IndexReader.maxDoc()
public abstract int maxDoc();
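maxDoc() returns one greater than the largest document number that may be assigned, so doc IDs always fall in [0, maxDoc()); documents that are deleted but not yet merged away still count, whereas numDocs() excludes them. A minimal sketch of the distinction, assuming a Lucene 5.x+ style API (the index path is hypothetical):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import java.nio.file.Paths;

public class MaxDocDemo {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(
                FSDirectory.open(Paths.get("/path/to/index")))) {  // hypothetical path
            // maxDoc(): one greater than the largest doc ID ever assigned,
            // including deleted documents not yet merged away.
            int maxDoc = reader.maxDoc();
            // numDocs(): live (non-deleted) documents only.
            int numDocs = reader.numDocs();
            System.out.println("maxDoc=" + maxDoc + " numDocs=" + numDocs
                + " deleted=" + (maxDoc - numDocs));
        }
    }
}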
From source file:org.apache.solr.search.function.ScaleFloatFunction.java
License:Apache License
@Override
public DocValues getValues(Map context, IndexReader reader) throws IOException {
    final DocValues vals = source.getValues(context, reader);
    int maxDoc = reader.maxDoc(); // this doesn't take into account deleted docs!

    float minVal = 0.0f;
    float maxVal = 0.0f;

    if (maxDoc > 0) {
        minVal = maxVal = vals.floatVal(0);
    }

    // Traverse the complete set of values to get the min and the max.
    // Future alternatives include being able to ask a DocValues for min/max
    // Another memory-intensive option is to cache the values in
    // a float[] on this first pass.
    for (int i = 0; i < maxDoc; i++) {
        float val = vals.floatVal(i);
        if ((Float.floatToRawIntBits(val) & (0xff << 23)) == 0xff << 23) {
            // if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
            // which don't make sense to factor into the scale function
            continue;
        }
        if (val < minVal) {
            minVal = val;
        } else if (val > maxVal) {
            maxVal = val;
        }
    }

    final float scale = (maxVal - minVal == 0) ? 0 : (max - min) / (maxVal - minVal);
    final float minSource = minVal;
    final float maxSource = maxVal;

    return new DocValues() {
        @Override
        public float floatVal(int doc) {
            return (vals.floatVal(doc) - minSource) * scale + min;
        }

        @Override
        public int intVal(int doc) {
            return (int) floatVal(doc);
        }

        @Override
        public long longVal(int doc) {
            return (long) floatVal(doc);
        }

        @Override
        public double doubleVal(int doc) {
            return (double) floatVal(doc);
        }

        @Override
        public String strVal(int doc) {
            return Float.toString(floatVal(doc));
        }

        @Override
        public String toString(int doc) {
            return "scale(" + vals.toString(doc) + ",toMin=" + min + ",toMax=" + max
                + ",fromMin=" + minSource + ",fromMax=" + maxSource + ")";
        }
    };
}
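As the inline comment warns, the min/max scan above visits every slot up to maxDoc(), deleted or not. A minimal sketch of the same scan restricted to live documents, assuming the pre-4.0 IndexReader API used here, where isDeleted(int) is available (an illustration, not ScaleFloatFunction's actual behavior):

// Hedged sketch: the same min/max pass, but skipping deleted documents.
// Assumes a pre-4.0 IndexReader; "vals" is the DocValues from above.
static float[] minMaxLiveDocs(IndexReader reader, DocValues vals) {
    int maxDoc = reader.maxDoc();
    float minVal = Float.POSITIVE_INFINITY;
    float maxVal = Float.NEGATIVE_INFINITY;
    for (int i = 0; i < maxDoc; i++) {
        if (reader.isDeleted(i)) {
            continue; // ignore slots belonging to deleted documents
        }
        float val = vals.floatVal(i);
        if (Float.isNaN(val) || Float.isInfinite(val)) {
            continue; // same +/-Inf and NaN exclusion as above
        }
        minVal = Math.min(minVal, val);
        maxVal = Math.max(maxVal, val);
    }
    return new float[] { minVal, maxVal };
}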
From source file:org.apache.solr.search.function.ValueSourceScorer.java
License:Apache License
protected ValueSourceScorer(IndexReader reader, DocValues values) {
    super(null, null);
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    this.values = values;
    setCheckDeletes(true);
}
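Caching maxDoc() in the constructor lets the scorer treat it as the exclusive upper bound of the doc-id space. A minimal sketch of how such a scorer typically advances, in the style of ValueSourceScorer's iteration (matches(doc) is a hypothetical stand-in for the value test; this is not the class's verbatim nextDoc()):

// Hedged sketch of a maxDoc-bounded advance loop.
public int nextDoc() throws IOException {
    for (;;) {
        doc++;
        if (doc >= maxDoc) {
            return doc = DocIdSetIterator.NO_MORE_DOCS;
        }
        if (matches(doc)) { // hypothetical predicate over the DocValues
            return doc;
        }
    }
}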
From source file:org.apache.solr.search.PrefixFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new PrefixGenerator(prefix) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}
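The OpenBitSet is sized to reader.maxDoc() because every document ID the generator can hand back is strictly less than maxDoc(). A minimal sketch of consuming the returned DocIdSet, assuming the same Lucene 3.x era API (the filter variable is hypothetical):

// A minimal sketch: iterate the matches produced by a filter like the one above.
DocIdSet docs = filter.getDocIdSet(reader);   // e.g. a PrefixFilter instance
DocIdSetIterator it = docs.iterator();
int doc;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // every doc returned here is < reader.maxDoc()
    System.out.println("matched doc: " + doc);
}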
From source file:org.apache.solr.search.TestDocSet.java
License:Apache License
public void doFilterTest(IndexReader reader) throws IOException {
    IndexReaderContext topLevelContext = reader.getContext();
    OpenBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc() + 1));
    DocSet a = new BitDocSet(bs);
    DocSet b = getIntDocSet(bs);

    Filter fa = a.getTopFilter();
    Filter fb = b.getTopFilter();

    /*** top level filters are no longer supported
    // test top-level
    DocIdSet da = fa.getDocIdSet(topLevelContext);
    DocIdSet db = fb.getDocIdSet(topLevelContext);
    doTestIteratorEqual(da, db);
    ***/

    DocIdSet da;
    DocIdSet db;
    List<AtomicReaderContext> leaves = topLevelContext.leaves();

    // first test in-sequence sub readers
    for (AtomicReaderContext readerContext : leaves) {
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }

    int nReaders = leaves.size();
    // now test out-of-sequence sub readers
    for (int i = 0; i < nReaders; i++) {
        AtomicReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }
}
From source file:org.apache.solr.search.WildcardFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new WildcardGenerator(term) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}
From source file:org.apache.solr.spelling.SpellCheckCollator.java
License:Apache License
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();

    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }
        }
    }

    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }

    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }

    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(),
            maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {

        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;

        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrParams origParams = ultimateResponse.req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);

            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(
                    new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),
                    new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.setQueryString(collationQueryStr);
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);

            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();

                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected()))
                            / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            SpellCheckCollation collation = new SpellCheckCollation();
            collation.setCollationQuery(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(suggestionsMayOverlap
                    ? ((possibility.rank * 1000) + possibility.index)
                    : possibility.rank);

            NamedList<String> misspellingsAndCorrections = new NamedList<String>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + collationQueryStr
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}
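The EarlyTerminatingCollectorException branch above uses maxDoc() to extrapolate a full-index hit count from a partial scan: numberCollected hits among numberScanned scanned docs are scaled up to maxDocId docs. A worked sketch of that arithmetic with hypothetical numbers:

// Hypothetical figures, just to illustrate the estimate above.
int maxDocId = 1_000_000;     // reader.maxDoc()
int numberScanned = 50_000;   // docs visited before early termination
int numberCollected = 120;    // hits found among the scanned docs

// Scale the observed hit rate up to the whole doc-id space.
// (Note: the int product can overflow for very large indexes;
// these figures stay well within range.)
int estimatedHits = (int) (((float) (maxDocId * numberCollected)) / (float) numberScanned);
System.out.println(estimatedHits); // prints 2400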
From source file:org.apache.solr.uninverting.TestFieldCacheSortRandom.java
License:Apache License
private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());

    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength
            + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();

        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }

            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }

            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }

            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));
        } else {
            br = null;
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }

        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }

    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();

        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;

        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }

        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort,
                random.nextBoolean(), random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }

        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount
                + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {
            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });

        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }

        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString())
                    + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }

            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
From source file:org.archive.nutchwax.tools.LengthNormUpdater.java
License:Apache License
/**
 *
 */
public static void reSetNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks,
        Similarity sim) throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    int[] termCounts = new int[0];
    TermEnum termEnum = null;
    TermDocs termDocs = null;

    termCounts = new int[reader.maxDoc()];
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);
            String url = doc.get("url");
            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;
                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);
                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig
                        + "\t" + rankedNorm + "\t" + encodedRank);
                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}
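The re-ranking multiplies each original length norm by log10(rank) + 1, so rank 1 leaves the norm unchanged, rank 10 doubles it, and rank 100 triples it. A small sketch of that factor with hypothetical ranks:

// Hypothetical ranks, just to illustrate the boost factor used above.
for (int rank : new int[] { 1, 10, 100, 1000 }) {
    float factor = (float) (Math.log10(rank) + 1);
    System.out.println("rank=" + rank + " -> norm factor " + factor);
}
// prints: rank=1 -> 1.0, rank=10 -> 2.0, rank=100 -> 3.0, rank=1000 -> 4.0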
From source file:org.archive.tnh.tools.LengthNormUpdater.java
License:Apache License
/**
 *
 */
public static void updateNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks,
        Similarity sim) throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    int[] termCounts = new int[0];
    TermEnum termEnum = null;
    TermDocs termDocs = null;

    termCounts = new int[reader.maxDoc()];
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);
            String url = doc.get("url");
            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;
                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);
                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig
                        + "\t" + rankedNorm + "\t" + encodedRank);
                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}
From source file:org.codelibs.elasticsearch.index.mapper.MappedFieldType.java
License:Apache License
/**
 * @return a {@link FieldStats} instance that maps to the type of this
 *         field or {@code null} if the provided index has no stats about the
 *         current field
 */
public FieldStats stats(IndexReader reader) throws IOException {
    int maxDoc = reader.maxDoc();
    FieldInfo fi = MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
    if (fi == null) {
        return null;
    }
    Terms terms = MultiFields.getTerms(reader, name());
    if (terms == null) {
        return new FieldStats.Text(maxDoc, 0, -1, -1, isSearchable(), isAggregatable());
    }
    FieldStats stats = new FieldStats.Text(maxDoc, terms.getDocCount(), terms.getSumDocFreq(),
        terms.getSumTotalTermFreq(), isSearchable(), isAggregatable(), terms.getMin(), terms.getMax());
    return stats;
}
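Here maxDoc() supplies the denominator for the stats: of maxDoc documents, terms.getDocCount() contain at least one term for the field. A minimal sketch of using those two numbers to compute field density (the reader and field name are hypothetical):

// A minimal sketch, assuming an open IndexReader; getDocCount() is the
// number of documents with at least one term in the field.
Terms terms = MultiFields.getTerms(reader, "title"); // field name hypothetical
if (terms != null) {
    double density = (double) terms.getDocCount() / reader.maxDoc();
    System.out.printf("%.1f%% of documents have the field%n", density * 100);
}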