Example usage for org.apache.lucene.index LeafReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index.LeafReader#maxDoc().

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number. Valid doc IDs in a segment run from 0 to maxDoc()-1, so this value is commonly used to size per-document arrays; note that it also counts deleted documents that have not yet been merged away, unlike numDocs().
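
As a minimal sketch of the usual pattern (illustrative only, not taken from the examples below): maxDoc() bounds a scan over every doc ID in the segment, and the live-docs bitset is consulted separately to skip deletions.

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    /** Counts live (non-deleted) documents by scanning doc IDs 0..maxDoc()-1. */
    static int countLiveDocs(LeafReader reader) {
        final int maxDoc = reader.maxDoc();         // valid doc IDs are 0 .. maxDoc-1
        final Bits liveDocs = reader.getLiveDocs(); // null when the segment has no deletions
        int live = 0;
        for (int docID = 0; docID < maxDoc; docID++) {
            if (liveDocs == null || liveDocs.get(docID)) {
                live++;
            }
        }
        return live; // should equal reader.numDocs()
    }
}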

Usage

From source file:org.apache.solr.uninverting.DocTermOrds.java

License:Apache License

/** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
    if (isEmpty()) {
        return DocValues.emptySortedSet();
    } else {
        return new LegacySortedSetDocValuesWrapper(new Iterator(reader), reader.maxDoc());
    }
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

void setDocsWithField(LeafReader reader, String field, Bits docsWithField, Parser parser) {
    final int maxDoc = reader.maxDoc();
    final Bits bits;
    if (docsWithField == null) {
        bits = new Bits.MatchNoBits(maxDoc);
    } else if (docsWithField instanceof FixedBitSet) {
        final int numSet = ((FixedBitSet) docsWithField).cardinality();
        if (numSet >= maxDoc) {
            // The cardinality of the BitSet is maxDoc if all documents have a value.
            assert numSet == maxDoc;
            bits = new Bits.MatchAllBits(maxDoc);
        } else {
            bits = docsWithField;
        }
    } else {
        bits = docsWithField;
    }
    caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, parser), new BitsEntry(bits));
}

From source file:org.apache.solr.uninverting.FieldCacheImpl.java

License:Apache License

@Override
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException {
    final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
    if (fieldInfo == null) {
        // field does not exist or has no value
        return new Bits.MatchNoBits(reader.maxDoc());
    }

    if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
        // doc values case
    } else if (parser instanceof PointParser) {
        // points case
    } else {
        // postings case
        if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
            return new Bits.MatchNoBits(reader.maxDoc());
        }
    }
    BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader,
            new CacheKey(field, parser));
    return bitsEntry.bits;
}

From source file:org.apache.solr.uninverting.TestDocTermOrds.java

License:Apache License

private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        throws Exception {

    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "field", prefixRef, Integer.MAX_VALUE,
            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
      System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
        System.out.println("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.utf8ToString()));
        System.out.println("TEST: all TERMS:");
        TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
        int ord = 0;
        while (allTE.next() != null) {
            System.out.println("  ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
        }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
        if (prefixRef == null) {
            assertNull(MultiFields.getTerms(r, "field"));
        } else {
            Terms terms = MultiFields.getTerms(r, "field");
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
                if (result != TermsEnum.SeekStatus.END) {
                    assertFalse(
                            "term=" + termsEnum.term().utf8ToString() + " matches prefix="
                                    + prefixRef.utf8ToString(),
                            StringHelper.startsWith(termsEnum.term(), prefixRef));
                } else {
                    // ok
                }
            } else {
                // ok
            }
        }
        return;
    }

    if (VERBOSE) {
        System.out.println("TEST: TERMS:");
        te.seekExact(0);
        while (true) {
            System.out.println("  ord=" + te.ord() + " term=" + te.term().utf8ToString());
            if (te.next() == null) {
                break;
            }
        }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for (int docID = 0; docID < r.maxDoc(); docID++) {
        assertEquals(docID, docIDToID.nextDoc());
        if (docID > iter.docID()) {
            iter.nextDoc();
        }
        if (docID < iter.docID()) {
            int[] answers = idToOrds[(int) docIDToID.longValue()];
            assertEquals(0, answers.length);
            continue;
        }

        if (VERBOSE) {
            System.out.println(
                    "TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.longValue() + ")");
        }
        final int[] answers = idToOrds[(int) docIDToID.longValue()];
        int upto = 0;
        long ord;
        while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            te.seekExact(ord);
            final BytesRef expected = termsArray[answers[upto++]];
            if (VERBOSE) {
                System.out.println("  exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
            }
            assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord="
                    + ord, expected, te.term());
        }
        assertEquals(answers.length, upto);
    }
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field indexedField = new StringField("indexed", "", Field.Store.NO);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    doc.add(idField);
    doc.add(indexedField);
    doc.add(dvField);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        idField.setStringValue(Integer.toString(i));
        final int length;
        if (minLength == maxLength) {
            length = minLength; // fixed length
        } else {
            length = TestUtil.nextInt(random(), minLength, maxLength);
        }
        String value = TestUtil.randomSimpleString(random(), length);
        indexedField.setStringValue(value);
        dvField.setBytesValue(new BytesRef(value));
        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();

    // compare
    DirectoryReader ir = DirectoryReader.open(dir);
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
        SortedDocValues actual = r.getSortedDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java

License:Apache License

private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
        doc.add(idField);
        final int length = TestUtil.nextInt(random(), minLength, maxLength);
        int numValues = random().nextInt(17);
        // create a random list of strings
        List<String> values = new ArrayList<>();
        for (int v = 0; v < numValues; v++) {
            values.add(TestUtil.randomSimpleString(random(), minLength, length));
        }

        // add in any order to the indexed field
        ArrayList<String> unordered = new ArrayList<>(values);
        Collections.shuffle(unordered, random());
        for (String v : unordered) {
            doc.add(newStringField("indexed", v, Field.Store.NO));
        }

        // add in any order to the dv field
        ArrayList<String> unordered2 = new ArrayList<>(values);
        Collections.shuffle(unordered2, random());
        for (String v : unordered2) {
            doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
        }

        writer.addDocument(doc);
        if (random().nextInt(31) == 0) {
            writer.commit();
        }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs / 10);
    for (int i = 0; i < numDeletions; i++) {
        int id = random().nextInt(numDocs);
        writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare per-segment
    DirectoryReader ir = writer.getReader();
    for (LeafReaderContext context : ir.leaves()) {
        LeafReader r = context.reader();
        SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
        SortedSetDocValues actual = r.getSortedSetDocValues("dv");
        assertEquals(r.maxDoc(), expected, actual);
    }
    ir.close();

    writer.forceMerge(1);

    // now compare again after the merge
    ir = writer.getReader();
    LeafReader ar = getOnlyLeafReader(ir);
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
    SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
    assertEquals(ir.maxDoc(), expected, actual);
    ir.close();

    writer.close();
    dir.close();
}

From source file:org.apache.solr.uninverting.TestFieldCacheWithThreads.java

License:Apache License

public void test2() throws Exception {
    Random random = random();
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();

    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final String s;
        if (random.nextBoolean()) {
            s = TestUtil.randomSimpleString(random);
        } else {
            s = TestUtil.randomUnicodeString(random);
        }
        final BytesRef br = new BytesRef(s);

        if (!allowDups) {
            if (seen.contains(s)) {
                continue;
            }
            seen.add(s);
        }

        if (VERBOSE) {
            System.out.println("  " + numDocs + ": s=" + s);
        }

        final Document doc = new Document();
        doc.add(new SortedDocValuesField("stringdv", br));
        doc.add(new NumericDocValuesField("id", numDocs));
        docValues.add(br);
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();

    final LeafReader sr = getOnlyLeafReader(r);

    final long END_TIME = System.nanoTime()
            + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);

    final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for (int thread = 0; thread < NUM_THREADS; thread++) {
        threads[thread] = new Thread() {
            @Override
            public void run() {
                Random random = random();
                final SortedDocValues stringDVDirect;
                final NumericDocValues docIDToID;
                try {
                    stringDVDirect = sr.getSortedDocValues("stringdv");
                    docIDToID = sr.getNumericDocValues("id");
                    assertNotNull(stringDVDirect);
                } catch (IOException ioe) {
                    throw new RuntimeException(ioe);
                }
                int[] docIDToIDArray = new int[sr.maxDoc()];
                for (int i = 0; i < sr.maxDoc(); i++) {
                    try {
                        assertEquals(i, docIDToID.nextDoc());
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                    try {
                        docIDToIDArray[i] = (int) docIDToID.longValue();
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
                while (System.nanoTime() < END_TIME) {
                    for (int iter = 0; iter < 100; iter++) {
                        final int docID = random.nextInt(sr.maxDoc());
                        try {
                            SortedDocValues dvs = sr.getSortedDocValues("stringdv");
                            assertEquals(docID, dvs.advance(docID));
                            assertEquals(docValues.get(docIDToIDArray[docID]), dvs.binaryValue());
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            }
        };
        threads[thread].start();
    }

    for (Thread thread : threads) {
        thread.join();
    }

    r.close();
    dir.close();
}

From source file:org.codelibs.elasticsearch.search.slice.TermsSliceQuery.java

License:Apache License

/**
 * Returns a per-segment DocIdSet containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}

From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java

License:Apache License

@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
    LeafReader reader = context.reader();

    Terms terms = reader.terms(getFieldNames().indexName());
    AtomicGeoPointFieldData data = null;
    // TODO: Use an actual estimator to estimate before loading.
    NonEstimatingEstimator estimator = new NonEstimatingEstimator(
            breakerService.getBreaker(CircuitBreaker.FIELDDATA));
    if (terms == null) {
        data = AbstractAtomicGeoPointFieldData.empty(reader.maxDoc());
        estimator.afterLoad(null, data.ramBytesUsed());
        return data;
    }
    return (Version.indexCreated(indexSettings).before(Version.V_2_2_0))
            ? loadLegacyFieldData(reader, estimator, terms, data)
            : loadFieldData22(reader, estimator, terms, data);
}

From source file:org.elasticsearch.index.fielddata.plain.GeoPointArrayIndexFieldData.java

License:Apache License

/**
 * Long-encoded geopoint field data.
 */
private AtomicGeoPointFieldData loadFieldData22(LeafReader reader, NonEstimatingEstimator estimator,
        Terms terms, AtomicGeoPointFieldData data) throws Exception {
    LongArray indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.newLongArray(128);
    final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat(
            "acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
    boolean success = false;
    try (OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio)) {
        final TermsEnum termsEnum;
        final GeoPointField.TermEncoding termEncoding;
        if (Version.indexCreated(indexSettings).onOrAfter(Version.V_2_3_0)) {
            termEncoding = GeoPointField.TermEncoding.PREFIX;
            termsEnum = OrdinalsBuilder.wrapGeoPointTerms(terms.iterator());
        } else {
            termEncoding = GeoPointField.TermEncoding.NUMERIC;
            termsEnum = OrdinalsBuilder.wrapNumeric64Bit(terms.iterator());
        }

        final GeoPointTermsEnum iter = new GeoPointTermsEnum(builder.buildFromTerms(termsEnum), termEncoding);
        Long hashedPoint;
        long numTerms = 0;
        while ((hashedPoint = iter.next()) != null) {
            indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.resize(indexedPoints, numTerms + 1);
            indexedPoints.set(numTerms++, hashedPoint);
        }
        indexedPoints = BigArrays.NON_RECYCLING_INSTANCE.resize(indexedPoints, numTerms);

        Ordinals build = builder.build(fieldDataType.getSettings());
        RandomAccessOrds ordinals = build.ordinals();
        if (!(FieldData.isMultiValued(ordinals) || CommonSettings
                .getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS)) {
            int maxDoc = reader.maxDoc();
            LongArray sIndexedPoint = BigArrays.NON_RECYCLING_INSTANCE.newLongArray(maxDoc);
            for (int i = 0; i < maxDoc; ++i) {
                ordinals.setDocument(i);
                long nativeOrdinal = ordinals.nextOrd();
                if (nativeOrdinal != RandomAccessOrds.NO_MORE_ORDS) {
                    sIndexedPoint.set(i, indexedPoints.get(nativeOrdinal));
                }
            }
            BitSet set = builder.buildDocsWithValuesSet();
            data = new GeoPointArrayAtomicFieldData.Single(sIndexedPoint, set);
        } else {
            data = new GeoPointArrayAtomicFieldData.WithOrdinals(indexedPoints, build, reader.maxDoc());
        }
        success = true;
        return data;
    } finally {
        if (success) {
            estimator.afterLoad(null, data.ramBytesUsed());
        }
    }
}