Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader#maxDoc; the snippets below are drawn from the Elasticsearch codebase.

Prototype

public abstract int maxDoc();

Documentation

Returns one greater than the largest possible document number. This may be used to, e.g., determine how big to allocate an array which will have an element for every document number in an index. Note that documents that have been deleted but not yet merged away are still counted.
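
As a quick illustration of that contract, here is a minimal, self-contained sketch (not taken from the sources below; it assumes a recent Lucene where ByteBuffersDirectory is available). Deleting a document leaves maxDoc() unchanged until a merge removes it, so maxDoc() == numDocs() + numDeletedDocs():

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class MaxDocContract {
    public static void main(String[] args) throws Exception {
        Directory directory = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (int i = 0; i < 3; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));
                writer.addDocument(doc);
            }
            // mark one document as deleted; it is not merged away here
            writer.deleteDocuments(new Term("id", "1"));
            writer.commit();
        }
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            // prints: maxDoc=3 numDocs=2 numDeletedDocs=1
            System.out.println("maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs()
                    + " numDeletedDocs=" + reader.numDeletedDocs());
        }
    }
}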

Usage

From source file: org.elasticsearch.common.lucene.index.FilterableTermsEnum.java

License: Apache License

public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter)
        throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}
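
Two details above are worth calling out: the unfiltered count uses the top-level reader.maxDoc() so that deleted documents are included, while each BitDocIdSet.Builder is sized with the leaf's context.reader().maxDoc(), because scorers produce segment-relative doc IDs. A stripped-down sketch of that per-leaf sizing pattern (illustrative only, not from the Elasticsearch sources):

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.FixedBitSet;

public final class PerLeafBitSets {
    /** Allocates one bitset per segment, each sized for that segment's doc ID space. */
    public static List<FixedBitSet> allocate(IndexReader reader) {
        List<FixedBitSet> sets = new ArrayList<>(reader.leaves().size());
        for (LeafReaderContext context : reader.leaves()) {
            // segment-relative doc IDs run from 0 to leaf maxDoc() - 1
            sets.add(new FixedBitSet(context.reader().maxDoc()));
        }
        return sets;
    }
}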

From source file: org.elasticsearch.common.lucene.search.TermFilter.java

License: Apache License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet result = null;
    TermDocs td = reader.termDocs();
    try {
        td.seek(term);
        if (td.next()) {
            result = new OpenBitSet(reader.maxDoc());
            result.fastSet(td.doc());
            while (td.next()) {
                result.fastSet(td.doc());
            }
        }
    } finally {
        td.close();
    }
    return result;
}
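
This snippet targets the pre-4.0 Lucene API (TermDocs, OpenBitSet) that early Elasticsearch built on. For comparison, here is a rough sketch of the same idea against the modern per-leaf API, sizing a FixedBitSet with the leaf reader's maxDoc(); it is illustrative only and not taken from the Elasticsearch sources:

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

public final class TermBitSet {
    /** Returns the segment-local doc IDs containing the term, or null if the term is absent. */
    public static FixedBitSet docsWithTerm(LeafReader reader, Term term) throws IOException {
        Terms terms = reader.terms(term.field());
        if (terms == null) {
            return null;
        }
        TermsEnum termsEnum = terms.iterator();
        if (!termsEnum.seekExact(term.bytes())) {
            return null;
        }
        // size the bitset with maxDoc(): one bit per possible doc ID in the segment
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.NONE);
        for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
            result.set(doc);
        }
        return result;
    }
}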

From source file: org.elasticsearch.index.cache.filter.support.FilterCacheValue.java

License: Apache License

public static DocSet cacheable(IndexReader reader, @Nullable LongsLAB longsLAB, DocIdSet set)
        throws IOException {
    if (set == null) {
        return DocSet.EMPTY_DOC_SET;
    }
    if (set == DocIdSet.EMPTY_DOCIDSET) {
        return DocSet.EMPTY_DOC_SET;
    }

    DocIdSetIterator it = set.iterator();
    if (it == null) {
        return DocSet.EMPTY_DOC_SET;
    }
    int doc = it.nextDoc();
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
        return DocSet.EMPTY_DOC_SET;
    }

    // if we have a LAB, check whether it can be used; otherwise fall back
    if (longsLAB == null) {
        return DocSets.cacheable(reader, set);
    }

    int numOfWords = OpenBitSet.bits2words(reader.maxDoc());
    LongsLAB.Allocation allocation = longsLAB.allocateLongs(numOfWords);
    if (allocation == null) {
        return DocSets.cacheable(reader, set);
    }
    // we have an allocation, use it to create SlicedOpenBitSet
    if (set instanceof OpenBitSet) {
        return new SlicedOpenBitSet(allocation.getData(), allocation.getOffset(), (OpenBitSet) set);
    } else if (set instanceof OpenBitDocSet) {
        return new SlicedOpenBitSet(allocation.getData(), allocation.getOffset(), ((OpenBitDocSet) set).set());
    } else {
        SlicedOpenBitSet slicedSet = new SlicedOpenBitSet(allocation.getData(), numOfWords,
                allocation.getOffset());
        slicedSet.fastSet(doc); // we already have an open iterator, so use it, and don't forget to set the initial one
        while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            slicedSet.fastSet(doc);
        }
        return slicedSet;
    }
}
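
OpenBitSet.bits2words(reader.maxDoc()) computes how many 64-bit longs are needed to hold one bit per possible document ID, which is exactly the capacity the LongsLAB allocation has to provide. The rounding is equivalent to this small sketch (mirroring the old OpenBitSet helper, not copied from it):

public final class BitMath {
    /** Number of 64-bit words needed to hold numBits bits, for numBits > 0. */
    public static int bits2words(int numBits) {
        // round up: e.g. maxDoc 1..64 -> 1 word, 65..128 -> 2 words
        return ((numBits - 1) >>> 6) + 1;
    }
}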

From source file: org.elasticsearch.index.field.data.support.FieldDataLoader.java

License: Apache License

@SuppressWarnings({ "StringEquality" })
public static <T extends FieldData> T load(IndexReader reader, String field, TypeLoader<T> loader)
        throws IOException {

    loader.init();

    field = StringHelper.intern(field);
    ArrayList<int[]> ordinals = new ArrayList<int[]>();
    ordinals.add(new int[reader.maxDoc()]);

    int t = 1; // current term number

    TermDocs termDocs = reader.termDocs();
    TermEnum termEnum = reader.terms(new Term(field));
    try {
        do {
            Term term = termEnum.term();
            if (term == null || term.field() != field)
                break;
            loader.collectTerm(term.text());
            termDocs.seek(termEnum);
            while (termDocs.next()) {
                int doc = termDocs.doc();
                boolean found = false;
                for (int i = 0; i < ordinals.size(); i++) {
                    int[] ordinal = ordinals.get(i);
                    if (ordinal[doc] == 0) {
                        // we found a spot, use it
                        ordinal[doc] = t;
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    // did not find one, increase by one and redo
                    int[] ordinal = new int[reader.maxDoc()];
                    ordinals.add(ordinal);
                    ordinal[doc] = t;
                }
            }
            t++;
        } while (termEnum.next());
    } catch (RuntimeException e) {
        if (e.getClass().getName().endsWith("StopFillCacheException")) {
            // all is well, in case numeric parsers are used.
        } else {
            throw e;
        }
    } finally {
        termDocs.close();
        termEnum.close();
    }

    if (ordinals.size() == 1) {
        return loader.buildSingleValue(field, ordinals.get(0));
    } else {
        int[][] nativeOrdinals = new int[ordinals.size()][];
        for (int i = 0; i < nativeOrdinals.length; i++) {
            nativeOrdinals[i] = ordinals.get(i);
        }
        return loader.buildMultiValue(field, nativeOrdinals);
    }
}
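
The loader uses the document ID as an array index: valid IDs run from 0 to reader.maxDoc() - 1, so int[reader.maxDoc()] has exactly one ordinal slot per document, deleted or not. The same sizing idiom carries over to modern iterator-style doc values, as in this hypothetical sketch (assuming Lucene 7 or later; not part of the Elasticsearch loader):

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;

public final class PerDocValues {
    /** Copies a numeric doc-values field into a plain array with one slot per doc ID. */
    public static long[] toArray(LeafReader reader, String field) throws IOException {
        long[] values = new long[reader.maxDoc()]; // slots 0 .. maxDoc()-1
        NumericDocValues dv = reader.getNumericDocValues(field);
        if (dv == null) {
            return values; // field absent in this segment: all zeros
        }
        for (int doc = dv.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = dv.nextDoc()) {
            values[doc] = dv.longValue();
        }
        return values;
    }
}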

From source file: org.elasticsearch.index.fieldstats.FieldStatsProvider.java

License: Apache License

/**
 * @param field
 *            the name of the field to return {@link FieldStats} for.
 * @return a {@link FieldStats} object for the given field
 * @throws IOException
 *             if the field statistics cannot be read
 */
public <T extends Comparable<T>> FieldStats<T> get(String field) throws IOException {
    MappedFieldType mappedFieldType = mapperService.fullName(field);
    if (mappedFieldType != null) {
        IndexReader reader = searcher.reader();
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms != null) {
            return mappedFieldType.stats(terms, reader.maxDoc());
        }
    }
    return null;
}
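
Here reader.maxDoc() supplies the total document count that accompanies the per-field term statistics, letting consumers of FieldStats relate a field's document frequency figures to the overall size of the index.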

From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java

License: Apache License

public void testLoadQueries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    boolean legacyFormat = randomBoolean();
    Version version = legacyFormat ? Version.V_2_0_0 : Version.CURRENT;
    IndexShard indexShard = mockIndexShard(version, legacyFormat);

    storeQuery("0", indexWriter, termQuery("field1", "value1"), true, legacyFormat);
    storeQuery("1", indexWriter, wildcardQuery("field1", "v*"), true, legacyFormat);
    storeQuery("2", indexWriter,
            boolQuery().must(termQuery("field1", "value1")).must(termQuery("field2", "value2")), true,
            legacyFormat);
    // dummy docs should be skipped during loading:
    Document doc = new Document();
    doc.add(new StringField("dummy", "value", Field.Store.YES));
    indexWriter.addDocument(doc);
    storeQuery("4", indexWriter, termQuery("field2", "value2"), true, legacyFormat);
    // only documents of the .percolator type should be loaded:
    storeQuery("5", indexWriter, termQuery("field2", "value2"), false, legacyFormat);
    storeQuery("6", indexWriter, termQuery("field3", "value3"), true, legacyFormat);
    indexWriter.forceMerge(1);

    // also include queries for percolator docs marked as deleted:
    indexWriter.deleteDocuments(new Term("id", "6"));
    indexWriter.close();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.numDeletedDocs(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(7));

    initialize("field1", "type=keyword", "field2", "type=keyword", "field3", "type=keyword");

    PercolatorQueryCache.QueriesLeaf leaf = cache.loadQueries(indexReader.leaves().get(0), indexShard);
    assertThat(leaf.queries.size(), equalTo(5));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("field1", "value1"))));
    assertThat(leaf.getQuery(1), equalTo(new WildcardQuery(new Term("field1", "v*"))));
    assertThat(leaf.getQuery(2),
            equalTo(new BooleanQuery.Builder()
                    .add(new TermQuery(new Term("field1", "value1")), BooleanClause.Occur.MUST)
                    .add(new TermQuery(new Term("field2", "value2")), BooleanClause.Occur.MUST).build()));
    assertThat(leaf.getQuery(4), equalTo(new TermQuery(new Term("field2", "value2"))));
    assertThat(leaf.getQuery(6), equalTo(new TermQuery(new Term("field3", "value3"))));

    indexReader.close();
    directory.close();
}
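
The assertions here trace the maxDoc() contract directly: six stored queries plus one dummy document yield maxDoc() == 7, and the document deleted for query "6" shows up in numDeletedDocs() rather than reducing maxDoc(), since nothing merges it away.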

From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java

License: Apache License

public void testGetQueries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    storeQuery("0", indexWriter, termQuery("a", "0"), true, false);
    storeQuery("1", indexWriter, termQuery("a", "1"), true, false);
    storeQuery("2", indexWriter, termQuery("a", "2"), true, false);
    indexWriter.flush();
    storeQuery("3", indexWriter, termQuery("a", "3"), true, false);
    storeQuery("4", indexWriter, termQuery("a", "4"), true, false);
    storeQuery("5", indexWriter, termQuery("a", "5"), true, false);
    indexWriter.flush();
    storeQuery("6", indexWriter, termQuery("a", "6"), true, false);
    storeQuery("7", indexWriter, termQuery("a", "7"), true, false);
    storeQuery("8", indexWriter, termQuery("a", "8"), true, false);
    indexWriter.flush();
    indexWriter.close();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), shardId);
    assertThat(indexReader.leaves().size(), equalTo(3));
    assertThat(indexReader.maxDoc(), equalTo(9));

    initialize("a", "type=keyword");

    try {
        cache.getQueries(indexReader.leaves().get(0));
        fail("IllegalStateException expected");
    } catch (IllegalStateException e) {
        assertThat(e.getMessage(),
                equalTo("queries not loaded, queries should be have been preloaded during index warming..."));
    }

    IndexShard indexShard = mockIndexShard(Version.CURRENT, false);
    ThreadPool threadPool = mockThreadPool();
    IndexWarmer.Listener listener = cache.createListener(threadPool);
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    PercolatorQueryCacheStats stats = cache.getStats(shardId);
    assertThat(stats.getNumQueries(), equalTo(9L));

    PercolateQuery.QueryRegistry.Leaf leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "1"))));
    assertThat(leaf.getQuery(2), equalTo(new TermQuery(new Term("a", "2"))));

    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "3"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "4"))));
    assertThat(leaf.getQuery(2), equalTo(new TermQuery(new Term("a", "5"))));

    leaf = cache.getQueries(indexReader.leaves().get(2));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "6"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "7"))));
    assertThat(leaf.getQuery(2), equalTo(new TermQuery(new Term("a", "8"))));

    indexReader.close();
    directory.close();
}

From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java

License: Apache License

public void testInvalidateEntries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    storeQuery("0", indexWriter, termQuery("a", "0"), true, false);
    indexWriter.flush();
    storeQuery("1", indexWriter, termQuery("a", "1"), true, false);
    indexWriter.flush();
    storeQuery("2", indexWriter, termQuery("a", "2"), true, false);
    indexWriter.flush();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(3));
    assertThat(indexReader.maxDoc(), equalTo(3));

    initialize("a", "type=keyword");

    IndexShard indexShard = mockIndexShard(Version.CURRENT, false);
    ThreadPool threadPool = mockThreadPool();
    IndexWarmer.Listener listener = cache.createListener(threadPool);
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(3L));

    PercolateQuery.QueryRegistry.Leaf leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));

    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "1"))));

    leaf = cache.getQueries(indexReader.leaves().get(2));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    // change merge policy, so that merges will actually happen:
    indexWriter.getConfig().setMergePolicy(new TieredMergePolicy());
    indexWriter.deleteDocuments(new Term("id", "1"));
    indexWriter.forceMergeDeletes();
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(2));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));

    leaf = cache.getQueries(indexReader.leaves().get(1));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.forceMerge(1);
    indexReader.close();
    indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(indexWriter), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(2));
    listener.warmReader(indexShard, new Engine.Searcher("test", new IndexSearcher(indexReader)));
    assertThat(cache.getStats(shardId).getNumQueries(), equalTo(2L));

    leaf = cache.getQueries(indexReader.leaves().get(0));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("a", "0"))));
    assertThat(leaf.getQuery(1), equalTo(new TermQuery(new Term("a", "2"))));

    indexWriter.close();
    indexReader.close();
    directory.close();
}
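
Note how merging, not deletion, is what shrinks maxDoc() in this test: the reader starts with maxDoc() == 3, drops to 2 only after forceMergeDeletes() physically removes the deleted document, and stays at 2 through the final forceMerge(1).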

From source file: org.elasticsearch.index.query.type.child.ChildCollector.java

License: Apache License

@Override
public void collect(int doc) throws IOException {
    BytesWrap parentId = typeCache.parentIdByDoc(doc);
    if (parentId == null) {
        return;
    }
    for (Tuple<IndexReader, IdReaderTypeCache> tuple : readers) {
        IndexReader indexReader = tuple.v1();
        IdReaderTypeCache idReaderTypeCache = tuple.v2();
        if (idReaderTypeCache == null) { // can happen if this reader has no docs of that type
            continue;
        }
        int parentDocId = idReaderTypeCache.docById(parentId);
        if (parentDocId != -1 && !indexReader.isDeleted(parentDocId)) {
            OpenBitSet docIdSet = parentDocs().get(indexReader.getCoreCacheKey());
            if (docIdSet == null) {
                docIdSet = new OpenBitSet(indexReader.maxDoc());
                parentDocs.put(indexReader.getCoreCacheKey(), docIdSet);
            }
            docIdSet.fastSet(parentDocId);
            return;
        }
    }
}

From source file: org.elasticsearch.index.search.child.ChildrenConstantScoreQueryTests.java

License: Apache License

@Test
public void testSimple() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

    for (int parent = 1; parent <= 5; parent++) {
        Document document = new Document();
        document.add(new StringField(UidFieldMapper.NAME, Uid.createUid("parent", Integer.toString(parent)),
                Field.Store.NO));
        document.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));
        indexWriter.addDocument(document);

        for (int child = 1; child <= 3; child++) {
            document = new Document();
            document.add(new StringField(UidFieldMapper.NAME,
                    Uid.createUid("child", Integer.toString(parent * 3 + child)), Field.Store.NO));
            document.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
            document.add(new StringField(ParentFieldMapper.NAME,
                    Uid.createUid("parent", Integer.toString(parent)), Field.Store.NO));
            document.add(new StringField("field1", "value" + child, Field.Store.NO));
            indexWriter.addDocument(document);
        }
    }

    IndexReader indexReader = DirectoryReader.open(indexWriter.w, false);
    IndexSearcher searcher = new IndexSearcher(indexReader);

    TermQuery childQuery = new TermQuery(new Term("field1", "value" + (1 + random().nextInt(3))));
    TermFilter parentFilter = new TermFilter(new Term(TypeFieldMapper.NAME, "parent"));
    int shortCircuitParentDocSet = random().nextInt(5);
    ParentFieldMapper parentFieldMapper = SearchContext.current().mapperService().documentMapper("child")
            .parentFieldMapper();
    ParentChildIndexFieldData parentChildIndexFieldData = SearchContext.current().fieldData()
            .getForField(parentFieldMapper);
    ChildrenConstantScoreQuery query = new ChildrenConstantScoreQuery(parentChildIndexFieldData, childQuery,
            "parent", "child", parentFilter, shortCircuitParentDocSet, null);

    BitSetCollector collector = new BitSetCollector(indexReader.maxDoc());
    searcher.search(query, collector);
    FixedBitSet actualResult = collector.getResult();

    assertThat(actualResult.cardinality(), equalTo(5));

    indexWriter.close();
    indexReader.close();
    directory.close();
}