Example usage for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usage of org.apache.lucene.index IndexReader numDocs.

Prototype

public abstract int numDocs();

Document

Returns the number of documents in this index.
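
Before the project examples below, here is a minimal, self-contained sketch of how numDocs() is typically read from an index on disk. The index path is a placeholder, and the snippet assumes a recent Lucene release (5.x or later) where a reader is obtained via DirectoryReader.open; it also prints the related maxDoc() and numDeletedDocs() counters for comparison.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // Placeholder path: point this at an existing Lucene index directory.
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
                IndexReader reader = DirectoryReader.open(dir)) {
            // numDocs() counts live (non-deleted) documents,
            // maxDoc() is one greater than the largest document number in use,
            // and numDeletedDocs() is the difference between the two.
            System.out.println("numDocs        = " + reader.numDocs());
            System.out.println("maxDoc         = " + reader.maxDoc());
            System.out.println("numDeletedDocs = " + reader.numDeletedDocs());
        }
    }
}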

Usage

From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java

License:Open Source License

private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={}  hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            if (luceneSearcher != null) {
                searcherManager.release(luceneSearcher);
            }
        }
    }
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;

            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);

            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());

        } finally {
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}

From source file:org.eclipse.smila.lucene.test.TestLuceneIndexService.java

License:Open Source License

/**
 * Add documents.
 * 
 * @throws Exception
 *           if any error occurs
 */
private void addDocument() throws Exception {
    IndexReader indexReader = IndexReader.open(_testIndexDir);
    assertEquals(0, indexReader.numDocs());

    final Record[] records = AllTests.createRecords(5);
    assertEquals(DOCS_QUANTITY, records.length);
    final String[] recordIds = new String[DOCS_QUANTITY];
    for (int i = 0; i < records.length; i++) {
        recordIds[i] = records[i].getId();
        _blackboard.setRecord(records[i]);
        AllTests.setAnnotations(_blackboard, records[i].getId(), LuceneIndexService.ExecutionMode.ADD,
                TEST_INDEX_NAME);
    }
    final String[] result = _luceneIndexPipelet.process(_blackboard, recordIds);
    assertEquals(DOCS_QUANTITY, result.length);

    indexReader = indexReader.reopen();
    assertEquals(DOCS_QUANTITY, indexReader.numDocs());
    indexReader.close();
}

From source file:org.eclipse.smila.lucene.test.TestLuceneIndexService.java

License:Open Source License

/**
 * Delete documents.
 * 
 * @throws Exception
 *           if any error occurs
 */
private void deleteDocument() throws Exception {
    IndexReader indexReader = IndexReader.open(_testIndexDir);
    assertEquals(DOCS_QUANTITY, indexReader.numDocs());

    final Record[] records = AllTests.createRecords(5);
    assertEquals(DOCS_QUANTITY, records.length);
    final String[] recordIds = new String[DOCS_QUANTITY];
    for (int i = 0; i < records.length; i++) {
        recordIds[i] = records[i].getId();
        AllTests.setAnnotations(_blackboard, records[i].getId(), LuceneIndexService.ExecutionMode.DELETE,
                TEST_INDEX_NAME);
    }
    final String[] result = _luceneIndexPipelet.process(_blackboard, recordIds);
    assertEquals(DOCS_QUANTITY, result.length);
    indexReader = indexReader.reopen();
    assertEquals(0, indexReader.numDocs());
    indexReader.close();
}

From source file:org.eclipse.smila.search.lucene.index.access.CountTotalOperation.java

License:Open Source License

/**
 * {@inheritDoc}
 * 
 * @see org.eclipse.smila.search.lucene.index.access.ISynchronizedOperation#process(java.lang.Object)
 */
public Integer process(final IndexReader object) throws IndexException {
    Integer docs;
    try {
        docs = object.numDocs();
    } catch (final Exception e) {
        throw new IndexException("Can't determine number of documents in index", e);
    }
    return docs;
}

From source file:org.elasticsearch.action.termwalker.TransportTermwalkerAction.java

License:Apache License

@Override
protected ShardTermwalkerResponse shardOperation(ShardTermwalkerRequest request) throws ElasticSearchException {
    synchronized (mutex) {
        try {
            Map<String, Object> response = new HashMap();
            IndexService indexService = indicesService.indexServiceSafe(request.index());
            InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
            Store store = indexShard.store();
            IndexReader reader = indexShard.searcher().reader();

            Integer termCount = 0;
            Long totalCount = 0L;
            List termList = new ArrayList();
            Fields fields = MultiFields.getFields(reader);
            Terms terms = fields.terms("_all");

            Boolean includeDF = request.includeDF();
            Boolean includeTTF = request.includeTTF();

            logger.info("termwalker:" + " shard: " + request.shardId() + " df: " + includeDF + " ttf: "
                    + includeTTF);

            if (terms != null) {
                TermsEnum iterator = terms.iterator(null);

                for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                    Integer df = iterator.docFreq();
                    Long ttf = iterator.totalTermFreq();

                    termCount += 1;
                    totalCount += ttf;

                    if ((includeDF || includeTTF) && df > 1) {
                        Map tiMap = new HashMap();
                        tiMap.put("text", term.utf8ToString());
                        if (includeDF) {
                            tiMap.put("df", df);
                        }
                        if (includeTTF) {
                            tiMap.put("ttf", ttf);
                        }
                        termList.add(tiMap);
                    }
                }
            } else {
                logger.error("Terms for _all is null.");
            }
            response.put("terms", termList);
            response.put("num_docs", reader.numDocs());
            response.put("num_terms", termCount);
            response.put("total_terms", totalCount);

            return new ShardTermwalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (IOException ex) {
            throw new ElasticSearchException(ex.getMessage(), ex);
        }
    }
}

From source file:org.elasticsearch.index.cache.bloom.simple.SimpleBloomCache.java

License:Apache License

@Override
public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) {
    int currentNumDocs = reader.numDocs();
    if (currentNumDocs == 0) {
        return BloomFilter.EMPTY;
    }
    ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getCoreCacheKey());
    if (fieldCache == null) {
        synchronized (creationMutex) {
            fieldCache = cache.get(reader.getCoreCacheKey());
            if (fieldCache == null) {
                reader.addReaderFinishedListener(this);
                fieldCache = ConcurrentCollections.newConcurrentMap();
                cache.put(reader.getCoreCacheKey(), fieldCache);
            }
        }
    }
    BloomFilterEntry filter = fieldCache.get(fieldName);
    if (filter == null) {
        synchronized (fieldCache) {
            filter = fieldCache.get(fieldName);
            if (filter == null) {
                filter = new BloomFilterEntry(currentNumDocs, BloomFilter.NONE);
                fieldCache.put(fieldName, filter);
                // now, do the async load of it...
                if (currentNumDocs < maxSize) {
                    filter.loading.set(true);
                    BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                    if (asyncLoad) {
                        threadPool.cached().execute(loader);
                    } else {
                        loader.run();
                        filter = fieldCache.get(fieldName);
                    }
                }
            }
        }
    }
    // if we have too many deletes, we need to reload the bloom filter so it will be more effective
    if (filter.numDocs > 1000 && filter.numDocs < maxSize && (currentNumDocs / filter.numDocs) < 0.6) {
        if (filter.loading.compareAndSet(false, true)) {
            // do the async loading
            BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
            if (asyncLoad) {
                threadPool.cached().execute(loader);
            } else {
                loader.run();
                filter = fieldCache.get(fieldName);
            }
        }
    }
    return filter.filter;
}

From source file:org.elasticsearch.index.engine.robin.RobinEngine.java

License:Apache License

@Override
public List<Segment> segments() {
    rwl.readLock().lock();
    try {
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter == null) {
            throw new EngineClosedException(shardId, failedEngine);
        }
        Map<String, Segment> segments = new HashMap<String, Segment>();

        // first, go over and compute the search ones...
        Searcher searcher = searcher();
        try {
            IndexReader[] readers = searcher.reader().getSequentialSubReaders();
            for (IndexReader reader : readers) {
                assert reader instanceof SegmentReader;
                SegmentInfo info = Lucene.getSegmentInfo((SegmentReader) reader);
                assert !segments.containsKey(info.name);
                Segment segment = new Segment(info.name);
                segment.search = true;
                segment.docCount = reader.numDocs();
                segment.delDocCount = reader.numDeletedDocs();
                try {
                    segment.sizeInBytes = info.sizeInBytes(true);
                } catch (IOException e) {
                    logger.trace("failed to get size for [{}]", e, info.name);
                }
                segments.put(info.name, segment);
            }
        } finally {
            searcher.release();
        }

        // now, correlate or add the committed ones...
        if (lastCommittedSegmentInfos != null) {
            SegmentInfos infos = lastCommittedSegmentInfos;
            for (SegmentInfo info : infos) {
                Segment segment = segments.get(info.name);
                if (segment == null) {
                    segment = new Segment(info.name);
                    segment.search = false;
                    segment.committed = true;
                    segment.docCount = info.docCount;
                    try {
                        segment.delDocCount = indexWriter.numDeletedDocs(info);
                    } catch (IOException e) {
                        logger.trace("failed to get deleted docs for committed segment", e);
                    }
                    try {
                        segment.sizeInBytes = info.sizeInBytes(true);
                    } catch (IOException e) {
                        logger.trace("failed to get size for [{}]", e, info.name);
                    }
                    segments.put(info.name, segment);
                } else {
                    segment.committed = true;
                }
            }
        }

        Segment[] segmentsArr = segments.values().toArray(new Segment[segments.values().size()]);
        Arrays.sort(segmentsArr, new Comparator<Segment>() {
            @Override
            public int compare(Segment o1, Segment o2) {
                return (int) (o1.generation() - o2.generation());
            }
        });

        return Arrays.asList(segmentsArr);
    } finally {
        rwl.readLock().unlock();
    }
}

From source file:org.elasticsearch.search.aggregations.bucket.composite.SingleDimensionValuesSource.java

License:Apache License

/**
 * Returns true if a {@link SortedDocsProducer} should be used to optimize the execution.
 */
protected boolean checkIfSortedDocsIsApplicable(IndexReader reader, MappedFieldType fieldType) {
    if (fieldType == null || fieldType.indexOptions() == IndexOptions.NONE ||
    // inverse of the natural order
            reverseMul == -1) {
        return false;
    }

    if (reader.hasDeletions()
            && (reader.numDocs() == 0 || (double) reader.numDocs() / (double) reader.maxDoc() < 0.5)) {
        // do not use the index if it has more than 50% of deleted docs
        return false;
    }
    return true;
}

From source file:org.elasticsearch.search.aggregations.bucket.composite.SingleDimensionValuesSourceTests.java

License:Apache License

private static IndexReader mockIndexReader(int maxDoc, int numDocs) {
    IndexReader reader = mock(IndexReader.class);
    when(reader.hasDeletions()).thenReturn(maxDoc - numDocs > 0);
    when(reader.maxDoc()).thenReturn(maxDoc);
    when(reader.numDocs()).thenReturn(numDocs);
    return reader;
}