Usage examples for org.apache.lucene.index.IndexReader#numDocs()
public abstract int numDocs();
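numDocs() returns the number of live (not deleted) documents visible through the reader, while maxDoc() also counts deleted documents, so numDocs() == maxDoc() - numDeletedDocs(). Before the examples below, here is a minimal standalone sketch of that relationship, assuming Lucene 5+ and a hypothetical index path:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NumDocsSketch {
    public static void main(String[] args) throws Exception {
        // "/tmp/test-index" is a placeholder for any existing index directory.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/test-index"));
                DirectoryReader reader = DirectoryReader.open(dir)) {
            System.out.println("numDocs        = " + reader.numDocs());        // live documents only
            System.out.println("maxDoc         = " + reader.maxDoc());         // live + deleted
            System.out.println("numDeletedDocs = " + reader.numDeletedDocs()); // maxDoc - numDocs
        }
    }
}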
From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java
License:Open Source License
private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={} hasUncommittedChanges={} hasDeletions={}",
                reader.numDocs(),
                reader.numDeletedDocs(),
                reader.maxDoc(),
                reader.hasDeletions(),
                luceneIndexWriter.numDocs(),
                luceneIndexWriter.numRamDocs(),
                luceneIndexWriter.hasPendingMerges(),
                luceneIndexWriter.hasUncommittedChanges(),
                luceneIndexWriter.hasDeletions());
        } finally {
            searcherManager.release(luceneSearcher);
        }
    }
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;
            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);
            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count
                    + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());
        } finally {
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}
From source file:org.eclipse.smila.lucene.test.TestLuceneIndexService.java
License:Open Source License
/**
 * Add documents.
 *
 * @throws Exception
 *           if any error occurs
 */
private void addDocument() throws Exception {
    IndexReader indexReader = IndexReader.open(_testIndexDir);
    assertEquals(0, indexReader.numDocs());
    final Record[] records = AllTests.createRecords(5);
    assertEquals(DOCS_QUANTITY, records.length);
    final String[] recordIds = new String[DOCS_QUANTITY];
    for (int i = 0; i < records.length; i++) {
        recordIds[i] = records[i].getId();
        _blackboard.setRecord(records[i]);
        AllTests.setAnnotations(_blackboard, records[i].getId(), LuceneIndexService.ExecutionMode.ADD,
                TEST_INDEX_NAME);
    }
    final String[] result = _luceneIndexPipelet.process(_blackboard, recordIds);
    assertEquals(DOCS_QUANTITY, result.length);
    indexReader = indexReader.reopen();
    assertEquals(DOCS_QUANTITY, indexReader.numDocs());
    indexReader.close();
}
From source file:org.eclipse.smila.lucene.test.TestLuceneIndexService.java
License:Open Source License
/**
 * Delete documents.
 *
 * @throws Exception
 *           if any error occurs
 */
private void deleteDocument() throws Exception {
    IndexReader indexReader = IndexReader.open(_testIndexDir);
    assertEquals(DOCS_QUANTITY, indexReader.numDocs());
    final Record[] records = AllTests.createRecords(5);
    assertEquals(DOCS_QUANTITY, records.length);
    final String[] recordIds = new String[DOCS_QUANTITY];
    for (int i = 0; i < records.length; i++) {
        recordIds[i] = records[i].getId();
        AllTests.setAnnotations(_blackboard, records[i].getId(), LuceneIndexService.ExecutionMode.DELETE,
                TEST_INDEX_NAME);
    }
    final String[] result = _luceneIndexPipelet.process(_blackboard, recordIds);
    assertEquals(DOCS_QUANTITY, result.length);
    indexReader = indexReader.reopen();
    assertEquals(0, indexReader.numDocs());
    indexReader.close();
}
From source file:org.eclipse.smila.search.lucene.index.access.CountTotalOperation.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * @see org.eclipse.smila.search.lucene.index.access.ISynchronizedOperation#process(java.lang.Object)
 */
public Integer process(final IndexReader object) throws IndexException {
    Integer docs;
    try {
        docs = object.numDocs();
    } catch (final Exception e) {
        throw new IndexException("Can't determine number of documents in index", e);
    }
    return docs;
}
From source file:org.elasticsearch.action.termwalker.TransportTermwalkerAction.java
License:Apache License
@Override
protected ShardTermwalkerResponse shardOperation(ShardTermwalkerRequest request) throws ElasticSearchException {
    synchronized (mutex) {
        try {
            Map<String, Object> response = new HashMap<String, Object>();
            IndexService indexService = indicesService.indexServiceSafe(request.index());
            InternalIndexShard indexShard = (InternalIndexShard) indexService.shardSafe(request.shardId());
            Store store = indexShard.store();
            IndexReader reader = indexShard.searcher().reader();
            Integer termCount = 0;
            Long totalCount = 0L;
            List<Map<String, Object>> termList = new ArrayList<Map<String, Object>>();
            Fields fields = MultiFields.getFields(reader);
            Terms terms = fields.terms("_all");
            Boolean includeDF = request.includeDF();
            Boolean includeTTF = request.includeTTF();
            logger.info("termwalker:" + " shard: " + request.shardId() + " df: " + includeDF + " ttf: " + includeTTF);
            if (terms != null) {
                TermsEnum iterator = terms.iterator(null);
                for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                    Integer df = iterator.docFreq();
                    Long ttf = iterator.totalTermFreq();
                    termCount += 1;
                    totalCount += ttf;
                    if ((includeDF || includeTTF) && df > 1) {
                        Map<String, Object> tiMap = new HashMap<String, Object>();
                        tiMap.put("text", term.utf8ToString());
                        if (includeDF) {
                            tiMap.put("df", df);
                        }
                        if (includeTTF) {
                            tiMap.put("ttf", ttf);
                        }
                        termList.add(tiMap);
                    }
                }
            } else {
                logger.error("Terms for _all is null.");
            }
            response.put("terms", termList);
            response.put("num_docs", reader.numDocs());
            response.put("num_terms", termCount);
            response.put("total_terms", totalCount);
            return new ShardTermwalkerResponse(request.index(), request.shardId()).setResponse(response);
        } catch (IOException ex) {
            throw new ElasticSearchException(ex.getMessage(), ex);
        }
    }
}
From source file:org.elasticsearch.index.cache.bloom.simple.SimpleBloomCache.java
License:Apache License
@Override
public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) {
    int currentNumDocs = reader.numDocs();
    if (currentNumDocs == 0) {
        return BloomFilter.EMPTY;
    }
    ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getCoreCacheKey());
    if (fieldCache == null) {
        synchronized (creationMutex) {
            fieldCache = cache.get(reader.getCoreCacheKey());
            if (fieldCache == null) {
                reader.addReaderFinishedListener(this);
                fieldCache = ConcurrentCollections.newConcurrentMap();
                cache.put(reader.getCoreCacheKey(), fieldCache);
            }
        }
    }
    BloomFilterEntry filter = fieldCache.get(fieldName);
    if (filter == null) {
        synchronized (fieldCache) {
            filter = fieldCache.get(fieldName);
            if (filter == null) {
                filter = new BloomFilterEntry(currentNumDocs, BloomFilter.NONE);
                fieldCache.put(fieldName, filter);
                // now, do the async load of it...
                if (currentNumDocs < maxSize) {
                    filter.loading.set(true);
                    BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
                    if (asyncLoad) {
                        threadPool.cached().execute(loader);
                    } else {
                        loader.run();
                        filter = fieldCache.get(fieldName);
                    }
                }
            }
        }
    }
    // if we have too many deletes, reload the bloom filter so it remains effective
    // (the cast avoids an integer division, which would truncate the ratio to 0 or 1)
    if (filter.numDocs > 1000 && filter.numDocs < maxSize
            && ((double) currentNumDocs / filter.numDocs) < 0.6) {
        if (filter.loading.compareAndSet(false, true)) {
            // do the async loading
            BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName);
            if (asyncLoad) {
                threadPool.cached().execute(loader);
            } else {
                loader.run();
                filter = fieldCache.get(fieldName);
            }
        }
    }
    return filter.filter;
}
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
@Override
public List<Segment> segments() {
    rwl.readLock().lock();
    try {
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter == null) {
            throw new EngineClosedException(shardId, failedEngine);
        }
        Map<String, Segment> segments = new HashMap<String, Segment>();

        // first, go over and compute the search ones...
        Searcher searcher = searcher();
        try {
            IndexReader[] readers = searcher.reader().getSequentialSubReaders();
            for (IndexReader reader : readers) {
                assert reader instanceof SegmentReader;
                SegmentInfo info = Lucene.getSegmentInfo((SegmentReader) reader);
                assert !segments.containsKey(info.name);
                Segment segment = new Segment(info.name);
                segment.search = true;
                segment.docCount = reader.numDocs();
                segment.delDocCount = reader.numDeletedDocs();
                try {
                    segment.sizeInBytes = info.sizeInBytes(true);
                } catch (IOException e) {
                    logger.trace("failed to get size for [{}]", e, info.name);
                }
                segments.put(info.name, segment);
            }
        } finally {
            searcher.release();
        }

        // now, correlate or add the committed ones...
        if (lastCommittedSegmentInfos != null) {
            SegmentInfos infos = lastCommittedSegmentInfos;
            for (SegmentInfo info : infos) {
                Segment segment = segments.get(info.name);
                if (segment == null) {
                    segment = new Segment(info.name);
                    segment.search = false;
                    segment.committed = true;
                    segment.docCount = info.docCount;
                    try {
                        segment.delDocCount = indexWriter.numDeletedDocs(info);
                    } catch (IOException e) {
                        logger.trace("failed to get deleted docs for committed segment", e);
                    }
                    try {
                        segment.sizeInBytes = info.sizeInBytes(true);
                    } catch (IOException e) {
                        logger.trace("failed to get size for [{}]", e, info.name);
                    }
                    segments.put(info.name, segment);
                } else {
                    segment.committed = true;
                }
            }
        }

        Segment[] segmentsArr = segments.values().toArray(new Segment[segments.values().size()]);
        Arrays.sort(segmentsArr, new Comparator<Segment>() {
            @Override
            public int compare(Segment o1, Segment o2) {
                return (int) (o1.generation() - o2.generation());
            }
        });
        return Arrays.asList(segmentsArr);
    } finally {
        rwl.readLock().unlock();
    }
}
From source file:org.elasticsearch.search.aggregations.bucket.composite.SingleDimensionValuesSource.java
License:Apache License
/**
 * Returns true if a {@link SortedDocsProducer} should be used to optimize the execution.
 */
protected boolean checkIfSortedDocsIsApplicable(IndexReader reader, MappedFieldType fieldType) {
    if (fieldType == null
            || fieldType.indexOptions() == IndexOptions.NONE
            // inverse of the natural order
            || reverseMul == -1) {
        return false;
    }

    if (reader.hasDeletions()
            && (reader.numDocs() == 0 || (double) reader.numDocs() / (double) reader.maxDoc() < 0.5)) {
        // do not use the index if it has more than 50% of deleted docs
        return false;
    }

    return true;
}
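The 50% cutoff in the deletion guard above is easy to check by hand. A minimal standalone sketch with hypothetical values (only the guard expression is taken from the method shown):

public class DeletionRatioSketch {
    public static void main(String[] args) {
        int maxDoc = 100;   // hypothetical segment size, live + deleted
        int numDocs = 40;   // hypothetical live documents, i.e. 60 deleted
        double liveRatio = (double) numDocs / (double) maxDoc;       // 0.4
        boolean skipOptimization = numDocs == 0 || liveRatio < 0.5;  // true
        // More than half the documents are deleted, so
        // checkIfSortedDocsIsApplicable would return false here.
        System.out.println("liveRatio=" + liveRatio + " skip=" + skipOptimization);
    }
}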
From source file:org.elasticsearch.search.aggregations.bucket.composite.SingleDimensionValuesSourceTests.java
License:Apache License
private static IndexReader mockIndexReader(int maxDoc, int numDocs) {
    IndexReader reader = mock(IndexReader.class);
    when(reader.hasDeletions()).thenReturn(maxDoc - numDocs > 0);
    when(reader.maxDoc()).thenReturn(maxDoc);
    when(reader.numDocs()).thenReturn(numDocs);
    return reader;
}