List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
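Returns one greater than the largest document number in the index. Deleted documents are still counted, so maxDoc() is an upper bound on document IDs rather than a count of live documents (numDocs() gives that). It is therefore the natural size for bit sets over doc IDs and the exclusive bound for loops over them, which is exactly how the examples below use it. A minimal sketch of that pattern, assuming the Lucene 3.x-era API that most of the snippets here target; countLiveDocs and dir are illustrative names, not taken from any of the sources below:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.OpenBitSet;

static long countLiveDocs(Directory dir) throws IOException {
    IndexReader reader = IndexReader.open(dir);
    try {
        // One bit per possible document number.
        OpenBitSet live = new OpenBitSet(reader.maxDoc());
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            if (!reader.isDeleted(docId)) { // maxDoc() includes deleted docs
                live.set(docId);
            }
        }
        return live.cardinality();
    } finally {
        reader.close();
    }
}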
From source file: org.dbpedia.spotlight.lucene.similarity.TermCache.java
License: Apache License

public OpenBitSet createDocIdSet(IndexReader reader, Term term) throws IOException {
    // Size the bit set to maxDoc() so it can hold any document number in the index.
    OpenBitSet result = new OpenBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try {
        td.seek(term);
        while (td.next()) {
            result.set(td.doc());
        }
    } finally {
        td.close();
    }
    return result;
}
From source file: org.dbpedia.spotlight.lucene.similarity.TermsFilter.java
License: Apache License

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    // One bit per possible document number, hence maxDoc() as the size.
    OpenBitSet result = new OpenBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try {
        for (Term term : terms) {
            td.seek(term);
            while (td.next()) {
                result.set(td.doc());
            }
        }
    } finally {
        td.close();
    }
    return result;
}
From source file: org.drftpd.vfs.index.lucene.LuceneEngine.java
License: Open Source License

public void renameInode(ImmutableInodeHandle fromInode, ImmutableInodeHandle toInode) throws IndexException {
    IndexSearcher iSearcher = null;
    IndexReader iReader = null;
    try {
        Term fromInodeTerm = makeFullPathTermFromInode(fromInode);
        synchronized (INDEX_DOCUMENT) {
            _iWriter.updateDocument(fromInodeTerm, makeDocumentFromInode(toInode));
        }
        if (toInode.isDirectory()) {
            PrefixQuery prefixQuery = new PrefixQuery(fromInodeTerm);
            iReader = IndexReader.open(_iWriter, true);
            iSearcher = new IndexSearcher(iReader);
            // maxDoc() bounds the highest document number, so a BitSet of that
            // size can flag every hit collected below.
            final BitSet bits = new BitSet(iReader.maxDoc());
            iSearcher.search(prefixQuery, new Collector() {
                private int docBase;

                // ignore scorer
                public void setScorer(Scorer scorer) {
                }

                // accept docs out of order (for a BitSet it doesn't matter)
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }

                public void collect(int doc) {
                    bits.set(doc + docBase);
                }

                public void setNextReader(IndexReader reader, int docBase) {
                    this.docBase = docBase;
                }
            });
            for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                Document doc = iSearcher.doc(i, SIMPLE_FIELD_SELECTOR);
                String oldPath = doc.getFieldable(FIELD_FULL_PATH.name()).stringValue();
                String newPath = toInode.getPath() + oldPath.substring(fromInode.getPath().length());
                doc.removeField(FIELD_FULL_PATH.name());
                doc.removeField(FIELD_PARENT_PATH.name());
                synchronized (INDEX_DOCUMENT) {
                    FIELD_FULL_PATH.setValue(newPath);
                    if (newPath.equals(VirtualFileSystem.separator)) {
                        FIELD_PARENT_PATH.setValue("");
                    } else {
                        FIELD_PARENT_PATH.setValue(VirtualFileSystem.stripLast(newPath) + VirtualFileSystem.separator);
                    }
                    doc.add(FIELD_FULL_PATH);
                    doc.add(FIELD_PARENT_PATH);
                    _iWriter.updateDocument(makeFullPathTermFromString(oldPath), doc);
                }
            }
        }
    } catch (CorruptIndexException e) {
        throw new IndexException("Unable to rename " + fromInode.getPath() + " to " + toInode.getPath() + " in the index", e);
    } catch (IOException e) {
        throw new IndexException("Unable to rename " + fromInode.getPath() + " to " + toInode.getPath() + " in the index", e);
    } finally {
        if (iSearcher != null) {
            try {
                iSearcher.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexSearcher", e);
            }
        }
        if (iReader != null) {
            try {
                iReader.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexReader obtained from the IndexWriter", e);
            }
        }
    }
}
From source file: org.dyndns.andreasbaumann.LuceneAnalyzer.java
License: Open Source License

private static void printGlobalInfo(IndexReader indexReader, boolean printHeaders, boolean isSolr,
        SolrIndexSearcher solrSearch) throws IOException {
    if (printHeaders) {
        System.out.println("Global Information:");
        System.out.println("===================");
    }
    System.out.println("\tnumber of documents: " + indexReader.numDocs());
    // We should get the number of features differently; this is inefficient, but Lucene
    // has no notion of global statistics (because the default weighting scheme doesn't
    // make use of them!)
    int nofFeatures = 0;
    int nofTokens = 0;
    TermEnum terms = indexReader.terms();
    while (terms.next()) {
        int df = terms.docFreq();
        nofFeatures++;
        nofTokens += df;
    }
    System.out.println("\ttotal number of features: " + nofFeatures);
    System.out.println("\ttotal number of tokens: " + nofTokens);
    System.out.println("\tversion: " + indexReader.getVersion());
    System.out.println("\tstill current: " + indexReader.isCurrent());
    // TODO: we don't get segment information!
    //System.out.println("is optimized:" + segmentInfos.size() == 1 && !indexReader.hasDeletions());
    System.out.println("\tmaximal document number: " + indexReader.maxDoc());
    System.out.println("\thas deletions: " + indexReader.hasDeletions());
    if (isSolr) {
        System.out.println("\tSolr version: " + solrSearch.getVersion());
    }
    System.out.println("");
}
From source file: org.eclipse.che.api.search.server.impl.LuceneSearcher.java
License: Open Source License

private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={} hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(), luceneIndexWriter.hasPendingMerges(),
                    luceneIndexWriter.hasUncommittedChanges(), luceneIndexWriter.hasDeletions());
        } finally {
            searcherManager.release(luceneSearcher);
        }
    }
}
From source file: org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License: Open Source License

private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();
            Document doc;
            int totalFields = 0;
            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            // Iterate over every possible document number; maxDoc() is the exclusive upper bound.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);
            }
            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());
        } finally {
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}
From source file: org.elasticsearch.action.fieldstats.TransportFieldStatsTransportAction.java
License: Apache License

@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    ShardId shardId = request.shardId();
    Map<String, FieldStats> fieldStats = new HashMap<>();
    IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mapperService = indexServices.mapperService();
    IndexShard shard = indexServices.shardSafe(shardId.id());
    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType != null) {
                IndexReader reader = searcher.reader();
                Terms terms = MultiFields.getTerms(reader, field);
                if (terms != null) {
                    // maxDoc() supplies the total document count used to derive the field statistics.
                    fieldStats.put(field, fieldType.stats(terms, reader.maxDoc()));
                }
            } else {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, fieldStats);
}
From source file: org.elasticsearch.common.lucene.docset.DocIdSetCollector.java
License: Apache License

public DocIdSetCollector(Collector collector, IndexReader reader) {
    this.collector = collector;
    // Allocate the bit set with maxDoc() bits so any collected doc ID fits.
    this.docIdSet = new OpenBitSetDISI(reader.maxDoc());
}
From source file: org.elasticsearch.common.lucene.docset.DocSets.java
License: Apache License

public static DocSet convert(IndexReader reader, DocIdSet docIdSet) throws IOException {
    if (docIdSet == null) {
        return DocSet.EMPTY_DOC_SET;
    } else if (docIdSet instanceof DocSet) {
        return (DocSet) docIdSet;
    } else if (docIdSet instanceof OpenBitSet) {
        return new OpenBitDocSet((OpenBitSet) docIdSet);
    } else {
        final DocIdSetIterator it = docIdSet.iterator();
        // null is allowed to be returned by iterator(),
        // in this case we wrap with the empty set,
        // which is cacheable.
        return (it == null) ? DocSet.EMPTY_DOC_SET : new OpenBitDocSet(it, reader.maxDoc());
    }
}
From source file: org.elasticsearch.common.lucene.docset.DocSets.java
License: Apache License

/**
 * Returns a cacheable version of the doc id set (might be the same instance provided as a parameter).
 */
public static DocSet cacheable(IndexReader reader, DocIdSet docIdSet) throws IOException {
    if (docIdSet == null) {
        return DocSet.EMPTY_DOC_SET;
    } else if (docIdSet.isCacheable() && (docIdSet instanceof DocSet)) {
        return (DocSet) docIdSet;
    } else if (docIdSet instanceof OpenBitSet) {
        return new OpenBitDocSet((OpenBitSet) docIdSet);
    } else {
        final DocIdSetIterator it = docIdSet.iterator();
        // null is allowed to be returned by iterator(),
        // in this case we wrap with the empty set,
        // which is cacheable.
        return (it == null) ? DocSet.EMPTY_DOC_SET : new OpenBitDocSet(it, reader.maxDoc());
    }
}