Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usage of org.apache.lucene.index IndexReader maxDoc.

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:org.dbpedia.spotlight.lucene.similarity.TermCache.java

License:Apache License

/**
 * Builds a bit set marking every document that contains the given term.
 *
 * @param reader index reader to scan; the bit set is sized to {@code reader.maxDoc()}
 * @param term   term whose postings are collected
 * @return an {@link OpenBitSet} with one bit set per matching document id
 * @throws IOException if the postings cannot be read
 */
public OpenBitSet createDocIdSet(IndexReader reader, Term term) throws IOException {
    OpenBitSet result = new OpenBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try {
        td.seek(term);
        while (td.next()) {
            result.set(td.doc());
        }
    } finally {
        // Release the postings enumerator; it was previously leaked.
        td.close();
    }
    return result;
}

From source file:org.dbpedia.spotlight.lucene.similarity.TermsFilter.java

License:Apache License

/**
 * Builds a doc id set containing every document that matches at least one
 * of this filter's terms (the union of all term postings).
 *
 * @param reader index reader to scan; the bit set is sized to {@code reader.maxDoc()}
 * @return an {@link OpenBitSet} with one bit set per matching document id
 * @throws IOException if the postings cannot be read
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet result = new OpenBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try {
        // One enumerator is reused across terms via seek().
        for (Iterator<Term> iter = terms.iterator(); iter.hasNext();) {
            Term term = iter.next();
            td.seek(term);
            while (td.next()) {
                result.set(td.doc());
            }
        }
    } finally {
        td.close();
    }
    return result;
}

From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java

License:Open Source License

/**
 * Renames an inode in the index: the inode's own document is rewritten under
 * the new path, and — when the inode is a directory — every indexed child
 * path underneath it is rewritten as well.
 *
 * @param fromInode the inode under its old path
 * @param toInode   the same inode under its new path
 * @throws IndexException if the index is corrupt or an I/O error occurs
 */
public void renameInode(ImmutableInodeHandle fromInode, ImmutableInodeHandle toInode) throws IndexException {
    IndexSearcher iSearcher = null;
    IndexReader iReader = null;
    try {
        Term fromInodeTerm = makeFullPathTermFromInode(fromInode);
        // Replace the document for the inode itself. INDEX_DOCUMENT guards the
        // shared reusable field instances used by makeDocumentFromInode —
        // presumably FIELD_FULL_PATH/FIELD_PARENT_PATH below; confirm.
        synchronized (INDEX_DOCUMENT) {
            _iWriter.updateDocument(fromInodeTerm, makeDocumentFromInode(toInode));
        }
        if (toInode.isDirectory()) {
            // All children share the old directory path as a prefix.
            PrefixQuery prefixQuery = new PrefixQuery(fromInodeTerm);

            // Near-real-time reader so the search sees pending writer changes.
            iReader = IndexReader.open(_iWriter, true);
            iSearcher = new IndexSearcher(iReader);

            // Collect matching doc ids into a BitSet first, then rewrite each
            // document, rather than mutating the index while searching it.
            final BitSet bits = new BitSet(iReader.maxDoc());
            iSearcher.search(prefixQuery, new Collector() {
                private int docBase;

                // ignore scorer
                public void setScorer(Scorer scorer) {
                }

                // accept docs out of order (for a BitSet it doesn't matter)
                public boolean acceptsDocsOutOfOrder() {
                    return true;
                }

                public void collect(int doc) {
                    // docBase converts the per-segment doc id to a global id.
                    bits.set(doc + docBase);
                }

                public void setNextReader(IndexReader reader, int docBase) {
                    this.docBase = docBase;
                }
            });

            // Iterate every set bit = every child document to rewrite.
            for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                Document doc = iSearcher.doc(i, SIMPLE_FIELD_SELECTOR);

                String oldPath = doc.getFieldable(FIELD_FULL_PATH.name()).stringValue();
                // Splice the new directory prefix onto the child's old suffix.
                String newPath = toInode.getPath() + oldPath.substring(fromInode.getPath().length());
                doc.removeField(FIELD_FULL_PATH.name());
                doc.removeField(FIELD_PARENT_PATH.name());

                // Shared mutable field instances — must stay locked from
                // setValue() through updateDocument().
                synchronized (INDEX_DOCUMENT) {
                    FIELD_FULL_PATH.setValue(newPath);
                    if (newPath.equals(VirtualFileSystem.separator)) {
                        // Root has no parent path.
                        FIELD_PARENT_PATH.setValue("");
                    } else {
                        FIELD_PARENT_PATH
                                .setValue(VirtualFileSystem.stripLast(newPath) + VirtualFileSystem.separator);
                    }
                    doc.add(FIELD_FULL_PATH);
                    doc.add(FIELD_PARENT_PATH);
                    _iWriter.updateDocument(makeFullPathTermFromString(oldPath), doc);
                }
            }
        }
    } catch (CorruptIndexException e) {
        throw new IndexException(
                "Unable to rename " + fromInode.getPath() + " to " + toInode.getPath() + " in the index", e);
    } catch (IOException e) {
        throw new IndexException(
                "Unable to rename " + fromInode.getPath() + " to " + toInode.getPath() + " in the index", e);
    } finally {
        // Close searcher before the NRT reader it wraps.
        if (iSearcher != null) {
            try {
                iSearcher.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexSearcher", e);
            }
        }
        if (iReader != null) {
            try {
                iReader.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexReader obtained from the IndexWriter", e);
            }
        }
    }
}

From source file:org.dyndns.andreasbaumann.LuceneAnalyzer.java

License:Open Source License

/**
 * Prints global index statistics (document counts, feature/token totals,
 * version info, deletion state) to standard output.
 *
 * @param printHeaders whether to print the section header first
 * @param isSolr       whether a Solr searcher is available for extra info
 * @param solrSearch   Solr searcher queried for its version when {@code isSolr}
 * @throws IOException if the term dictionary cannot be read
 */
private static void printGlobalInfo(IndexReader indexReader, boolean printHeaders, boolean isSolr,
        SolrIndexSearcher solrSearch) throws IOException {
    if (printHeaders) {
        System.out.println("Global Information:");
        System.out.println("===================");
    }

    System.out.println("\tnumber of documents: " + indexReader.numDocs());

    // Lucene keeps no global collection statistics (the default weighting
    // schema doesn't use them), so count features and tokens by walking
    // the entire term dictionary. Inefficient but unavoidable here.
    int nofFeatures = 0;
    int nofTokens = 0;
    TermEnum terms = indexReader.terms();
    try {
        while (terms.next()) {
            nofFeatures++;
            nofTokens += terms.docFreq();
        }
    } finally {
        // Release the enumerator; it was previously leaked.
        terms.close();
    }
    System.out.println("\ttotal number of features: " + nofFeatures);
    System.out.println("\ttotal number of tokens: " + nofTokens);

    System.out.println("\tversion: " + indexReader.getVersion());
    System.out.println("\tstill current: " + indexReader.isCurrent());

    //TODO: we don't get segment information!
    //System.out.println( "is optimized:" + segmentInfos.size( ) == 1 && !indexReader.hasDeletions( ) );
    System.out.println("\tmaximal document number: " + indexReader.maxDoc());
    System.out.println("\thas deletions: " + indexReader.hasDeletions());

    if (isSolr) {
        System.out.println("\tSolr version: " + solrSearch.getVersion());
    }

    System.out.println("");
}

From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java

License:Open Source License

/**
 * Logs reader and writer statistics at debug level. No-op unless debug
 * logging is enabled.
 *
 * @throws IOException if the searcher manager fails to refresh or acquire
 */
private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={}  hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            // If maybeRefresh()/acquire() threw, luceneSearcher is still null;
            // releasing null would throw an NPE that masks the real exception.
            if (luceneSearcher != null) {
                searcherManager.release(luceneSearcher);
            }
        }
    }
}

From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License:Open Source License

/**
 * Logs summary statistics about the index: total, deletable and valid
 * document counts, field totals/averages, and the number of distinct "id"
 * values. I/O problems are logged as warnings, not rethrown.
 */
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;

            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            // Walk every doc slot up to maxDoc(), skipping deleted slots.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                // A document may carry multiple "id" values; dedupe via the set.
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);

            }

            // NOTE(review): if numDocs() is 0 the average below is NaN/Infinity
            // (double division), which is logged as-is rather than failing.
            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());

        } finally {
            // The reader is not closed directly; presumably getIndexReader()
            // registers it with currentMonitor, which is handed off here and
            // closed when no longer in use — TODO confirm against ReaderMonitor.
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}

From source file:org.elasticsearch.action.fieldstats.TransportFieldStatsTransportAction.java

License:Apache License

/**
 * Computes field statistics for every requested field on one shard.
 *
 * @param request shard-level request naming the fields to inspect
 * @return the per-field statistics for this shard
 * @throws IllegalArgumentException if a requested field is not mapped
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    ShardId shardId = request.shardId();
    Map<String, FieldStats> fieldStats = new HashMap<>();
    IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mappers = indexService.mapperService();
    IndexShard indexShard = indexService.shardSafe(shardId.id());
    try (Engine.Searcher searcher = indexShard.acquireSearcher("fieldstats")) {
        IndexReader reader = searcher.reader();
        for (String fieldName : request.getFields()) {
            MappedFieldType fieldType = mappers.fullName(fieldName);
            if (fieldType == null) {
                throw new IllegalArgumentException("field [" + fieldName + "] doesn't exist");
            }
            // Fields with no indexed terms are simply omitted from the result.
            Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
            if (fieldTerms != null) {
                fieldStats.put(fieldName, fieldType.stats(fieldTerms, reader.maxDoc()));
            }
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, fieldStats);
}

From source file:org.elasticsearch.common.lucene.docset.DocIdSetCollector.java

License:Apache License

/**
 * Wraps a delegate collector while recording collected doc ids in a bit set
 * sized to the reader's maxDoc().
 *
 * @param collector delegate collector to forward to
 * @param reader    reader whose maxDoc() sizes the doc id set
 */
public DocIdSetCollector(Collector collector, IndexReader reader) {
    this.collector = collector;
    this.docIdSet = new OpenBitSetDISI(reader.maxDoc());
}

From source file:org.elasticsearch.common.lucene.docset.DocSets.java

License:Apache License

/**
 * Adapts an arbitrary {@link DocIdSet} to a {@link DocSet}, reusing the
 * instance when it already is one and wrapping otherwise.
 *
 * @param reader   reader whose maxDoc() sizes a wrapped bit set
 * @param docIdSet set to adapt; may be {@code null}
 * @return the equivalent doc set, or the shared empty set for null input
 * @throws IOException if iterating the doc id set fails
 */
public static DocSet convert(IndexReader reader, DocIdSet docIdSet) throws IOException {
    if (docIdSet == null) {
        return DocSet.EMPTY_DOC_SET;
    }
    if (docIdSet instanceof DocSet) {
        return (DocSet) docIdSet;
    }
    if (docIdSet instanceof OpenBitSet) {
        return new OpenBitDocSet((OpenBitSet) docIdSet);
    }
    // iterator() is allowed to return null; map that to the (cacheable)
    // empty set instead of wrapping.
    DocIdSetIterator iterator = docIdSet.iterator();
    if (iterator == null) {
        return DocSet.EMPTY_DOC_SET;
    }
    return new OpenBitDocSet(iterator, reader.maxDoc());
}

From source file:org.elasticsearch.common.lucene.docset.DocSets.java

License:Apache License

/**
 * Returns a cacheable version of the doc id set (might be the same instance provided as a parameter).
 *//*  w w  w.  j a  va2 s. c  o m*/
public static DocSet cacheable(IndexReader reader, DocIdSet docIdSet) throws IOException {
    if (docIdSet == null) {
        return DocSet.EMPTY_DOC_SET;
    } else if (docIdSet.isCacheable() && (docIdSet instanceof DocSet)) {
        return (DocSet) docIdSet;
    } else if (docIdSet instanceof OpenBitSet) {
        return new OpenBitDocSet((OpenBitSet) docIdSet);
    } else {
        final DocIdSetIterator it = docIdSet.iterator();
        // null is allowed to be returned by iterator(),
        // in this case we wrap with the empty set,
        // which is cacheable.
        return (it == null) ? DocSet.EMPTY_DOC_SET : new OpenBitDocSet(it, reader.maxDoc());
    }
}