Example usage for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader#numDocs() from several open-source projects.

Prototype

public abstract int numDocs();

Document

Returns the number of documents in this index. Deleted documents are not counted; maxDoc() gives the upper bound of the document-id space, which includes deletions.
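
For orientation, here is a minimal, self-contained sketch that opens a reader over an existing index and prints its live document count. It assumes a Lucene 5.x-or-later API (FSDirectory.open taking a java.nio.file.Path); the index path "/tmp/example-index" is a placeholder.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // Placeholder path: point this at a real index directory.
        try (Directory directory = FSDirectory.open(Paths.get("/tmp/example-index"));
                DirectoryReader reader = DirectoryReader.open(directory)) {
            // numDocs() counts live (non-deleted) documents, while maxDoc()
            // bounds the doc-id space and includes deleted documents.
            System.out.println("Live documents: " + reader.numDocs());
            System.out.println("Max doc id bound: " + reader.maxDoc());
        }
    }
}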

Usage

From source file: stroom.search.server.shard.IndexShardSearcherImpl.java

License: Apache License

IndexShardSearcherImpl(final IndexShard indexShard, final IndexWriter indexWriter) {
    this.indexShard = indexShard;
    this.indexWriter = indexWriter;

    Directory directory = null;
    IndexReader indexReader = null;

    try {
        // First try and open the reader with the current writer if one is in
        // use. If a writer is available this will give us the benefit of being
        // able to search documents that have not yet been flushed to disk.
        if (indexWriter != null) {
            try {
                indexReader = openWithWriter(indexWriter);
            } catch (final Exception e) {
                LOGGER.error(e.getMessage());
            }
        }

        // If we failed to open a reader with an existing writer then just try
        // and use the index shard directory.
        if (indexReader == null) {
            final Path dir = IndexShardUtil.getIndexPath(indexShard);

            if (!Files.isDirectory(dir)) {
                throw new SearchException(
                        "Index directory not found for searching: " + dir.toAbsolutePath().toString());
            }

            directory = new NIOFSDirectory(dir, NoLockFactory.INSTANCE);
            indexReader = DirectoryReader.open(directory);

            // Check the document count in the index matches the DB.
            final int actualDocumentCount = indexReader.numDocs();
            if (indexShard.getDocumentCount() != actualDocumentCount) {
                // We should only worry about document mismatch if the shard
                // is closed. However the shard
                // may still have been written to since we got this
                // reference.
                if (IndexShardStatus.CLOSED.equals(indexShard.getStatus())) {
                    LOGGER.warn("open() - Mismatch document count.  Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                } else if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("open() - Mismatch document count.  Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                }
            }
        }
    } catch (final IOException e) {
        throw new SearchException(e.getMessage(), e);
    }

    this.directory = directory;
    this.indexReader = indexReader;
}

From source file: uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository.java

License: Mozilla Public License

/**
 * Return the total number of documents in the IAView index.
 *
 * @return the total document count
 */
public int getTotalNbOfDocs() {
    IndexSearcher searcher = null;
    try {
        searcher = iaviewSearcherManager.acquire();
        IndexReader indexReader = searcher.getIndexReader();

        return indexReader.numDocs();
    } catch (IOException ioException) {
        throw new TaxonomyException(TaxonomyErrorType.LUCENE_IO_EXCEPTION, ioException);
    } finally {
        LuceneHelperTools.releaseSearcherManagerQuietly(iaviewSearcherManager, searcher);
    }
}

From source file: vagueobjects.ir.lda.lucene.Indexer.java

License: Apache License

private void collect(Term term, IndexReader reader, IndexWriter writer, String[] fieldNames)
        throws IOException {
    // numDocs() counts only live (non-deleted) documents, but document ids
    // run up to maxDoc(), so the loop must use maxDoc() as its bound and
    // skip deleted documents explicitly.
    int numDocs = reader.numDocs();
    int maxDoc = reader.maxDoc();
    logger.debug("Scanning " + maxDoc + " doc ids (" + numDocs + " live documents)");
    String field = term.field();
    String value = term.text();
    int count = 0;
    for (int d = 0; d < maxDoc; ++d) {
        // Check for deletion before loading the document.
        if (reader.isDeleted(d)) {
            continue;
        }
        Document source = reader.document(d);
        if (value.equals(source.get(field))) {
            ++count;
            if (count % 100000 == 0) {
                logger.debug("Passed " + count + "  documents");
            }
            Document document = new Document();
            for (String fieldName : fieldNames) {
                String v = source.get(fieldName);
                if (v != null) {
                    document.add(new Field(FIELD, v, Field.Store.YES, Field.Index.ANALYZED));
                }
            }
            writer.addDocument(document);
        }
    }
    if (count == 0) {
        throw new IllegalStateException("No matching documents found");
    }
}

From source file: vectorizer.TermInfo.java

public void loadDocuments(Directory dir, Dictionary dict) throws Exception {
    IndexReader reader = DirectoryReader.open(dir);
    //int numDocs = Math.min(reader.numDocs(), 1000);
    // Iterating doc ids 0..numDocs()-1 assumes this index contains no deletions;
    // with deletions present, maxDoc() bounds the doc-id space instead.
    int numDocs = reader.numDocs();

    // build the per-doc word maps
    for (int i = 0; i < numDocs; i++) {
        System.out.println("Loading term vector of document: " + i);
        DocVector dvector = buildTerms(reader, i, numDocs, dict);
        if (dvector != null)
            docWordMaps.add(dvector);
    }
    reader.close();
}

From source file: wvec.WordVecsIndexer.java

void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception {
    // Index where word vectors are stored
    IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath()));
    int numDocs = reader.numDocs();
    KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters);
    List<WordVec> wordList = new ArrayList<>(numDocs);

    // Read every word vector into memory; iterating ids 0..numDocs()-1 assumes no deletions
    for (int i = 0; i < numDocs; i++) {
        Document doc = reader.document(i);
        WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC));
        wordList.add(wvec);
    }

    // Call K-means clustering
    System.out.println("Clustering the entire vocabulary...");
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    // Save the cluster info
    System.out.println("Writing out cluster ids in Lucene index...");
    int clusterId = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        List<WordVec> pointsInThisCluster = c.getPoints();
        for (WordVec thisPoint : pointsInThisCluster) {
            Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId));
            clusterIndexWriter.addDocument(clusterInfo);
        }
        clusterId++;
    }

    reader.close();
}