List of usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
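numDocs() returns the number of live (non-deleted) documents, whereas maxDoc() returns one greater than the largest document id, so the two values differ once the index contains deletions. A minimal sketch of the difference, assuming Lucene 5+ and a placeholder index path:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // The index path is a placeholder for illustration.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/path/to/index"));
                DirectoryReader reader = DirectoryReader.open(dir)) {
            System.out.println("numDocs = " + reader.numDocs()); // live documents only
            System.out.println("maxDoc  = " + reader.maxDoc());  // includes deleted ids
        }
    }
}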
From source file:stroom.search.server.shard.IndexShardSearcherImpl.java
License:Apache License
IndexShardSearcherImpl(final IndexShard indexShard, final IndexWriter indexWriter) {
    this.indexShard = indexShard;
    this.indexWriter = indexWriter;

    Directory directory = null;
    IndexReader indexReader = null;

    try {
        // First try to open the reader with the current writer if one is in
        // use. If a writer is available this gives us the benefit of being
        // able to search documents that have not yet been flushed to disk.
        if (indexWriter != null) {
            try {
                indexReader = openWithWriter(indexWriter);
            } catch (final Exception e) {
                LOGGER.error(e.getMessage());
            }
        }

        // If we failed to open a reader with an existing writer then just
        // try to use the index shard directory.
        if (indexReader == null) {
            final Path dir = IndexShardUtil.getIndexPath(indexShard);
            if (!Files.isDirectory(dir)) {
                throw new SearchException(
                        "Index directory not found for searching: " + dir.toAbsolutePath().toString());
            }

            directory = new NIOFSDirectory(dir, NoLockFactory.INSTANCE);
            indexReader = DirectoryReader.open(directory);

            // Check that the document count in the index matches the DB.
            final int actualDocumentCount = indexReader.numDocs();
            if (indexShard.getDocumentCount() != actualDocumentCount) {
                // We should only worry about a document count mismatch if the
                // shard is closed. However, the shard may still have been
                // written to since we got this reference.
                if (IndexShardStatus.CLOSED.equals(indexShard.getStatus())) {
                    LOGGER.warn("open() - Mismatch document count. Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                } else if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("open() - Mismatch document count. Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                }
            }
        }
    } catch (final IOException e) {
        throw new SearchException(e.getMessage(), e);
    }

    this.directory = directory;
    this.indexReader = indexReader;
}
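Note that openWithWriter(...) above is stroom's own helper, not shown here. In stock Lucene the same near-real-time behaviour comes from DirectoryReader.open(IndexWriter), so a minimal sketch of such a helper might look like the following (an assumption, not stroom's actual implementation; Lucene 5+ signature):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;

// Hypothetical stand-in for stroom's openWithWriter(): a near-real-time
// reader sees documents buffered by the writer that are not yet committed.
private IndexReader openWithWriter(final IndexWriter writer) throws IOException {
    return DirectoryReader.open(writer);
}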
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository.java
License:Mozilla Public License
/**
 * Return the total number of documents in the IAView index.
 *
 * @return the document count
 */
public int getTotalNbOfDocs() {
    IndexSearcher searcher = null;
    try {
        searcher = iaviewSearcherManager.acquire();
        IndexReader indexReader = searcher.getIndexReader();
        return indexReader.numDocs();
    } catch (IOException ioException) {
        throw new TaxonomyException(TaxonomyErrorType.LUCENE_IO_EXCEPTION, ioException);
    } finally {
        LuceneHelperTools.releaseSearcherManagerQuietly(iaviewSearcherManager, searcher);
    }
}
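LuceneHelperTools.releaseSearcherManagerQuietly(...) is project code; the underlying contract is Lucene's own SearcherManager acquire/release pairing, in which every acquired searcher must be released so its reader can be closed after a refresh. A minimal sketch of the same count with plain Lucene (assuming Lucene 4+):

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;

// Standard acquire/release pattern for SearcherManager.
int countDocs(SearcherManager manager) throws IOException {
    IndexSearcher searcher = manager.acquire();
    try {
        return searcher.getIndexReader().numDocs();
    } finally {
        manager.release(searcher);
    }
}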
From source file:vagueobjects.ir.lda.lucene.Indexer.java
License:Apache License
private void collect(Term term, IndexReader reader, IndexWriter writer, String[] fieldNames)
        throws IOException {
    // numDocs() counts only live documents, but document ids run up to
    // maxDoc(), so when the index may contain deletions the loop must be
    // bounded by maxDoc() and deleted ids skipped explicitly.
    int maxDoc = reader.maxDoc();
    String field = term.field();
    String value = term.text();
    int count = 0;
    for (int d = 0; d < maxDoc; ++d) {
        if (reader.isDeleted(d)) {
            continue;
        }
        Document source = reader.document(d);
        if (value.equals(source.get(field))) {
            ++count;
            if (count % 100000 == 0) {
                logger.debug("Passed " + count + " documents");
            }
            Document document = new Document();
            for (String fieldName : fieldNames) {
                String v = source.get(fieldName);
                if (v != null) {
                    document.add(new Field(FIELD, v, Field.Store.YES, Field.Index.ANALYZED));
                }
            }
            writer.addDocument(document);
        }
    }
    if (count == 0) {
        throw new IllegalStateException("No matching documents found");
    }
}
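This example uses the Lucene 3.x API: reader.isDeleted(int) and Field.Index.ANALYZED were removed in Lucene 4, where deletions are instead exposed as a Bits set of live documents. A rough sketch of the same deletion check in the newer API (assuming Lucene 4-7, where MultiFields.getLiveDocs exists):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

// Lucene 4+ replacement for reader.isDeleted(d): a null liveDocs
// set means the index contains no deletions at all.
Bits liveDocs = MultiFields.getLiveDocs(reader);
for (int d = 0; d < reader.maxDoc(); ++d) {
    if (liveDocs != null && !liveDocs.get(d)) {
        continue; // skip deleted document
    }
    Document source = reader.document(d);
    // ... filter and re-index as above ...
}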
From source file:vectorizer.TermInfo.java
public void loadDcuments(Directory dir, Dictionary dict) throws Exception {
    IndexReader reader = DirectoryReader.open(dir);
    //int numDocs = Math.min(reader.numDocs(), 1000);
    int numDocs = reader.numDocs();

    // Build the per-document word maps.
    for (int i = 0; i < numDocs; i++) {
        System.out.println("Loading term vector of document: " + i);
        DocVector dvector = buildTerms(reader, i, numDocs, dict);
        if (dvector != null)
            docWordMaps.add(dvector);
    }
    reader.close();
}
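buildTerms(...) and DocVector are this project's own types, not shown here. If the index stores term vectors, such a helper plausibly walks them via IndexReader.getTermVector; the following is a hypothetical sketch only (assuming Lucene 5+ and a field indexed with term vectors enabled):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical sketch: walk the stored term vector of one document and
// print each term with its within-document frequency.
static void dumpTermVector(IndexReader reader, int docId, String field) throws Exception {
    Terms terms = reader.getTermVector(docId, field); // null if no vector was stored
    if (terms == null) {
        return;
    }
    TermsEnum it = terms.iterator();
    BytesRef term;
    while ((term = it.next()) != null) {
        System.out.println(term.utf8ToString() + " -> " + it.totalTermFreq());
    }
}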
From source file:wvec.WordVecsIndexer.java
void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception {
    // Index where the word vectors are stored.
    IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath()));
    int numDocs = reader.numDocs();
    KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters);
    List<WordVec> wordList = new ArrayList<>(numDocs);

    // Read every word vector and load it in memory.
    for (int i = 0; i < numDocs; i++) {
        Document doc = reader.document(i);
        WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC));
        wordList.add(wvec);
    }

    // Run K-means clustering.
    System.out.println("Clustering the entire vocabulary...");
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    // Save the cluster info.
    System.out.println("Writing out cluster ids in Lucene index...");
    int clusterId = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        List<WordVec> pointsInThisCluster = c.getPoints();
        for (WordVec thisPoint : pointsInThisCluster) {
            Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId));
            clusterIndexWriter.addDocument(clusterInfo);
        }
        clusterId++;
    }
    reader.close();
}
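KMeansPlusPlusClusterer and CentroidCluster come from Apache Commons Math 3 (org.apache.commons.math3.ml.clustering), so WordVec must implement its Clusterable interface for the cluster(...) call to compile. constructDoc(...) is the project's own helper; a minimal hypothetical sketch (the field names "word" and "clusterId" are assumptions, not the project's actual constants):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

// Hypothetical helper: one document per word, carrying its cluster id.
// StringField indexes the value as a single token and stores it verbatim,
// which suits exact-match lookups of words and cluster ids.
static Document constructDoc(String word, String clusterId) {
    Document doc = new Document();
    doc.add(new StringField("word", word, Field.Store.YES));
    doc.add(new StringField("clusterId", clusterId, Field.Store.YES));
    return doc;
}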