List of usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
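numDocs() returns the number of live (non-deleted) documents, whereas maxDoc() returns one greater than the largest document id, so the two values differ once the index contains deletions. A minimal sketch of the difference, assuming Lucene 5+ and a placeholder index path:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // The index path is a placeholder for illustration.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/path/to/index"));
                DirectoryReader reader = DirectoryReader.open(dir)) {
            System.out.println("numDocs = " + reader.numDocs()); // live documents only
            System.out.println("maxDoc  = " + reader.maxDoc());  // includes deleted ids
        }
    }
}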
From source file:stroom.search.server.shard.IndexShardSearcherImpl.java
License:Apache License
IndexShardSearcherImpl(final IndexShard indexShard, final IndexWriter indexWriter) {
    this.indexShard = indexShard;
    this.indexWriter = indexWriter;

    Directory directory = null;
    IndexReader indexReader = null;

    try {
        // First try to open the reader with the current writer if one is in
        // use. If a writer is available this gives us the benefit of being
        // able to search documents that have not yet been flushed to disk.
        if (indexWriter != null) {
            try {
                indexReader = openWithWriter(indexWriter);
            } catch (final Exception e) {
                LOGGER.error(e.getMessage());
            }
        }

        // If we failed to open a reader with an existing writer then just
        // try to use the index shard directory.
        if (indexReader == null) {
            final Path dir = IndexShardUtil.getIndexPath(indexShard);
            if (!Files.isDirectory(dir)) {
                throw new SearchException(
                        "Index directory not found for searching: " + dir.toAbsolutePath().toString());
            }

            directory = new NIOFSDirectory(dir, NoLockFactory.INSTANCE);
            indexReader = DirectoryReader.open(directory);

            // Check that the document count in the index matches the DB.
            final int actualDocumentCount = indexReader.numDocs();
            if (indexShard.getDocumentCount() != actualDocumentCount) {
                // We should only worry about a document count mismatch if the
                // shard is closed. However, the shard may still have been
                // written to since we got this reference.
                if (IndexShardStatus.CLOSED.equals(indexShard.getStatus())) {
                    LOGGER.warn("open() - Mismatch document count. Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                } else if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("open() - Mismatch document count. Index says " + actualDocumentCount
                            + " DB says " + indexShard.getDocumentCount());
                }
            }
        }
    } catch (final IOException e) {
        throw new SearchException(e.getMessage(), e);
    }

    this.directory = directory;
    this.indexReader = indexReader;
}
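Note that openWithWriter(...) above is stroom's own helper, not shown here. In stock Lucene the same near-real-time behaviour comes from DirectoryReader.open(IndexWriter), so a minimal sketch of such a helper might look like the following (an assumption, not stroom's actual implementation; Lucene 5+ signature):

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;

// Hypothetical stand-in for stroom's openWithWriter(): a near-real-time
// reader sees documents buffered by the writer that are not yet committed.
private IndexReader openWithWriter(final IndexWriter writer) throws IOException {
    return DirectoryReader.open(writer);
}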
From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.IAViewRepository.java
License:Mozilla Public License
/**
 * Return the total number of documents in the IAView index.
 *
 * @return the document count
 */
public int getTotalNbOfDocs() {
    IndexSearcher searcher = null;
    try {
        searcher = iaviewSearcherManager.acquire();
        IndexReader indexReader = searcher.getIndexReader();
        return indexReader.numDocs();
    } catch (IOException ioException) {
        throw new TaxonomyException(TaxonomyErrorType.LUCENE_IO_EXCEPTION, ioException);
    } finally {
        LuceneHelperTools.releaseSearcherManagerQuietly(iaviewSearcherManager, searcher);
    }
}
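LuceneHelperTools.releaseSearcherManagerQuietly(...) is project code; the underlying contract is Lucene's own SearcherManager acquire/release pairing, in which every acquired searcher must be released so its reader can be closed after a refresh. A minimal sketch of the same count with plain Lucene (assuming Lucene 4+):

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;

// Standard acquire/release pattern for SearcherManager.
int countDocs(SearcherManager manager) throws IOException {
    IndexSearcher searcher = manager.acquire();
    try {
        return searcher.getIndexReader().numDocs();
    } finally {
        manager.release(searcher);
    }
}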
From source file:vagueobjects.ir.lda.lucene.Indexer.java
License:Apache License
private void collect(Term term, IndexReader reader, IndexWriter writer, String[] fieldNames)
        throws IOException {
    // numDocs() counts only live documents, but document ids run up to
    // maxDoc(), so when the index may contain deletions the loop must be
    // bounded by maxDoc() and deleted ids skipped explicitly.
    int maxDoc = reader.maxDoc();
    String field = term.field();
    String value = term.text();
    int count = 0;
    for (int d = 0; d < maxDoc; ++d) {
        if (reader.isDeleted(d)) {
            continue;
        }
        Document source = reader.document(d);
        if (value.equals(source.get(field))) {
            ++count;
            if (count % 100000 == 0) {
                logger.debug("Passed " + count + " documents");
            }
            Document document = new Document();
            for (String fieldName : fieldNames) {
                String v = source.get(fieldName);
                if (v != null) {
                    document.add(new Field(FIELD, v, Field.Store.YES, Field.Index.ANALYZED));
                }
            }
            writer.addDocument(document);
        }
    }
    if (count == 0) {
        throw new IllegalStateException("No matching documents found");
    }
}
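This example uses the Lucene 3.x API: reader.isDeleted(int) and Field.Index.ANALYZED were removed in Lucene 4, where deletions are instead exposed as a Bits set of live documents. A rough sketch of the same deletion check in the newer API (assuming Lucene 4-7, where MultiFields.getLiveDocs exists):

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

// Lucene 4+ replacement for reader.isDeleted(d): a null liveDocs
// set means the index contains no deletions at all.
Bits liveDocs = MultiFields.getLiveDocs(reader);
for (int d = 0; d < reader.maxDoc(); ++d) {
    if (liveDocs != null && !liveDocs.get(d)) {
        continue; // skip deleted document
    }
    Document source = reader.document(d);
    // ... filter and re-index as above ...
}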
From source file:vectorizer.TermInfo.java
public void loadDcuments(Directory dir, Dictionary dict) throws Exception {
    IndexReader reader = DirectoryReader.open(dir);
    //int numDocs = Math.min(reader.numDocs(), 1000);
    int numDocs = reader.numDocs();

    // Build the per-document word maps.
    for (int i = 0; i < numDocs; i++) {
        System.out.println("Loading term vector of document: " + i);
        DocVector dvector = buildTerms(reader, i, numDocs, dict);
        if (dvector != null)
            docWordMaps.add(dvector);
    }
    reader.close();
}
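buildTerms(...) and DocVector are this project's own types, not shown here. If the index stores term vectors, such a helper plausibly walks them via IndexReader.getTermVector; the following is a hypothetical sketch only (assuming Lucene 5+ and a field indexed with term vectors enabled):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical sketch: walk the stored term vector of one document and
// print each term with its within-document frequency.
static void dumpTermVector(IndexReader reader, int docId, String field) throws Exception {
    Terms terms = reader.getTermVector(docId, field); // null if no vector was stored
    if (terms == null) {
        return;
    }
    TermsEnum it = terms.iterator();
    BytesRef term;
    while ((term = it.next()) != null) {
        System.out.println(term.utf8ToString() + " -> " + it.totalTermFreq());
    }
}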
From source file:wvec.WordVecsIndexer.java
void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception {
    // Index where the word vectors are stored.
    IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath()));
    int numDocs = reader.numDocs();
    KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters);
    List<WordVec> wordList = new ArrayList<>(numDocs);

    // Read every word vector and load it in memory.
    for (int i = 0; i < numDocs; i++) {
        Document doc = reader.document(i);
        WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC));
        wordList.add(wvec);
    }

    // Run K-means clustering.
    System.out.println("Clustering the entire vocabulary...");
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    // Save the cluster info.
    System.out.println("Writing out cluster ids in Lucene index...");
    int clusterId = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        List<WordVec> pointsInThisCluster = c.getPoints();
        for (WordVec thisPoint : pointsInThisCluster) {
            Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId));
            clusterIndexWriter.addDocument(clusterInfo);
        }
        clusterId++;
    }
    reader.close();
}
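KMeansPlusPlusClusterer and CentroidCluster come from Apache Commons Math 3 (org.apache.commons.math3.ml.clustering), so WordVec must implement its Clusterable interface for the cluster(...) call to compile. constructDoc(...) is the project's own helper; a minimal hypothetical sketch (the field names "word" and "clusterId" are assumptions, not the project's actual constants):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

// Hypothetical helper: one document per word, carrying its cluster id.
// StringField indexes the value as a single token and stores it verbatim,
// which suits exact-match lookups of words and cluster ids.
static Document constructDoc(String word, String clusterId) {
    Document doc = new Document();
    doc.add(new StringField("word", word, Field.Store.YES));
    doc.add(new StringField("clusterId", clusterId, Field.Store.YES));
    return doc;
}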