Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexReader numDocs.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:org.olat.search.service.update.IndexUpdater.java

License:Apache License

/**
 * Drains the update- and delete-queues and applies them to the index:
 * deletes stale documents through an IndexReader, then re-adds the
 * updated documents through an IndexWriter and optimizes the index.
 * Any exception is caught and logged; this method never throws.
 */
private void doUpdate() {
    if (!updateQueue.isEmpty() || !deleteQueue.isEmpty()) {
        try {
            log.info("updateQueue.size=" + updateQueue.size() + " deleteQueue.size=" + deleteQueue.size());
            // 0. Snapshot and clear both queues under their own locks so
            //    producers can keep appending while we work on the copies.
            List<Document> updateCopy;
            synchronized (updateQueue) {
                updateCopy = new Vector<Document>(updateQueue);
                updateQueue.clear();
            }
            List<Document> deleteCopy;
            synchronized (deleteQueue) {
                deleteCopy = new Vector<Document>(deleteQueue);
                deleteQueue.clear();
            }
            // 1. Open index reader
            final File indexFile = new File(indexPath);
            final Directory directory = FSDirectory.open(indexFile);
            final IndexReader indexReader = IndexReader.open(directory);

            log.info("before delete: indexReader.numDocs()=" + indexReader.numDocs());
            // 2. Delete the old version of every document that will be
            //    re-added, plus everything explicitly queued for deletion.
            for (int i = 0; i < updateCopy.size(); i++) {
                final String resourceUrl = updateCopy.get(i).get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                final Term term = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                log.info("updateQueue:delete documents with resourceUrl=" + resourceUrl);
                indexReader.deleteDocuments(term);
            }
            for (int i = 0; i < deleteCopy.size(); i++) {
                final String resourceUrl = deleteCopy.get(i).get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                final Term term = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                log.info("deleteQueue:delete documents with resourceUrl='" + resourceUrl + "'");
                indexReader.deleteDocuments(term);
            }
            log.info("after delete: indexReader.numDocs()=" + indexReader.numDocs());
            // 3. Close the reader. The Directory must stay open: the writer
            //    below is constructed on this very same Directory instance,
            //    so closing it here would hand the writer a closed directory.
            indexReader.close();

            // 4. Open writer on the (still open) directory
            final IndexWriter indexWriter = new IndexWriter(directory,
                    new StandardAnalyzer(Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.UNLIMITED);
            indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); // for better performance
            // 5. Add the new/updated documents
            for (int i = 0; i < updateCopy.size(); i++) {
                final Document document = updateCopy.get(i);
                log.info("addDocument:" + document);
                indexWriter.addDocument(document);
            }
            // 6. Optimize and close writer, then release the directory
            long startOptimizeTime = 0;
            if (log.isDebug()) {
                startOptimizeTime = System.currentTimeMillis();
            }
            indexWriter.optimize();// TODO: may take too long or may no longer be necessary
            if (log.isDebug()) {
                log.debug("Optimized in " + (System.currentTimeMillis() - startOptimizeTime) + "ms");
            }
            indexWriter.close();
            directory.close();
        } catch (final Exception ex) {
            log.warn("Exception during doUpdate. ", ex);
        }
    } else {
        log.debug("Queues are empty.");
    }
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Collects the paths of all files stored in this index database by
 * iterating over the uid terms and converting each uid to its url form.
 *
 * @return set of file paths present in this index database
 * @throws IOException If an IO error occurs while reading from the database
 */
public Set<String> getFiles() throws IOException {
    Set<String> files = new HashSet<>();
    IndexReader ireader = null;

    try {
        // Open the existing on-disk index for reading.
        ireader = DirectoryReader.open(indexDirectory);
        TermsEnum uidIter = null;
        if (ireader.numDocs() > 0) {
            Fields allFields = MultiFields.getFields(ireader);
            Terms uidTerms = allFields.terms(QueryBuilder.U);
            uidIter = uidTerms.iterator(); // positioned before the first uid term
        }
        while (uidIter != null && uidIter.term() != null) {
            String uid = uidIter.term().utf8ToString();
            // Skip empty uid terms without recording them.
            if (uid.isEmpty()) {
                uidIter.next();
                continue;
            }

            files.add(Util.uid2url(uid));
            // Drop the iterator once the enum is exhausted to end the loop.
            if (uidIter.next() == null) {
                uidIter = null;
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }

    return files;
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Get number of documents in this index database.
 * @return number of documents/*  w  w  w. j av a 2s  .  c  o m*/
 * @throws IOException if I/O exception occurred
 */
public int getNumFiles() throws IOException {
    IndexReader ireader = null;
    int numDocs = 0;

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        numDocs = ireader.numDocs();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }

    return numDocs;
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Logs every definitions term whose document frequency exceeds 16 and whose
 * text is longer than {@code freq} characters.
 *
 * @param freq threshold compared against the term's string length —
 *             NOTE(review): despite its name this is a length cutoff, not a
 *             frequency; confirm intent with callers
 * @throws IOException if the index cannot be read
 */
public void listTokens(int freq) throws IOException {
    IndexReader reader = null;

    try {
        reader = DirectoryReader.open(indexDirectory);
        TermsEnum defsIter = null;
        if (reader.numDocs() > 0) {
            Fields allFields = MultiFields.getFields(reader);
            Terms defs = allFields.terms(QueryBuilder.DEFS);
            defsIter = defs.iterator(); // positioned before the first term
        }
        while (defsIter != null && defsIter.term() != null) {
            // Report only widely-used terms with sufficiently long text.
            if (defsIter.docFreq() > 16 && defsIter.term().utf8ToString().length() > freq) {
                LOGGER.warning(defsIter.term().utf8ToString());
            }
            // Drop the iterator once the enum is exhausted to end the loop.
            if (defsIter.next() == null) {
                defsIter = null;
            }
        }
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
}

From source file:org.opengrok.suggest.SuggesterSearcher.java

License:Open Source License

/**
 * Creates a searcher over the given index, caching its live document count
 * for later scoring.
 *
 * @param reader reader of the index for which to provide suggestions
 * @param resultSize size of the results
 */
SuggesterSearcher(final IndexReader reader, final int resultSize) {
    super(reader);
    this.resultSize = resultSize;
    this.numDocs = reader.numDocs();
}

From source file:org.opengrok.suggest.SuggesterUtils.java

License:Open Source License

/**
 * Computes the document frequency of {@code term} as a fraction of the
 * total number of documents in the index.
 *
 * @throws IOException if the frequency cannot be read from the index
 */
private static double computeNormalizedDocumentFrequency(final IndexReader indexReader, final Term term)
        throws IOException {
    final double docFreq = indexReader.docFreq(term);
    return docFreq / indexReader.numDocs();
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Logs summary statistics about the current index: total, deleted and
 * valid document counts, total and average field counts, and the number
 * of distinct "id" values. Any {@link IOException} is caught and logged
 * as a warning; this method never throws.
 */
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;

            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            // Walk every document slot up to maxDoc(); slots flagged as
            // deleted are skipped so only live docs feed the statistics.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i))
                    continue;
                doc = reader.document(i);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);

            }

            // NOTE(review): the average divides by reader.numDocs(), which
            // yields Infinity/NaN on an empty index — confirm acceptable.
            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());

        } finally {
            // NOTE(review): the reader itself is not closed here; presumably
            // its lifecycle is owned by the monitor below — confirm.
            if (currentMonitor != null) {
                currentMonitor.closeWhenPossible();
                currentMonitor = null;
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}

From source file:org.openrdf.sail.lucene.LuceneIndexTest.java

License:BSD License

/**
 * Verifies that addStatement/removeStatement keep exactly one Lucene
 * document per subject: new statements accumulate as fields on the same
 * document, removing one statement drops only its field, and removing the
 * last statement deletes the document entirely.
 */
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.addStatement(statement11);

    // check that it arrived properly
    IndexReader reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    Term term = new Term(LuceneIndex.URI_FIELD_NAME, subject.toString());
    TermDocs docs = reader.termDocs(term);
    assertTrue(docs.next());

    int documentNr = docs.doc();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));

    // the subject must map to exactly one document
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // add another statement
    index.addStatement(statement12);

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    // both statements should now live on the same document
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_35, LuceneIndex.TEXT_FIELD_NAME, analyzer);

    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    searcher.close();
    reader.close();

    // remove the first statement
    index.removeStatement(statement11);

    // check that that statement is actually removed and that the other still
    // exists
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    // only the second statement's field should remain on the document
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();

    reader.close();

    // remove the other statement
    index.removeStatement(statement12);

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = IndexReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}

From source file:org.openrdf.sail.lucene.LuceneIndexTest.java

License:BSD License

/**
 * Verifies batch add/remove via addRemoveStatements: statements about two
 * subjects collapse into two documents, each document carries the fields
 * of its subject's statements, and a subsequent add/remove pair updates a
 * document in place.
 */
public void testAddMultiple() throws Exception {
    // add four statements (two subjects) in a single batch
    HashSet<Statement> added = new HashSet<Statement>();
    HashSet<Statement> removed = new HashSet<Statement>();
    added.add(statement11);
    added.add(statement12);
    added.add(statement21);
    added.add(statement22);
    index.addRemoveStatements(added, removed);

    // check that it arrived properly: one document per subject
    IndexReader reader = IndexReader.open(directory);
    assertEquals(2, reader.numDocs());
    reader.close();

    // check the documents
    Document document = index.getDocuments(subject).iterator().next();
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement11, document);
    assertStatement(statement12, document);

    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement21, document);
    assertStatement(statement22, document);

    // check if the text field stores all added string values
    Set<String> texts = new HashSet<String>();
    texts.add("cats");
    texts.add("dogs");
    // FIXME
    // assertTexts(texts, document);

    // add/remove one statement for subject2 in the same batch call
    added.clear();
    removed.clear();
    added.add(statement23);
    removed.add(statement22);
    index.addRemoveStatements(added, removed);

    // check doc 2: statement23 added, statement22 gone, statement21 kept
    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement21, document);
    assertStatement(statement23, document);
    assertNoStatement(statement22, document);

    // check if the text field stores all added and no deleted string values
    texts.remove("dogs");
    texts.add("chicken");
    // FIXME
    // assertTexts(texts, document);

    // TODO: check deletion of the rest

}

From source file:org.openrdf.sail.lucene3.LuceneIndex.java

License:BSD License

/**
 * Logs summary statistics about the current index: total, deleted and
 * valid document counts, total and average field counts, and the number
 * of distinct "id" values. Any {@link IOException} is caught and logged
 * as a warning; this method never throws.
 */
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;

            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            // Walk every document slot up to maxDoc(); slots flagged as
            // deleted (via the isDeleted helper) are skipped so only live
            // docs feed the statistics.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);

            }

            // NOTE(review): the average divides by reader.numDocs(), which
            // yields Infinity/NaN on an empty index — confirm acceptable.
            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());

        } finally {
            // NOTE(review): the reader itself is not closed here; presumably
            // its lifecycle is owned by the monitor below — confirm.
            if (currentMonitor != null) {
                currentMonitor.closeWhenPossible();
                currentMonitor = null;
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}