List of usage examples for org.apache.lucene.index IndexReader numDocs
public abstract int numDocs();
From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java
License:BSD License
/**
 * Verifies the full lifecycle of single-statement indexing: adding two statements about
 * the same subject keeps them in one Lucene Document, querying finds their literals, and
 * removing them first shrinks and finally empties that Document.
 */
@Test
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.begin();
    index.addStatement(statement11);
    index.commit();

    // check that it arrived properly: exactly one document, keyed by the subject URI
    IndexReader reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());
    Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
    TermDocs docs = reader.termDocs(term);
    assertTrue(docs.next());
    int documentNr = docs.doc();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // add another statement about the same subject
    index.begin();
    index.addStatement(statement12);
    index.commit();

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased
    docs = reader.termDocs(term);
    assertTrue(docs.next());
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(docs.next());
    docs.close();

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_35, SearchFields.TEXT_FIELD_NAME, analyzer);
    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    searcher.close();
    reader.close();

    // remove the first statement
    index.begin();
    index.removeStatement(statement11);
    index.commit();

    // check that that statement is actually removed and that the other still
    // exists
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());
    docs = reader.termDocs(term);
    assertTrue(docs.next());
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // remove the other statement
    index.begin();
    index.removeStatement(statement12);
    index.commit();

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = IndexReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}
From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java
License:BSD License
@Test public void testAddMultiple() throws Exception { // add a statement to an index HashSet<Statement> added = new HashSet<Statement>(); HashSet<Statement> removed = new HashSet<Statement>(); added.add(statement11);/* w ww . j a v a2 s . c o m*/ added.add(statement12); added.add(statement21); added.add(statement22); index.begin(); index.addRemoveStatements(added, removed); index.commit(); // check that it arrived properly IndexReader reader = IndexReader.open(directory); assertEquals(2, reader.numDocs()); reader.close(); // check the documents Document document = index.getDocuments(subject).iterator().next(); assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement11, document); assertStatement(statement12, document); document = index.getDocuments(subject2).iterator().next(); assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement21, document); assertStatement(statement22, document); // check if the text field stores all added string values Set<String> texts = new HashSet<String>(); texts.add("cats"); texts.add("dogs"); // FIXME // assertTexts(texts, document); // add/remove one added.clear(); removed.clear(); added.add(statement23); removed.add(statement22); index.begin(); index.addRemoveStatements(added, removed); index.commit(); // check doc 2 document = index.getDocuments(subject2).iterator().next(); assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement21, document); assertStatement(statement23, document); assertNoStatement(statement22, document); // check if the text field stores all added and no deleted string values texts.remove("dogs"); texts.add("chicken"); // FIXME // assertTexts(texts, document); // TODO: check deletion of the rest }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/**
 * Update the content of this index database.
 *
 * <p>Walks every configured directory under the source root, (re)indexes changed files via
 * {@code indexDown}, and removes index entries (by their {@code u}id term) for files that no
 * longer exist. Guarded by {@code lock} so only one update runs at a time.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Fail fast if another update is already in progress; mark ourselves running.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    // Set up ctags for definition extraction; indexing still works without it,
    // but definition search will be unavailable.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk

        // Default to the whole source root (or the project's path) when no
        // explicit directories were configured.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                // NOTE(review): assumes uFields and the U field exist whenever the index
                // is non-empty — confirm; otherwise terms stays null below.
                Fields uFields = MultiFields.getFields(reader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (numDocs > 0) {
                    // Position the uid iterator at (or just past) this directory's first uid.
                    uidIter = terms.iterator(uidIter);
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }

                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[] { file_cnt, dir });
                    }
                }

                // The real indexing pass; advances uidIter past files that still exist.
                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Any uids still remaining under startuid belong to deleted files: purge them.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Always commit/close the writer and release resources, even on failure.
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }

    // On a successful, dirty run: optionally optimize, then touch the timestamp
    // file used to detect when the index was last updated.
    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING, "Failed to create file ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/** * List all of the files in this index database * * @throws IOException If an IO error occurs while reading from the database *//*from w ww . jav a2 s.c o m*/ public void listFiles() throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); // open existing index int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { log.fine(Util.uid2url(iter.term().utf8ToString())); BytesRef next = iter.next(); if (next == null) { iter = null; } } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
public void listTokens(int freq) throws IOException { IndexReader ireader = null; TermsEnum iter = null;//www.java 2 s.c o m Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { log.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next == null) { iter = null; } /*} else { break; }*/ } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.oscm.search.IndexRequestMasterListenerIT.java
private void assertDocsInIndex(final Class<?> clazz, final String comment, final int expectedNumDocs, final int expectedNumIndexedAttributes, final List<String> expectedAttributes) throws Exception { Boolean evaluationTookPlace = runTX(new Callable<Boolean>() { @Override// w ww .jav a2s. c o m public Boolean call() throws Exception { boolean evaluatedIndex = false; Session session = dm.getSession(); if (session != null) { FullTextSession fullTextSession = Search.getFullTextSession(session); SearchFactory searchFactory = fullTextSession.getSearchFactory(); IndexReader reader = searchFactory.getIndexReaderAccessor().open(clazz); try { assertEquals(comment, expectedNumDocs, reader.numDocs()); if (expectedNumDocs > 0) { final FieldInfos indexedFieldNames = ReaderUtil.getMergedFieldInfos(reader); for (String expectedAttr : expectedAttributes) { assertNotNull("attribute " + expectedAttr + " does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo(expectedAttr)); } assertNotNull("attribute \"key\" does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo("key")); assertNotNull( "attribute \"_hibernate_class\" does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo("_hibernate_class")); assertEquals( "More or less attributes indexed than expected, attributes retrieved from index: " + indexedFieldNames, expectedNumIndexedAttributes + 2, indexedFieldNames.size()); evaluatedIndex = true; } } finally { searchFactory.getIndexReaderAccessor().close(reader); } } return Boolean.valueOf(evaluatedIndex); } }); if (expectedNumDocs > 0) { Assert.assertTrue("Index not found, no evaluation took place", evaluationTookPlace.booleanValue()); } }
From source file:org.punksearch.crawler.IndexOperator.java
License:Open Source License
public static void deleteByAge(String dirPath, float days) { try {/*from w w w .jav a 2 s . c o m*/ final Directory dir = LuceneUtils.dir(dirPath); boolean indexExists = IndexReader.indexExists(dir); if (!indexExists) { return; } final IndexWriter iw = createIndexWriter(dirPath); final IndexReader ir = IndexReader.open(dir); IndexSearcher is = new IndexSearcher(ir); long min = 0; long max = System.currentTimeMillis() - Math.round(days * 1000 * 3600 * 24); final TermRangeQuery oldDocsQuery = new TermRangeQuery(IndexFields.INDEXED, DateTools.timeToString(min, DateTools.Resolution.MILLISECOND), DateTools.timeToString(max, DateTools.Resolution.MILLISECOND), true, false); final int docsInReader = ir.numDocs(); final TopDocs topDocs = is.search(oldDocsQuery, Math.max(1, docsInReader)); log.info("Deleting by age from index directory. Items to delete: " + topDocs.totalHits); iw.deleteDocuments(oldDocsQuery); iw.close(); } catch (IOException ex) { log.error("Exception during deleting by age from index directory", ex); throw new RuntimeException(ex); } }
From source file:org.punksearch.web.statistics.FileTypeStatistics.java
License:Open Source License
private static Hits extractDocsForType(String type) { Filter filter = TypeFilters.get(type); try {/*from www. j a va2s . c o m*/ IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher(); IndexReader indexReader = indexSearcher.getIndexReader(); final TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), filter, indexReader.numDocs()); return new Hits(indexSearcher, topDocs); } catch (Exception e) { log.error("error extractDocsForType", e); return null; } }
From source file:org.punksearch.web.statistics.FileTypeStatistics.java
License:Open Source License
public static synchronized Long totalSize() { if (totalSizeCache == null || indexChangedAfter(totalSizeCacheTimestamp)) { long size = 0; try {//from w w w . j a va 2 s . com // Rough approximation to the root directories. // Obviously, non-latin1 directory names slip through the filter, we'll catch them later // Maybe we should use some ranges with UTF8-16 characters... TODO String approxQuery = "*:* -Path:{a TO Z*} -Path:{0 TO 9*}"; QueryParser parser = new QueryParser(LuceneVersion.VERSION, "Host", new SimpleAnalyzer(LuceneVersion.VERSION)); Query query = parser.parse(approxQuery); IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher(); IndexReader indexReader = indexSearcher.getIndexReader(); final TopDocs topDocs = indexSearcher.search(query, indexReader.numDocs()); Hits hits = new Hits(indexSearcher, topDocs); for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); String path = doc.get(IndexFields.PATH); if (!path.equals("/")) { continue; } size += Long.parseLong(doc.get(IndexFields.SIZE)); } } catch (Exception e) { log.error("", e); } totalSizeCache = size; totalSizeCacheTimestamp = System.currentTimeMillis(); } return totalSizeCache; }
From source file:org.roosster.store.EntryStore.java
License:Open Source License
/** *//*www . ja v a 2 s . c o m*/ public int getDocNum() throws IOException { if (!isInitialized()) throw new IllegalStateException("Database must be initialized before use!"); int numdocs = 0; IndexReader reader = null; try { reader = getReader(); numdocs = reader.numDocs(); } finally { if (reader != null) reader.close(); } return numdocs; }