Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

This page lists example usages of the org.apache.lucene.index.IndexReader method numDocs().

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java

License:BSD License

/**
 * Adds and removes two statements one at a time and, after every commit,
 * opens a fresh IndexReader on the directory to check the document count
 * and the stored field values of the indexed document.
 *
 * Expected document counts, as asserted below: 1 after adding statement11,
 * still 1 after adding statement12, 1 after removing statement11, and 0
 * after removing statement12.
 */
@Test
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.begin();
    index.addStatement(statement11);
    index.commit();

    // check that it arrived properly
    IndexReader reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    // look the document up through its URI term; exactly one match is expected
    Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
    TermDocs docs = reader.termDocs(term);
    assertTrue(docs.next());

    // the stored document carries the subject URI and, under the predicate's
    // name, the label of object1
    int documentNr = docs.doc();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));

    // no second document for the same URI term
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // add another statement
    index.begin();
    index.addStatement(statement12);
    index.commit();

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    // the single document now holds the values of both statements
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_35, SearchFields.TEXT_FIELD_NAME, analyzer);

    // each label, parsed as a query on the text field, must hit exactly once
    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    searcher.close();
    reader.close();

    // remove the first statement
    index.begin();
    index.removeStatement(statement11);
    index.commit();

    // check that that statement is actually removed and that the other still
    // exists
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    // the predicate1 field must be gone while the predicate2 field remains
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();

    reader.close();

    // remove the other statement
    index.begin();
    index.removeStatement(statement12);
    index.commit();

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = IndexReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}

From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java

License:BSD License

/**
 * Adds four statements in one batch, checks that they end up in two
 * documents (one per subject), then swaps statement22 for statement23 in a
 * second batch and checks the document of subject2 again.
 */
@Test
public void testAddMultiple() throws Exception {
    // first batch: four additions, nothing to remove
    HashSet<Statement> toAdd = new HashSet<Statement>();
    HashSet<Statement> toRemove = new HashSet<Statement>();
    toAdd.add(statement11);
    toAdd.add(statement12);
    toAdd.add(statement21);
    toAdd.add(statement22);

    index.begin();
    index.addRemoveStatements(toAdd, toRemove);
    index.commit();

    // two documents are expected in the index after the first batch
    IndexReader indexReader = IndexReader.open(directory);
    assertEquals(2, indexReader.numDocs());
    indexReader.close();

    // document of the first subject
    Document subjectDoc = index.getDocuments(subject).iterator().next();
    assertEquals(subject.toString(), subjectDoc.get(SearchFields.URI_FIELD_NAME));
    assertStatement(statement11, subjectDoc);
    assertStatement(statement12, subjectDoc);

    // document of the second subject
    subjectDoc = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), subjectDoc.get(SearchFields.URI_FIELD_NAME));
    assertStatement(statement21, subjectDoc);
    assertStatement(statement22, subjectDoc);

    // expected contents of the text field; the actual check is still disabled
    Set<String> expectedTexts = new HashSet<String>();
    expectedTexts.add("cats");
    expectedTexts.add("dogs");
    // FIXME
    // assertTexts(expectedTexts, subjectDoc);

    // second batch: add statement23 and remove statement22 in one go
    toAdd.clear();
    toRemove.clear();
    toAdd.add(statement23);
    toRemove.add(statement22);

    index.begin();
    index.addRemoveStatements(toAdd, toRemove);
    index.commit();

    // document of the second subject after the swap
    subjectDoc = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), subjectDoc.get(SearchFields.URI_FIELD_NAME));
    assertStatement(statement21, subjectDoc);
    assertStatement(statement23, subjectDoc);
    assertNoStatement(statement22, subjectDoc);

    // the text field should now hold the added value and no deleted one
    expectedTexts.remove("dogs");
    expectedTexts.add("chicken");
    // FIXME
    // assertTexts(expectedTexts, subjectDoc);

    // TODO: check deletion of the rest

}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database.
 *
 * Only one update may run at a time: the {@code running} flag is set under
 * {@code lock} and a second concurrent call fails with an IOException.
 * For every configured directory the method opens a reader on the existing
 * index, positions the shared {@code uidIter} at the first uid term at or
 * after the directory's start uid, walks the source tree with
 * {@code indexDown}, and finally calls {@code removeFile()} for every
 * remaining uid term that still starts with the directory's start uid.
 * When the run was not interrupted and the database is dirty, the index is
 * optionally optimized and the "timestamp" file in the data root is touched.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // claim the "running" state; refuse to start a second concurrent update
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    // set up ctags only when a binary is configured
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk

        // no explicit directories: index the project path, or the whole
        // source root ("") when there is no project
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }
            // NOTE(review): terms stays null if the U field is absent although
            // numDocs > 0; terms.iterator(...) below would then throw - confirm

            try {
                if (numDocs > 0) {
                    // position the shared uid iterator at the first term >= startuid
                    uidIter = terms.iterator(uidIter);
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }
                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[] { file_cnt, dir });
                    }
                }

                // second pass with the count flag set to false
                // (presumably the pass that actually indexes - verify in indexDown)
                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // uid terms still under this directory after the walk are handed
                // to removeFile() (presumably entries without a source file - confirm)
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // always try to commit and close the writer, even after a failure
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }

        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }

        // release the "running" state claimed at the top of the method
        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        // touch (or create) the "timestamp" file in the data root
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING, "Failed to create file ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * List all of the files in this index database.
 *
 * Every term of the uid field ({@code QueryBuilder.U}) is converted with
 * {@code Util.uid2url} and logged at level FINE. Nothing is logged when the
 * index holds no documents or has no uid field.
 *
 * @throws IOException If an IO error occurs while reading from the database
 */
public void listFiles() throws IOException {
    IndexReader ireader = null;

    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index

        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                terms = uFields.terms(QueryBuilder.U);
            }
        }
        if (terms == null) {
            // empty index or no uid field: nothing to list
            // (previously this dereferenced null and threw a NullPointerException)
            return;
        }

        // A fresh TermsEnum is unpositioned, so term() must not be called
        // before next(); drive the loop with next() instead.
        TermsEnum iter = terms.iterator(null);
        BytesRef term;
        while ((term = iter.next()) != null) {
            log.fine(Util.uid2url(term.utf8ToString()));
        }
    } finally {

        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Logs, at level WARNING, every term of the definitions field
 * ({@code QueryBuilder.DEFS}) that occurs in more than 16 documents and
 * whose text is longer than {@code freq} characters. Nothing is logged when
 * the index holds no documents or has no such field.
 *
 * @param freq minimum term length (exclusive) for a term to be reported;
 *             despite its name the value is compared against the term's
 *             string length, not a frequency
 * @throws IOException if an IO error occurs while reading from the index
 */
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;

    try {
        ireader = DirectoryReader.open(indexDirectory);

        Terms terms = null;
        if (ireader.numDocs() > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            if (uFields != null) {
                terms = uFields.terms(QueryBuilder.DEFS);
            }
        }
        if (terms == null) {
            // empty index or no definitions field: nothing to list
            // (previously this dereferenced null and threw a NullPointerException)
            return;
        }

        // A fresh TermsEnum is unpositioned, so term()/docFreq() must not be
        // called before next(); drive the loop with next() instead.
        TermsEnum iter = terms.iterator(null);
        BytesRef term;
        while ((term = iter.next()) != null) {
            String token = term.utf8ToString();
            if (iter.docFreq() > 16 && token.length() > freq) {
                log.warning(token);
            }
        }
    } finally {

        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing index reader", e);
            }
        }
    }
}

From source file:org.oscm.search.IndexRequestMasterListenerIT.java

/**
 * Asserts, inside a transaction started via {@code runTX}, that the index
 * for {@code clazz} holds the expected number of documents and, when
 * documents are expected, the expected set of indexed attributes.
 *
 * @param clazz                        entity class whose index is opened
 * @param comment                      message used for the document-count assertion
 * @param expectedNumDocs              expected result of {@code numDocs()}
 * @param expectedNumIndexedAttributes expected number of indexed attributes, not
 *                                     counting "key" and "_hibernate_class"
 * @param expectedAttributes           attribute names that must be present in the index
 * @throws Exception whatever {@code runTX} or the check itself throws
 */
private void assertDocsInIndex(final Class<?> clazz, final String comment, final int expectedNumDocs,
        final int expectedNumIndexedAttributes, final List<String> expectedAttributes) throws Exception {
    final Callable<Boolean> indexCheck = new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            final Session hibernateSession = dm.getSession();
            if (hibernateSession == null) {
                // without a session the index cannot be inspected at all
                return Boolean.FALSE;
            }

            final SearchFactory factory = Search.getFullTextSession(hibernateSession).getSearchFactory();
            final IndexReader indexReader = factory.getIndexReaderAccessor().open(clazz);
            try {
                assertEquals(comment, expectedNumDocs, indexReader.numDocs());
                if (expectedNumDocs <= 0) {
                    // only the document count is checked for an empty index
                    return Boolean.FALSE;
                }

                final FieldInfos fieldInfos = ReaderUtil.getMergedFieldInfos(indexReader);
                for (String attributeName : expectedAttributes) {
                    assertNotNull("attribute " + attributeName + " does not exist in index: " + fieldInfos,
                            fieldInfos.fieldInfo(attributeName));
                }
                assertNotNull("attribute \"key\" does not exist in index: " + fieldInfos,
                        fieldInfos.fieldInfo("key"));
                assertNotNull("attribute \"_hibernate_class\" does not exist in index: " + fieldInfos,
                        fieldInfos.fieldInfo("_hibernate_class"));
                // the two extra fields are "key" and "_hibernate_class"
                assertEquals(
                        "More or less attributes indexed than expected, attributes retrieved from index: "
                                + fieldInfos,
                        expectedNumIndexedAttributes + 2, fieldInfos.size());
                return Boolean.TRUE;
            } finally {
                factory.getIndexReaderAccessor().close(indexReader);
            }
        }
    };

    final Boolean evaluationTookPlace = runTX(indexCheck);

    if (expectedNumDocs > 0) {
        Assert.assertTrue("Index not found, no evaluation took place", evaluationTookPlace.booleanValue());
    }
}

From source file:org.punksearch.crawler.IndexOperator.java

License:Open Source License

/**
 * Deletes from the index at {@code dirPath} every document whose
 * {@code IndexFields.INDEXED} timestamp is older than {@code days} days.
 * Does nothing when no index exists in that directory.
 *
 * @param dirPath path of the index directory
 * @param days    maximum age, in days, of documents to keep (fractions allowed)
 * @throws RuntimeException wrapping the IOException if the index cannot be
 *                          read or written
 */
public static void deleteByAge(String dirPath, float days) {
    try {
        final Directory dir = LuceneUtils.dir(dirPath);
        if (!IndexReader.indexExists(dir)) {
            return;
        }

        final IndexWriter iw = createIndexWriter(dirPath);
        try {
            // Compute the age in double/long arithmetic. The previous
            // Math.round(float) returned an int, which saturates at
            // Integer.MAX_VALUE milliseconds (about 24.8 days) and caused
            // younger documents to be deleted for larger values of "days".
            final long maxAgeMillis = Math.round((double) days * 24 * 3600 * 1000);
            final long min = 0;
            final long max = System.currentTimeMillis() - maxAgeMillis;

            final TermRangeQuery oldDocsQuery = new TermRangeQuery(IndexFields.INDEXED,
                    DateTools.timeToString(min, DateTools.Resolution.MILLISECOND),
                    DateTools.timeToString(max, DateTools.Resolution.MILLISECOND), true, false);

            // The reader is needed only to report how many documents match;
            // close it even if the search fails.
            final IndexReader ir = IndexReader.open(dir);
            try {
                final IndexSearcher is = new IndexSearcher(ir);
                // the hit limit must be at least 1, even for an empty index
                final TopDocs topDocs = is.search(oldDocsQuery, Math.max(1, ir.numDocs()));
                log.info("Deleting by age from index directory. Items to delete: " + topDocs.totalHits);
            } finally {
                ir.close();
            }

            iw.deleteDocuments(oldDocsQuery);
        } finally {
            // always release the writer (and its lock), also on failure
            iw.close();
        }
    } catch (IOException ex) {
        log.error("Exception during deleting by age from index directory", ex);
        throw new RuntimeException(ex);
    }
}

From source file:org.punksearch.web.statistics.FileTypeStatistics.java

License:Open Source License

/**
 * Collects all documents of the current index that pass the filter
 * registered for the given type in {@code TypeFilters}.
 *
 * @param type key used to look up the filter in {@code TypeFilters}
 * @return the matching documents wrapped in a {@code Hits}, or {@code null}
 *         if the search failed (the error is logged)
 */
private static Hits extractDocsForType(String type) {
    Filter filter = TypeFilters.get(type);
    try {
        IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher();
        IndexReader indexReader = indexSearcher.getIndexReader();
        // The hit limit must be at least 1: an empty index would otherwise
        // pass 0 and make the search fail instead of returning no hits.
        final int maxHits = Math.max(1, indexReader.numDocs());
        final TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), filter, maxHits);
        return new Hits(indexSearcher, topDocs);
    } catch (Exception e) {
        log.error("error extractDocsForType", e);
        return null;
    }
}

From source file:org.punksearch.web.statistics.FileTypeStatistics.java

License:Open Source License

/**
 * Returns the summed {@code IndexFields.SIZE} of all documents whose
 * {@code IndexFields.PATH} is "/" (presumably the per-host root directory
 * entries - verify against the crawler).
 *
 * The value is cached and recomputed only when no value is cached yet or
 * {@code indexChangedAfter} reports a change since the cache timestamp.
 * If the computation fails, the error is logged and whatever was summed up
 * to that point (0 if nothing) is cached and returned.
 *
 * @return the cached total size
 */
public static synchronized Long totalSize() {
    if (totalSizeCache == null || indexChangedAfter(totalSizeCacheTimestamp)) {
        long size = 0;
        try {
            // Rough pre-selection of the root directories: drop everything whose
            // path falls in the given letter/digit ranges. Other paths (e.g.
            // non-latin1 names) slip through and are filtered in the loop below.
            // TODO: maybe use ranges covering more of the Unicode space
            String approxQuery = "*:* -Path:{a TO Z*} -Path:{0 TO 9*}";
            QueryParser parser = new QueryParser(LuceneVersion.VERSION, "Host",
                    new SimpleAnalyzer(LuceneVersion.VERSION));
            Query query = parser.parse(approxQuery);
            IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher();
            IndexReader indexReader = indexSearcher.getIndexReader();
            // The hit limit must be at least 1: an empty index would otherwise
            // pass 0 and make the search fail.
            final int maxHits = Math.max(1, indexReader.numDocs());
            final TopDocs topDocs = indexSearcher.search(query, maxHits);
            Hits hits = new Hits(indexSearcher, topDocs);
            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);
                // constant-first comparison: a document without a PATH field is
                // skipped instead of aborting the whole sum with an NPE
                if (!"/".equals(doc.get(IndexFields.PATH))) {
                    continue;
                }
                size += Long.parseLong(doc.get(IndexFields.SIZE));
            }
        } catch (Exception e) {
            log.error("", e);
        }
        totalSizeCache = size;
        totalSizeCacheTimestamp = System.currentTimeMillis();
    }
    return totalSizeCache;
}

From source file:org.roosster.store.EntryStore.java

License:Open Source License

/**
 * Returns the document count reported by the reader obtained from
 * {@code getReader()}. The reader is closed before the method returns.
 *
 * @return the value of {@code numDocs()} of the reader
 * @throws IOException if obtaining, reading or closing the reader fails
 * @throws IllegalStateException if the store is not initialized
 */
public int getDocNum() throws IOException {
    if (!isInitialized()) {
        throw new IllegalStateException("Database must be initialized before use!");
    }

    IndexReader indexReader = null;
    try {
        indexReader = getReader();
        return indexReader.numDocs();
    } finally {
        // close the reader only if it was actually obtained
        if (indexReader != null) {
            indexReader.close();
        }
    }
}