Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usages of the numDocs() method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler.java

License:Open Source License

/**
 * Feeds every term of every sampled field, leaf by leaf, into a
 * {@link NonUniqueIndexSampler} and returns the sampler's result for the
 * reader's total document count.
 *
 * @return the sample produced by the sampler for {@code indexReader.numDocs()} documents
 * @throws IndexNotFoundKernelException declared by the overridden method; presumably raised by
 *         one of the helpers called here (not visible in this snippet)
 * @throws RuntimeException wrapping any {@link IOException} raised while reading terms
 */
@Override
protected IndexSample performSampling() throws IndexNotFoundKernelException {
    final NonUniqueIndexSampler valueSampler = new NonUniqueIndexSampler(indexSamplingConfig.sampleSizeLimit());
    final IndexReader reader = indexSearcher.getIndexReader();

    for (LeafReaderContext leaf : reader.leaves()) {
        try {
            for (String field : getFieldNamesToSample(leaf)) {
                Terms fieldTerms = leaf.reader().terms(field);
                if (fieldTerms == null) {
                    // Nothing indexed for this field in this leaf.
                    continue;
                }

                TermsEnum cursor = LuceneDocumentStructure.originalTerms(fieldTerms, field);
                for (BytesRef value = cursor.next(); value != null; value = cursor.next()) {
                    valueSampler.include(value.utf8ToString(), cursor.docFreq());
                    // Called once per term so a long-running sampling can be interrupted.
                    checkCancellation();
                }
            }
        } catch (IOException ioFailure) {
            throw new RuntimeException(ioFailure);
        }
    }

    return valueSampler.result(reader.numDocs());
}

From source file:org.netbeans.modules.jackpot30.indexer.usages.IndexerImplTest.java

License:Open Source License

/**
 * Verifies that indexing the same file twice yields the same number of documents
 * as indexing it once.
 *
 * <p>Each pass builds a fresh in-memory index, adds one fake document, runs
 * {@code doIndex} (once in the first pass, twice in the second) and then compares
 * {@code numDocs()} of the two resulting indexes. Every reader is closed as soon as
 * its document count has been read, so no reader is leaked.
 *
 * @throws IOException if the work directory, the index writer or the index reader fails
 */
public void testRepeatedIndexing() throws IOException {
    final FileObject root = FileUtil.toFileObject(getWorkDir());
    FileObject testFile = FileUtil.createData(root, "Test.java");
    copyToFile(testFile, "public class Test {}");

    // First pass: index the file once.
    Directory store = new RAMDirectory();
    IndexWriter iw = new IndexWriter(store, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    IndexAccessor.current = new IndexAccessor(iw, root);

    iw.addDocument(fakeDocument(testFile));

    doIndex(root, testFile);

    iw.close();

    int expectedDocumentsCount;
    IndexReader ir = IndexReader.open(store);
    try {
        expectedDocumentsCount = ir.numDocs();
    } finally {
        // Release the reader before the variable is reused for the second index.
        ir.close();
    }

    assertEquals(3 + 1, expectedDocumentsCount);

    // Second pass: index the same file twice into a brand-new index.
    store = new RAMDirectory();
    iw = new IndexWriter(store, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    IndexAccessor.current = new IndexAccessor(iw, root);

    iw.addDocument(fakeDocument(testFile));

    doIndex(root, testFile);
    doIndex(root, testFile);

    iw.close();

    int actualDocumentsCount;
    ir = IndexReader.open(store);
    try {
        actualDocumentsCount = ir.numDocs();
    } finally {
        ir.close();
    }

    assertEquals(expectedDocumentsCount, actualDocumentsCount);
}

From source file:org.ninit.models.bm25.BM25BooleanScorer.java

License:Apache License

public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity) throws IOException {
    super(similarity);
    this.ndocs = reader.numDocs();

    if (should != null && should.length > 0) {

        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25TermScorer(reader, should[i].termQuery, similarity);
        }//from   w w w.  j a v a2 s  . co m
        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);

    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25TermScorer(reader, must[i].termQuery, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25TermScorer(reader, not[i].termQuery, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
}

From source file:org.ninit.models.bm25.BM25BooleanScorer.java

License:Apache License

public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity, String[] fields, float[] boosts, float[] bParams)
        throws IOException {
    super(similarity);
    this.ndocs = reader.numDocs();
    if (should != null && should.length > 0) {
        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25FTermScorer(reader, should[i].termQuery, fields, boosts, bParams,
                    similarity);//from   ww  w  .j  a  va  2 s. co m
        }

        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);
    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25FTermScorer(reader, must[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25FTermScorer(reader, not[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

}

From source file:org.ninit.models.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Creates a scorer for a group of clauses that all share one occurrence type,
 * scoring each clause with a {@link BM25TermScorer}.
 *
 * <p>The occurrence type of the first clause decides which boolean scorer wraps
 * the term scorers: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader     index reader the term scorers are built on
 * @param termQuery  the clauses; the first element is read, so the array must not be empty
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @throws IOException if creating a term scorer fails
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity)
        throws IOException {
    super(similarity);

    final int clauseCount = termQuery.length;
    Scorer[] termScorers = new Scorer[clauseCount];
    for (int pos = 0; pos < clauseCount; pos++) {
        termScorers[pos] = new BM25TermScorer(reader, termQuery[pos].termQuery, similarity);
    }

    // The first clause's occurrence type selects the wrapping scorer.
    if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, termScorers);
    } else if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, termScorers);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, termScorers, reader.numDocs());
    }
}

From source file:org.ninit.models.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Creates a scorer for a group of clauses that all share one occurrence type,
 * scoring each clause with a {@link BM25FTermScorer} configured with the given
 * fields, boosts and b-parameters.
 *
 * <p>The occurrence type of the first clause decides which boolean scorer wraps
 * the term scorers: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader     index reader the term scorers are built on
 * @param termQuery  the clauses; the first element is read, so the array must not be empty
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @param fields     field names handed to every {@code BM25FTermScorer}
 * @param boosts     boosts handed to every {@code BM25FTermScorer}
 * @param bParams    b-parameters handed to every {@code BM25FTermScorer}
 * @throws IOException if creating a term scorer fails
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity,
        String[] fields, float[] boosts, float[] bParams) throws IOException {
    super(similarity);

    final int clauseCount = termQuery.length;
    Scorer[] termScorers = new Scorer[clauseCount];
    for (int pos = 0; pos < clauseCount; pos++) {
        termScorers[pos] = new BM25FTermScorer(reader, termQuery[pos].termQuery, fields, boosts, bParams,
                similarity);
    }

    // The first clause's occurrence type selects the wrapping scorer.
    if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, termScorers);
    } else if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, termScorers);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, termScorers, reader.numDocs());
    }
}

From source file:org.ninit.models.bm25.BM25TermScorer.java

License:Apache License

public BM25TermScorer(IndexReader reader, TermQuery term, Similarity similarity) throws IOException {
    super(similarity);
    this.reader = reader;
    this.term = term;
    this.idf = this.getSimilarity().idf(reader.docFreq(term.getTerm()), reader.numDocs());
    this.norm = this.reader.norms(this.term.getTerm().field());
    this.av_length = BM25Parameters.getAverageLength(this.term.getTerm().field());
    this.b = BM25Parameters.getB();
    this.k1 = BM25Parameters.getK1();
    this.termDocs = this.reader.termDocs(this.term.getTerm());

}

From source file:org.niord.core.LuceneTest.java

License:Apache License

/**
 * Indexes a single sentence in an in-memory index and checks that a complex
 * phrase query containing a stop word ("the"), a required term, a prohibited
 * term and a prefix wildcard matches exactly that one document.
 *
 * <p>The writer and the reader are opened in try-with-resources blocks so they
 * are released even when an assertion or the query parser fails.
 *
 * @throws IOException    if the index cannot be written or read
 * @throws ParseException if the query string cannot be parsed
 */
@Test
public void testLucene() throws IOException, ParseException {

    // Something seems to have changed in Lucene. In earlier versions (e.g. 4.6), you could
    // use the StandardAnalyzer, and quoted phrase searches including stop words (e.g. "the")
    // would still work. See example below.
    // However, in the current version of Lucene, I can only get this scenario to work with
    // SimpleAnalyzer for the QueryParser :-(

    Directory directory = new RAMDirectory();
    Analyzer indexAnalyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    // Index positions and term vectors so that phrase queries can be evaluated.
    FieldType ft = new FieldType();
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setTokenized(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();

    Document doc = new Document();
    doc.add(new Field("message", "The quick brown fox jumps over the lazy dog", ft));

    try (IndexWriter writer = new IndexWriter(directory, iwc)) {
        writer.addDocument(doc);
    }

    try (IndexReader reader = DirectoryReader.open(directory)) {
        assertEquals(1, reader.numDocs());

        // The query side deliberately uses SimpleAnalyzer (see the note at the top).
        Analyzer queryAnalyzer = new SimpleAnalyzer();
        QueryParser parser = new ComplexPhraseQueryParser("message", queryAnalyzer);
        parser.setDefaultOperator(QueryParser.OR_OPERATOR);
        parser.setAllowLeadingWildcard(true); // NB: Expensive!
        Query query = parser.parse("\"over the lazy\" +quick -goat bro*");

        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs results = searcher.search(query, 10000);

        assertEquals(1, results.scoreDocs.length);
    }
}

From source file:org.nlp4l.lucene.BuddyWordsFinderTermFilter.java

License:Apache License

/**
 * Prepares this filter for a new pass: remembers the reader and its document
 * count, then positions the terms enumeration of {@code field} at {@code term}
 * (or the next term after it) and records that term's document frequency.
 *
 * <p>If {@code field} has no terms in the index, the enumeration is cleared and
 * the recorded document frequency is reset to zero instead of failing with a
 * {@code NullPointerException}.
 *
 * @param reader index reader to enumerate terms from
 * @param field  name of the field whose terms are enumerated
 * @param term   term to seek to
 * @throws IOException if reading from the index fails
 */
public final void start(IndexReader reader, String field, BytesRef term) throws IOException {
    this.reader = reader;
    numDocs = reader.numDocs();

    // MultiFields.getTerms returns null when the field does not exist, so the guard has to be
    // on the Terms lookup itself; checking the iterator afterwards is too late.
    if (MultiFields.getTerms(reader, field) == null) {
        currentTE = null;
        currentTermDocFreq = 0;
        return;
    }

    // TODO: can we ignore term value?
    currentTE = MultiFields.getTerms(reader, field).iterator();
    // NOTE(review): seekCeil can report that it ran past the last term, in which case the
    // enumeration is unpositioned and docFreq() is not meaningful - confirm callers never
    // pass a term beyond the end of the field.
    currentTE.seekCeil(term);
    currentTermDocFreq = currentTE.docFreq();
}

From source file:org.ohdsi.usagi.tests.TestLucene.java

License:Apache License

/**
 * Small Lucene experiment: builds a two-document index in {@code folder},
 * prints the term vector of each document, explains the score of the query
 * {@code word1} for both documents and finally prints the ranked hits.
 *
 * <p>The directory, writer and reader are opened in try-with-resources blocks
 * so that file handles are released even if a step fails. The reader reuses the
 * directory that was written to instead of opening a second one on the same folder.
 *
 * @param args ignored
 * @throws IOException    if the index folder cannot be written or read
 * @throws ParseException if the query string cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    //Analyzer analyzer = new UsagiAnalyzer();

    // Indexed, tokenized and stored field with plain term vectors (no positions/payloads/offsets).
    FieldType textVectorField = new FieldType();
    textVectorField.setIndexed(true);
    textVectorField.setTokenized(true);
    textVectorField.setStoreTermVectors(true);
    textVectorField.setStoreTermVectorPositions(false);
    textVectorField.setStoreTermVectorPayloads(false);
    textVectorField.setStoreTermVectorOffsets(false);
    textVectorField.setStored(true);
    textVectorField.freeze();

    // Start from an empty index folder.
    File indexFolder = new File(folder);
    if (indexFolder.exists())
        DirectoryUtilities.deleteDir(indexFolder);

    try (Directory dir = FSDirectory.open(indexFolder)) {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(256.0);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            Document doc = new Document();
            doc.add(new Field("F", "word1 word2 w3 word4", textVectorField));
            writer.addDocument(doc);
            doc = new Document();
            doc.add(new Field("F", "word1 word2 w3", textVectorField));
            writer.addDocument(doc);
        }

        try (IndexReader reader = DirectoryReader.open(dir)) {
            // Dump the term vector of every document, one line per document.
            for (int i = 0; i < reader.numDocs(); i++) {
                TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null);
                BytesRef text;
                while ((text = termsEnum.next()) != null) {
                    System.out.print(text.utf8ToString() + ",");
                }
                System.out.println();
            }
            IndexSearcher searcher = new IndexSearcher(reader);

            // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());
            // mlt.setMinTermFreq(0);
            // mlt.setMinDocFreq(0);
            // mlt.setMaxDocFreq(9999);
            // mlt.setMinWordLen(0);
            // mlt.setMaxWordLen(9999);
            // mlt.setMaxDocFreqPct(100);
            // mlt.setMaxNumTokensParsed(9999);
            // mlt.setMaxQueryTerms(9999);
            // mlt.setStopWords(null);
            // mlt.setFieldNames(new String[] { "F" });
            // mlt.setAnalyzer(new UsagiAnalyzer());
            // Query query = mlt.like("F", new StringReader("Systolic blood pressure"));
            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer);
            Query query = parser.parse("word1");

            // Show how the score of each of the two documents is derived.
            Explanation explanation = searcher.explain(query, 0);
            print(explanation);
            System.out.println();
            explanation = searcher.explain(query, 1);
            print(explanation);
            System.out.println();

            TopDocs topDocs = searcher.search(query, 99);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F"));
            }
        }
    }
}