Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.numDocs().

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java

License:Apache License

/**
 * Build a BM25BooleanScorer composed of atoms that are BM25TermScorers.
 * The scorer will give the score for a boolean formula combining the subscorers.
 * @param reader//from  w  w w .  j  ava2s.  c om
 * @param should - array of BM25TermScorers appearing as SHOULD
 * @param must - array of BM25TermScorers appearing as MUST
 * @param not - array of BM25TermScorers appearing as NOT
 * @param similarity
 * @throws IOException
 */
public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity) throws IOException {
    super(similarity);

    this.ndocs = reader.numDocs();

    if (should != null && should.length > 0) {

        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25TermScorer(reader, should[i].termQuery, similarity);
        }
        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);

    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25TermScorer(reader, must[i].termQuery, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25TermScorer(reader, not[i].termQuery, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
}

From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java

License:Apache License

/**
 * Build a BM25BooleanScorer composed of atoms that are BM25FTermScorers.
 * The scorer will give the score for a boolean formula combining the subscorers.
 * Each subscorer combines the fields' scores using the given boosts and bParams.
 * @param reader/*from  ww w .ja va  2s .c  o m*/
 * @param should - array of BM25FTermScorers appearing as SHOULD
 * @param must - array of BM25FTermScorers appearing as MUST
 * @param not - array of BM25FTermScorers appearing as NOT
 * @param similarity
 * @throws IOException
 */
public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity, String[] fields, float[] boosts, float[] bParams)
        throws IOException {
    super(similarity);
    this.ndocs = reader.numDocs();
    if (should != null && should.length > 0) {
        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25FTermScorer(reader, should[i].termQuery, fields, boosts, bParams,
                    similarity);
        }

        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);
    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25FTermScorer(reader, must[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25FTermScorer(reader, not[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

}

From source file:ro.ranking.technique.bm25.BM25FTermScorer.java

License:Apache License

/**
 * Creates a scorer for a single term evaluated over several fields.
 *
 * <p>For every entry of {@code fields} the constructor opens the term's postings, loads the
 * field norms and looks up the field's average length from {@code BM25FParameters}. The
 * document frequency used for the IDF is read from the field named by
 * {@code BM25FParameters.getIdfField()}.
 *
 * @param reader index reader supplying document counts, postings and norms
 * @param term term query whose text and boost are used
 * @param fields names of the fields to score over; its length determines the size of all
 *               per-field arrays
 * @param boosts per-field boosts (stored as given)
 * @param bParams per-field b parameters (stored as given)
 * @param similarity similarity passed to the superclass and used to compute the IDF
 * @throws RuntimeException wrapping the {@link IOException} if reading from the index fails;
 *         previously such failures were silently ignored, leaving the scorer half-initialised
 */
public BM25FTermScorer(IndexReader reader, TermQuery term, String[] fields, float[] boosts, float[] bParams,
        Similarity similarity) {
    super(similarity);
    this.fields = fields;
    this.boosts = boosts;
    this.bParam = bParams;
    len = fields.length;
    this.termDocs = new TermDocs[len];
    this.termDocsNext = new boolean[len];
    this.norms = new byte[len][];
    this.averageLengths = new float[len];
    this.K1 = BM25FParameters.getK1();
    this.termBoost = term.getBoost();
    this.numDocs = reader.numDocs();
    this.termText = term.getTerm().text();

    try {
        this.docFreq = reader.docFreq(new Term(BM25FParameters.getIdfField(), termText));
        for (int i = 0; i < len; i++) {
            String field = this.fields[i];
            this.termDocs[i] = reader.termDocs(new Term(field, termText));
            norms[i] = reader.norms(field);
            averageLengths[i] = BM25FParameters.getAverageLength(field);
        }
        this.idf = this.getSimilarity().idf(docFreq, numDocs);
    } catch (IOException e) {
        // Do not swallow: a scorer with missing postings/norms/idf would only fail later
        // in a far less obvious place. The signature has no checked exception, so wrap it.
        throw new RuntimeException("Failed to initialise BM25F scorer for term '" + termText + "'", e);
    }
}

From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Creates a boolean scorer over {@code BM25TermScorer}s, one per entry of {@code termQuery}.
 *
 * <p>The kind of boolean scorer is chosen from the {@code occur} value of the first entry
 * only: MUST and SHOULD map to their respective scorers, anything else to a NOT scorer.
 *
 * @param reader index reader used to build the term scorers (and the document count for NOT)
 * @param termQuery term queries to wrap; the first element is read unconditionally
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @throws IOException if building one of the term scorers fails
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity)
        throws IOException {
    super(similarity);

    final int clauseCount = termQuery.length;
    Scorer[] atoms = new Scorer[clauseCount];
    for (int idx = 0; idx < clauseCount; idx++) {
        atoms[idx] = new BM25TermScorer(reader, termQuery[idx].termQuery, similarity);
    }

    // Only the first clause decides which boolean combination is used.
    if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, atoms);
    } else if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, atoms);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, atoms, reader.numDocs());
    }
}

From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Creates a boolean scorer over {@code BM25FTermScorer}s, one per entry of {@code termQuery},
 * each configured with the given fields, boosts and b parameters.
 *
 * <p>The kind of boolean scorer is chosen from the {@code occur} value of the first entry
 * only: MUST and SHOULD map to their respective scorers, anything else to a NOT scorer.
 *
 * @param reader index reader used to build the term scorers (and the document count for NOT)
 * @param termQuery term queries to wrap; the first element is read unconditionally
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @param fields field names handed to each {@code BM25FTermScorer}
 * @param boosts boosts handed to each {@code BM25FTermScorer}
 * @param bParams b parameters handed to each {@code BM25FTermScorer}
 * @throws IOException declared by this constructor's signature
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity,
        String[] fields, float[] boosts, float[] bParams) throws IOException {
    super(similarity);

    final int clauseCount = termQuery.length;
    Scorer[] atoms = new Scorer[clauseCount];
    for (int idx = 0; idx < clauseCount; idx++) {
        atoms[idx] = new BM25FTermScorer(reader, termQuery[idx].termQuery, fields, boosts, bParams,
                similarity);
    }

    // Only the first clause decides which boolean combination is used.
    if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, atoms);
    } else if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, atoms);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, atoms, reader.numDocs());
    }
}

From source file:ro.ranking.technique.bm25.BM25TermScorer.java

License:Apache License

public BM25TermScorer(IndexReader reader, TermQuery term, Similarity similarity) throws IOException {
    super(similarity);
    this.reader = reader;
    this.term = term;
    this.idf = this.getSimilarity().idf(reader.docFreq(term.getTerm()), reader.numDocs());
    this.norm = this.reader.norms(this.term.getTerm().field());
    this.avgLength = BM25Parameters.getAverageLength(this.term.getTerm().field());
    this.b = BM25Parameters.getB();
    this.k1 = BM25Parameters.getK1();
    this.termDocs = this.reader.termDocs(this.term.getTerm());
}

From source file:searchenginelucene.LuceneSearchEngine.java

/**
 * Computes the total term frequency of every term in the {@code "contents"} field of the
 * index at {@code indexLocation} and writes the result as {@code term,frequency} lines to
 * {@code ..\terf-frequency.csv}. The number of distinct terms is printed to standard output.
 *
 * <p>Changes compared to the previous version: the reader and the writer are now always
 * closed, the {@code "contents"} field is scanned once instead of once per index field
 * (the old loop variable was never used), and an index without a {@code "contents"} field
 * yields an empty file instead of a {@code NullPointerException}.
 *
 * @param indexLocation path of the Lucene index directory
 * @throws IOException if the index cannot be opened or read, or the output file cannot be created
 */
public static void getTermFrequencyPairs(String indexLocation) throws IOException {
    Map<String, Integer> termfrequency = new HashMap<String, Integer>();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    try {
        Fields fields = MultiFields.getFields(reader);
        // Both lookups may yield null (no fields at all / field not present).
        Terms terms = (fields == null) ? null : fields.terms("contents");
        if (terms != null) {
            TermsEnum termsEnum = terms.iterator(null);
            int noWords = 0;

            while (termsEnum.next() != null) {
                noWords++;
                // Sum the within-document frequencies over all postings of the current term.
                int count = 0;
                DocsEnum docsEnum = termsEnum.docs(null, null);
                while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                    count += docsEnum.freq();
                }
                termfrequency.put(termsEnum.term().utf8ToString(), count);
            }
            System.out.println("Total Number of Words:" + noWords);
        }

        // =========================================================
        // Write the terms and their frequencies to a file
        // =========================================================
        // Temporary location to store the intermediate term frequency results
        PrintWriter writer_tf = new PrintWriter("..\\terf-frequency.csv");
        try {
            for (Map.Entry<String, Integer> entry : termfrequency.entrySet()) {
                writer_tf.print(entry.getKey() + ",");
                writer_tf.println(entry.getValue());
            }
        } finally {
            writer_tf.close();
        }
    } finally {
        reader.close();
    }
}

From source file:sift.DocIdComparator.java

/**
 * Loads every document of the query index (property {@code query.index}) as a
 * {@code QueryVector} into {@code qvecs} and then sorts {@code qvecs} with a
 * {@code DocIdComparator}.
 *
 * <p>When the property {@code eval} is {@code true}, the nearest-neighbour id and distance
 * stored in each document are parsed and passed on; otherwise both stay at 0. The square
 * root of the distance is what is handed to the {@code QueryVector}.
 *
 * <p>The index reader is now closed when loading finishes or fails; previously it was leaked.
 *
 * @throws Exception if the index cannot be opened or read, or a property/field cannot be parsed
 */
@Override
void loadQueries() throws Exception {

    int nnId = 0;
    float nnDist = 0;
    boolean eval = Boolean.parseBoolean(prop.getProperty("eval", "false"));

    File indexDir = new File(prop.getProperty("query.index"));

    try (IndexReader siftVecReader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()))) {
        int numDocs = siftVecReader.numDocs();
        int numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

        // NOTE(review): iterating ids 0..numDocs-1 presumably relies on the query index
        // having no deleted documents - confirm against how the index is built.
        for (int i = 0; i < numDocs; i++) {
            Document d = siftVecReader.document(i);

            if (eval) {
                nnId = Integer.parseInt(d.get(QuerySiftVecIndexer.FIELD_NN_ID));
                nnDist = Float.parseFloat(d.get(QuerySiftVecIndexer.FIELD_NN_DIST));
            }

            qvecs.add(new QueryVector(d, numDimensions, DocVector.numIntervals, nnId,
                    (float) Math.sqrt(nnDist)));
        }
    }

    Collections.sort(qvecs, new DocIdComparator());
}

From source file:spell.SpellIndex.java

License:Apache License

/**
 * Returns the number of words in the index, i.e. the document count reported by the reader
 * obtained from {@code getReader()}.
 *
 * <p>The reader is closed before returning, including when reading the count fails.
 *
 * @return the reader's {@code numDocs()} value
 * @throws IOException if obtaining or closing the reader fails
 */
public int getWordCount() throws IOException {
    IndexReader reader = getReader();
    try {
        return reader.numDocs();
    } finally {
        // Close in finally so the reader is not leaked if numDocs() throws.
        reader.close();
    }
}

From source file:stroom.search.server.IndexShardSearcherImpl.java

License:Apache License

/**
 * Opens a reader on top of the given writer and, at debug level, logs when the reader's
 * document count differs from the count recorded on {@code indexShard}.
 *
 * @param indexWriter writer to open the reader from
 * @return the newly opened reader
 * @throws IOException if the reader cannot be opened
 */
private IndexReader openWithWriter(final IndexWriter indexWriter) throws IOException {
    final IndexReader reader = DirectoryReader.open(indexWriter, false);

    // The reader comes straight from the writer, so its count may well differ from
    // what the DB has recorded for this shard; only report it, never fail.
    final int countInIndex = reader.numDocs();
    final boolean mismatch = indexShard.getDocumentCount() != countInIndex;
    if (mismatch && LOGGER.isDebugEnabled()) {
        LOGGER.debug("openWithWriter() - Mismatch document count.  Index says " + countInIndex
                + " DB says " + indexShard.getDocumentCount());
    }

    return reader;
}