Example usage for org.apache.lucene.index IndexReader numDocs

Introduction

This page shows example usages of the numDocs method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java

License:Apache License

/**
 * Build a BM25BooleanScorer composed of atoms that are BM25TermScorers.
 * The scorer will give the score for a boolean formula combining the subscorers.
 * @param reader//from  w  w w .  j  ava2s.  c om
 * @param should - array of BM25TermScorers appearing as SHOULD
 * @param must - array of BM25TermScorers appearing as MUST
 * @param not - array of BM25TermScorers appearing as NOT
 * @param similarity
 * @throws IOException
 */
public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity) throws IOException {
    super(similarity);

    this.ndocs = reader.numDocs();

    if (should != null && should.length > 0) {

        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25TermScorer(reader, should[i].termQuery, similarity);
        }
        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);

    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25TermScorer(reader, must[i].termQuery, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25TermScorer(reader, not[i].termQuery, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
}

From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java

License:Apache License

/**
 * Build a BM25BooleanScorer composed of atoms that are BM25FTermScorers.
 * The scorer will give the score for a boolean formula combining the subscorers.
 * Each subscorer combines the fields' scores using the given boosts and bParams.
 * @param reader/*from  ww w .ja va  2s .c  o m*/
 * @param should - array of BM25FTermScorers appearing as SHOULD
 * @param must - array of BM25FTermScorers appearing as MUST
 * @param not - array of BM25FTermScorers appearing as NOT
 * @param similarity
 * @throws IOException
 */
public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity, String[] fields, float[] boosts, float[] bParams)
        throws IOException {
    super(similarity);
    this.ndocs = reader.numDocs();
    if (should != null && should.length > 0) {
        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25FTermScorer(reader, should[i].termQuery, fields, boosts, bParams,
                    similarity);
        }

        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);
    } else
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25FTermScorer(reader, must[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25FTermScorer(reader, not[i].termQuery, fields, boosts, bParams, similarity);
        }

        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);

}

From source file:ro.ranking.technique.bm25.BM25FTermScorer.java

License:Apache License

/**
 * Builds a scorer for a single term that is looked up in several fields.
 * <p>
 * For every entry of {@code fields} the constructor opens a {@code TermDocs} for the term
 * text in that field, loads the field norms from the reader and reads the field's average
 * length from {@code BM25FParameters}. The document frequency used for the idf is taken
 * from the field returned by {@code BM25FParameters.getIdfField()}.
 *
 * @param reader index reader the postings, norms and statistics are read from
 * @param term term query supplying the term text and the term boost
 * @param fields names of the fields to look the term up in; must not be {@code null}
 * @param boosts boosts stored for later use (presumably one per field - confirm against the scoring code)
 * @param bParams b parameters stored for later use (presumably one per field - confirm against the scoring code)
 * @param similarity similarity passed to the superclass and used to compute the idf
 * @throws IllegalStateException if reading from the index fails; the original
 *         {@link IOException} is attached as the cause
 */
public BM25FTermScorer(IndexReader reader, TermQuery term, String[] fields, float[] boosts, float[] bParams,
        Similarity similarity) {
    super(similarity);
    this.fields = fields;
    this.boosts = boosts;
    this.bParam = bParams;
    len = fields.length;
    this.termDocs = new TermDocs[len];
    this.termDocsNext = new boolean[len];
    this.norms = new byte[len][];
    this.averageLengths = new float[len];
    this.K1 = BM25FParameters.getK1();
    this.termBoost = term.getBoost();
    this.numDocs = reader.numDocs();
    this.termText = term.getTerm().text();

    try {
        this.docFreq = reader.docFreq(new Term(BM25FParameters.getIdfField(), termText));
        for (int i = 0; i < len; i++) {
            String field = this.fields[i];
            this.termDocs[i] = reader.termDocs(new Term(field, termText));
            norms[i] = reader.norms(field);
            averageLengths[i] = BM25FParameters.getAverageLength(field);
        }
        this.idf = this.getSimilarity().idf(docFreq, numDocs);
    } catch (IOException e) {
        // Swallowing this would leave termDocs entries null and idf unset, so the scorer
        // would fail later in an unrelated place. Fail here instead, keeping the cause.
        // An unchecked exception is used so the constructor signature stays unchanged.
        throw new IllegalStateException(
                "Failed to initialise BM25F scorer for term '" + termText + "'", e);
    }

}

From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Builds one {@code BM25TermScorer} per term query and wraps them in a single boolean scorer.
 * <p>
 * The kind of wrapper is chosen from the {@code occur} value of the <em>first</em> element of
 * {@code termQuery}: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader index reader used to build the term scorers
 * @param termQuery term queries to score; the first element is read, so it must not be empty
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @throws IOException if building one of the term scorers fails
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity)
        throws IOException {
    super(similarity);

    final Scorer[] termScorers = new Scorer[termQuery.length];
    int slot = 0;
    for (BooleanTermQuery clause : termQuery) {
        termScorers[slot++] = new BM25TermScorer(reader, clause.termQuery, similarity);
    }

    // Only the first clause is inspected to decide which boolean scorer wraps the array.
    if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, termScorers);
    } else if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, termScorers);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, termScorers, reader.numDocs());
    }

}

From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java

License:Apache License

/**
 * Builds one {@code BM25FTermScorer} per term query, using the given fields, boosts and
 * bParams, and wraps them in a single boolean scorer.
 * <p>
 * The kind of wrapper is chosen from the {@code occur} value of the <em>first</em> element of
 * {@code termQuery}: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader index reader used to build the term scorers
 * @param termQuery term queries to score; the first element is read, so it must not be empty
 * @param similarity similarity passed to the superclass and to every sub-scorer
 * @param fields field names handed to each {@code BM25FTermScorer}
 * @param boosts boosts handed to each {@code BM25FTermScorer}
 * @param bParams b parameters handed to each {@code BM25FTermScorer}
 * @throws IOException declared by this constructor's signature
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity,
        String[] fields, float[] boosts, float[] bParams) throws IOException {
    super(similarity);

    final Scorer[] termScorers = new Scorer[termQuery.length];
    int slot = 0;
    for (BooleanTermQuery clause : termQuery) {
        termScorers[slot++] = new BM25FTermScorer(reader, clause.termQuery, fields, boosts, bParams, similarity);
    }

    // Only the first clause is inspected to decide which boolean scorer wraps the array.
    if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, termScorers);
    } else if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, termScorers);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, termScorers, reader.numDocs());
    }

}

From source file:ro.ranking.technique.bm25.BM25TermScorer.java

License:Apache License

public BM25TermScorer(IndexReader reader, TermQuery term, Similarity similarity) throws IOException {
    super(similarity);
    this.reader = reader;
    this.term = term;
    this.idf = this.getSimilarity().idf(reader.docFreq(term.getTerm()), reader.numDocs());
    this.norm = this.reader.norms(this.term.getTerm().field());
    this.avgLength = BM25Parameters.getAverageLength(this.term.getTerm().field());
    this.b = BM25Parameters.getB();
    this.k1 = BM25Parameters.getK1();
    this.termDocs = this.reader.termDocs(this.term.getTerm());
}

From source file:searchenginelucene.LuceneSearchEngine.java

/**
 * Writes one {@code term,frequency} line per term of the {@code "contents"} field to the CSV
 * file opened below, and prints the number of distinct terms to standard output.
 * <p>
 * The frequency of a term is the sum of {@code DocsEnum.freq()} over every document the term
 * occurs in. If the index has no {@code "contents"} field, an empty file is written and
 * nothing is printed.
 *
 * @param indexLocation file-system path of the Lucene index directory
 * @throws IOException if the index cannot be opened or read, or the output file cannot be created
 */
public static void getTermFrequencyPairs(String indexLocation) throws IOException {
    FSDirectory directory = FSDirectory.open(new File(indexLocation));
    try {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            // Temporary location to store the intermediate term frequency results.
            // Opened before the index is scanned, as in the original flow.
            PrintWriter writer_tf = new PrintWriter("..\\terf-frequency.csv");
            try {
                Map<String, Integer> termfrequency = countContentTermFrequencies(reader);

                // Write the terms and their frequencies to the file.
                for (Map.Entry<String, Integer> entry : termfrequency.entrySet()) {
                    writer_tf.print(entry.getKey() + ",");
                    writer_tf.println(entry.getValue());
                }
            } finally {
                writer_tf.close();
            }
        } finally {
            reader.close();
        }
    } finally {
        directory.close();
    }

}

/** Sums, for each term of the "contents" field, its per-document frequencies across the index. */
private static Map<String, Integer> countContentTermFrequencies(IndexReader reader) throws IOException {
    Map<String, Integer> termfrequency = new HashMap<String, Integer>();

    // The field is looked up once: the earlier per-field loop ignored its loop variable and
    // rescanned "contents" for every field in the index, producing the same map each time.
    Fields fields1 = MultiFields.getFields(reader);
    Terms terms1 = (fields1 == null) ? null : fields1.terms("contents");
    if (terms1 == null) {
        // No postings at all, or no "contents" field: nothing to count.
        return termfrequency;
    }

    TermsEnum termsEnum = terms1.iterator(null);
    int noWords = 0;
    while (termsEnum.next() != null) {
        noWords++;
        int count = 0;
        DocsEnum docsEnum = termsEnum.docs(null, null);
        while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            count += docsEnum.freq();
        }
        termfrequency.put(termsEnum.term().utf8ToString(), count);
    }
    System.out.println("Total Number of Words:" + noWords);

    return termfrequency;
}

From source file:sift.DocIdComparator.java

/**
 * Loads every document of the query index as a {@code QueryVector} into {@code qvecs} and
 * then sorts {@code qvecs} with a {@code DocIdComparator}.
 * <p>
 * The index location is read from the {@code query.index} property and the vector
 * dimensionality from {@code vec.numdimensions}. When the {@code eval} property is
 * {@code true}, the nearest-neighbour id and distance stored on each document are parsed and
 * passed on; otherwise both stay at 0. The square root of the distance is what is handed to
 * {@code QueryVector}.
 *
 * @throws Exception if the index cannot be opened or read, or a property or stored field
 *         cannot be parsed
 */
@Override
void loadQueries() throws Exception {

    int nnId = 0;
    float nnDist = 0;
    boolean eval = Boolean.parseBoolean(prop.getProperty("eval", "false"));

    File indexDir = new File(prop.getProperty("query.index"));

    // Close the directory and the reader once loading is done, including when a property or
    // stored field fails to parse part-way through.
    try (FSDirectory queryIndexDir = FSDirectory.open(indexDir.toPath());
            IndexReader siftVecReader = DirectoryReader.open(queryIndexDir)) {

        int numDocs = siftVecReader.numDocs();
        int numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

        // NOTE(review): iterating doc ids 0..numDocs-1 assumes the index has no deletions;
        // with deletions, ids run up to maxDoc() - confirm the query index is never updated.
        for (int i = 0; i < numDocs; i++) {
            Document d = siftVecReader.document(i);

            if (eval) {
                nnId = Integer.parseInt(d.get(QuerySiftVecIndexer.FIELD_NN_ID));
                nnDist = Float.parseFloat(d.get(QuerySiftVecIndexer.FIELD_NN_DIST));
            }

            qvecs.add(new QueryVector(d, numDimensions, DocVector.numIntervals, nnId, (float) Math.sqrt(nnDist)));
        }
    }

    Collections.sort(qvecs, new DocIdComparator());
}

From source file:spell.SpellIndex.java

License:Apache License

/**
 * Returns the number of words in the index, i.e. the document count reported by the reader
 * obtained from {@code getReader()}.
 * <p>
 * The reader is closed before returning, including when reading the count fails.
 *
 * @return the reader's {@code numDocs()} value
 * @throws IOException if obtaining or closing the reader fails
 */
public int getWordCount() throws IOException {
    IndexReader reader = getReader();
    try {
        return reader.numDocs();
    } finally {
        // Always release the reader, even if numDocs() throws.
        reader.close();
    }
}

From source file:stroom.search.server.IndexShardSearcherImpl.java

License:Apache License

/**
 * Opens a reader on top of the given writer and, at debug level, logs when the reader's
 * document count differs from the count recorded on {@code indexShard}.
 * <p>
 * A mismatch is only logged; the reader is returned either way.
 *
 * @param indexWriter writer to open the reader from
 * @return the newly opened reader
 * @throws IOException if the reader cannot be opened
 */
private IndexReader openWithWriter(final IndexWriter indexWriter) throws IOException {
    final IndexReader indexReader = DirectoryReader.open(indexWriter, false);

    // Check the document count in the index matches the DB. We are using
    // the writer so chances are there is a mismatch.
    final int actualDocumentCount = indexReader.numDocs();
    final boolean countsDiffer = indexShard.getDocumentCount() != actualDocumentCount;
    if (countsDiffer && LOGGER.isDebugEnabled()) {
        LOGGER.debug("openWithWriter() - Mismatch document count.  Index says " + actualDocumentCount
                + " DB says " + indexShard.getDocumentCount());
    }

    return indexReader;
}