List of usage examples for org.apache.lucene.index IndexReader numDocs
// Signature of IndexReader.numDocs() (org.apache.lucene.index), the method whose call sites are collected below.
public abstract int numDocs();
From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java
License:Apache License
/** * Build a BM25BooleanScorer composed of atoms that are BM25TermScorers. * The scorer will give the score for a boolean formula combining the subscorers. * @param reader//from w w w . j ava2s. c om * @param should - array of BM25TermScorers appearing as SHOULD * @param must - array of BM25TermScorers appearing as MUST * @param not - array of BM25TermScorers appearing as NOT * @param similarity * @throws IOException */ public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must, BooleanTermQuery[] not, Similarity similarity) throws IOException { super(similarity); this.ndocs = reader.numDocs(); if (should != null && should.length > 0) { Scorer[] shouldScorer = new Scorer[should.length]; for (int i = 0; i < shouldScorer.length; i++) { shouldScorer[i] = new BM25TermScorer(reader, should[i].termQuery, similarity); } this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer); } else this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs); if (must != null && must.length > 0) { Scorer[] mustScorer = new Scorer[must.length]; for (int i = 0; i < mustScorer.length; i++) { mustScorer[i] = new BM25TermScorer(reader, must[i].termQuery, similarity); } this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer); } else this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs); if (not != null && not.length > 0) { Scorer[] notScorer = new Scorer[not.length]; for (int i = 0; i < notScorer.length; i++) { notScorer[i] = new BM25TermScorer(reader, not[i].termQuery, similarity); } this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs); } else this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs); }
From source file:ro.ranking.technique.bm25.BM25BooleanScorer.java
License:Apache License
/**
 * Build a BM25BooleanScorer composed of atoms that are BM25FTermScorers.
 * The scorer will give the score for a boolean formula combining the subscorers.
 * Each subscorer combines the fields' scores using the given boosts and bParams.
 * A clause group that is {@code null} or empty is represented by a MatchAllBooleanScorer
 * built from the reader's document count.
 *
 * @param reader index reader; supplies the document count and is passed to every term scorer
 * @param should array of term queries appearing as SHOULD (may be null or empty)
 * @param must array of term queries appearing as MUST (may be null or empty)
 * @param not array of term queries appearing as NOT (may be null or empty)
 * @param similarity similarity handed to every scorer created here
 * @param fields field names forwarded to each BM25FTermScorer
 * @param boosts per-field boosts forwarded to each BM25FTermScorer
 * @param bParams per-field b parameters forwarded to each BM25FTermScorer
 * @throws IOException declared by the signature for callers
 */
public BM25BooleanScorer(IndexReader reader, BooleanTermQuery[] should, BooleanTermQuery[] must,
        BooleanTermQuery[] not, Similarity similarity, String[] fields, float[] boosts, float[] bParams)
        throws IOException {
    super(similarity);
    this.ndocs = reader.numDocs();

    if (should != null && should.length > 0) {
        Scorer[] shouldScorer = new Scorer[should.length];
        for (int i = 0; i < shouldScorer.length; i++) {
            shouldScorer[i] = new BM25FTermScorer(reader, should[i].termQuery, fields, boosts, bParams,
                    similarity);
        }
        this.shouldBooleanScorer = new ShouldBooleanScorer(similarity, shouldScorer);
    } else {
        this.shouldBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
    }

    if (must != null && must.length > 0) {
        Scorer[] mustScorer = new Scorer[must.length];
        for (int i = 0; i < mustScorer.length; i++) {
            mustScorer[i] = new BM25FTermScorer(reader, must[i].termQuery, fields, boosts, bParams,
                    similarity);
        }
        this.mustBooleanScorer = new MustBooleanScorer(similarity, mustScorer);
    } else {
        this.mustBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
    }

    if (not != null && not.length > 0) {
        Scorer[] notScorer = new Scorer[not.length];
        for (int i = 0; i < notScorer.length; i++) {
            notScorer[i] = new BM25FTermScorer(reader, not[i].termQuery, fields, boosts, bParams,
                    similarity);
        }
        // The NOT scorer additionally needs the document count.
        this.notBooleanScorer = new NotBooleanScorer(similarity, notScorer, this.ndocs);
    } else {
        this.notBooleanScorer = new MatchAllBooleanScorer(similarity, this.ndocs);
    }
}
From source file:ro.ranking.technique.bm25.BM25FTermScorer.java
License:Apache License
/**
 * Creates a scorer for one term evaluated over several fields.
 * Stores the per-field configuration, allocates one slot per field for the term
 * postings, norms and average lengths, and computes the idf from the document
 * frequency of the term in the field named by {@code BM25FParameters.getIdfField()}.
 *
 * @param reader index reader used for document count, document frequency, postings and norms
 * @param term term query supplying the term text and the term boost
 * @param fields names of the fields to score; its length sizes every per-field array
 * @param boosts per-field boosts (stored as given)
 * @param bParams per-field b parameters (stored as given)
 * @param similarity similarity used to compute the idf
 * @throws IllegalStateException if reading from the index fails; the original IOException is the cause
 */
public BM25FTermScorer(IndexReader reader, TermQuery term, String[] fields, float[] boosts, float[] bParams,
        Similarity similarity) {
    super(similarity);
    this.fields = fields;
    this.boosts = boosts;
    this.bParam = bParams;
    len = fields.length;
    this.termDocs = new TermDocs[len];
    this.termDocsNext = new boolean[len];
    this.norms = new byte[len][];
    this.averageLengths = new float[len];
    this.K1 = BM25FParameters.getK1();
    this.termBoost = term.getBoost();
    this.numDocs = reader.numDocs();
    this.termText = term.getTerm().text();
    try {
        this.docFreq = reader.docFreq(new Term(BM25FParameters.getIdfField(), termText));
        for (int i = 0; i < len; i++) {
            String field = this.fields[i];
            this.termDocs[i] = reader.termDocs(new Term(field, termText));
            norms[i] = reader.norms(field);
            averageLengths[i] = BM25FParameters.getAverageLength(field);
        }
        this.idf = this.getSimilarity().idf(docFreq, numDocs);
    } catch (IOException e) {
        // Previously swallowed, which left termDocs/norms entries null and idf unset.
        // Fail here with the cause instead of with an unrelated error later on.
        throw new IllegalStateException("Failed to read index data for term '" + termText + "'", e);
    }
}
From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java
License:Apache License
/**
 * Fill scorer array with BM25TermScorers.
 * The occurrence type of the first element of {@code termQuery} selects the
 * combining scorer: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader index reader passed to every term scorer; also supplies the document
 *               count when the NOT scorer is chosen
 * @param termQuery term queries to wrap; must contain at least one element because
 *                  element 0 is read unconditionally
 * @param similarity similarity handed to every scorer created here
 * @throws IOException propagated from the scorer constructors
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity)
        throws IOException {
    super(similarity);
    Scorer[] scorer = new Scorer[termQuery.length];
    for (int i = 0; i < scorer.length; i++) {
        scorer[i] = new BM25TermScorer(reader, termQuery[i].termQuery, similarity);
    }

    if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, scorer);
    } else if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, scorer);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, scorer, reader.numDocs());
    }
}
From source file:ro.ranking.technique.bm25.BM25SingleBooleanScorer.java
License:Apache License
/**
 * Wraps every given term query in a BM25FTermScorer (configured with the fields,
 * boosts and bParams parameters) and combines them with a single boolean scorer.
 * The occurrence type of the first element of {@code termQuery} selects the
 * combining scorer: MUST, SHOULD, or (for anything else) NOT.
 *
 * @param reader index reader passed to every term scorer; also supplies the document
 *               count when the NOT scorer is chosen
 * @param termQuery term queries to wrap; element 0 is read unconditionally
 * @param similarity similarity handed to every scorer created here
 * @param fields field names forwarded to each BM25FTermScorer
 * @param boosts per-field boosts forwarded to each BM25FTermScorer
 * @param bParams per-field b parameters forwarded to each BM25FTermScorer
 * @throws IOException declared by the signature for callers
 */
public BM25SingleBooleanScorer(IndexReader reader, BooleanTermQuery[] termQuery, Similarity similarity,
        String[] fields, float[] boosts, float[] bParams) throws IOException {
    super(similarity);

    final Scorer[] atoms = new Scorer[termQuery.length];
    int slot = 0;
    for (BooleanTermQuery clause : termQuery) {
        atoms[slot++] = new BM25FTermScorer(reader, clause.termQuery, fields, boosts, bParams, similarity);
    }

    if (termQuery[0].occur == BooleanClause.Occur.MUST) {
        this.booleanScorer = new MustBooleanScorer(similarity, atoms);
    } else if (termQuery[0].occur == BooleanClause.Occur.SHOULD) {
        this.booleanScorer = new ShouldBooleanScorer(similarity, atoms);
    } else {
        this.booleanScorer = new NotBooleanScorer(similarity, atoms, reader.numDocs());
    }
}
From source file:ro.ranking.technique.bm25.BM25TermScorer.java
License:Apache License
public BM25TermScorer(IndexReader reader, TermQuery term, Similarity similarity) throws IOException { super(similarity); this.reader = reader; this.term = term; this.idf = this.getSimilarity().idf(reader.docFreq(term.getTerm()), reader.numDocs()); this.norm = this.reader.norms(this.term.getTerm().field()); this.avgLength = BM25Parameters.getAverageLength(this.term.getTerm().field()); this.b = BM25Parameters.getB(); this.k1 = BM25Parameters.getK1(); this.termDocs = this.reader.termDocs(this.term.getTerm()); }
From source file:searchenginelucene.LuceneSearchEngine.java
public static void getTermFrequencyPairs(String indexLocation) throws IOException { Map<String, Integer> termfrequency = new HashMap<String, Integer>(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); // Temporary location to store the interediate term frequency results PrintWriter writer_tf = new PrintWriter("..\\terf-frequency.csv"); int docnum = reader.numDocs(); // System.out.println("docnum:" + docnum); Fields fields1 = MultiFields.getFields(reader); for (String field : fields1) { Terms terms1 = fields1.terms("contents"); TermsEnum termsEnum = terms1.iterator(null); int noWords = 0; while (termsEnum.next() != null) { noWords++;//from w w w . ja va2 s . co m int count = 0; DocsEnum docsEnum = termsEnum.docs(null, null); int docIdEnum; //System.out.print("The term is->" + termsEnum.term().utf8ToString()); while ((docIdEnum = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { count += docsEnum.freq(); } //System.out.println("count:" + count); termfrequency.put(termsEnum.term().utf8ToString(), count); } System.out.println("Total Number of Words:" + noWords); } // ========================================================= // Write the terms anf their frequencies in a file // ========================================================= for (String key : termfrequency.keySet()) { writer_tf.print(key + ","); writer_tf.println(termfrequency.get(key)); } writer_tf.close(); }
From source file:sift.DocIdComparator.java
/**
 * Loads every document of the index named by the "query.index" property as a
 * QueryVector and sorts the resulting list with a DocIdComparator.
 * When the "eval" property is true, the nearest-neighbour id and distance stored
 * in each document are parsed and passed along; otherwise both stay 0.
 * The index reader is closed once all documents have been read.
 *
 * @throws Exception if the index cannot be opened or read, or a property or stored field cannot be parsed
 */
@Override
void loadQueries() throws Exception {
    int nnId = 0;
    float nnDist = 0;
    boolean eval = Boolean.parseBoolean(prop.getProperty("eval", "false"));

    File indexDir = new File(prop.getProperty("query.index"));

    try (IndexReader siftVecReader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()))) {
        int numDocs = siftVecReader.numDocs();
        int numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

        // NOTE(review): ids 0..numDocs-1 are only the complete set of live documents
        // when the index has no deletions - confirm for this index.
        for (int i = 0; i < numDocs; i++) {
            Document d = siftVecReader.document(i);
            if (eval) {
                nnId = Integer.parseInt(d.get(QuerySiftVecIndexer.FIELD_NN_ID));
                nnDist = Float.parseFloat(d.get(QuerySiftVecIndexer.FIELD_NN_DIST));
            }
            qvecs.add(new QueryVector(d, numDimensions, DocVector.numIntervals, nnId,
                    (float) Math.sqrt(nnDist)));
        }
    }

    Collections.sort(qvecs, new DocIdComparator());
}
From source file:spell.SpellIndex.java
License:Apache License
/** * Returns the number of words in the index. *///from ww w . j a v a2 s . c om public int getWordCount() throws IOException { IndexReader reader = getReader(); int result = reader.numDocs(); reader.close(); return result; }
From source file:stroom.search.server.IndexShardSearcherImpl.java
License:Apache License
private IndexReader openWithWriter(final IndexWriter indexWriter) throws IOException { final IndexReader indexReader = DirectoryReader.open(indexWriter, false); // Check the document count in the index matches the DB. We are using // the writer so chances are there is a mismatch. final int actualDocumentCount = indexReader.numDocs(); if (indexShard.getDocumentCount() != actualDocumentCount) { if (LOGGER.isDebugEnabled()) { LOGGER.debug("openWithWriter() - Mismatch document count. Index says " + actualDocumentCount + " DB says " + indexShard.getDocumentCount()); }// w w w. ja va2 s. c o m } return indexReader; }