Example usage for org.apache.lucene.search TotalHitCountCollector TotalHitCountCollector

List of usage examples for org.apache.lucene.search TotalHitCountCollector TotalHitCountCollector

Introduction

On this page you can find an example usage of org.apache.lucene.search TotalHitCountCollector.

Prototype

TotalHitCountCollector

Source Link

Usage

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Counts the number of documents in the index having at least one value for the
 * 'class' field. Prefers the cheap per-field statistic; falls back to an actual
 * counting search when the codec does not provide it.
 *
 * @return the no. of documents having a value for the 'class' field
 * @throws IOException if accessing to term vectors or search fails
 */
protected int countDocsWithClass() throws IOException {
    // BUGFIX: MultiFields.getTerms returns null when no document has the field,
    // which previously caused an NPE on getDocCount(); treat that case like an
    // unsupported doc count and fall through to the counting search.
    org.apache.lucene.index.Terms classTerms = MultiFields.getTerms(this.leafReader, this.classFieldName);
    int docCount = classTerms == null ? -1 : classTerms.getDocCount();
    if (docCount == -1) { // in case codec doesn't support getDocCount (or field has no terms)
        // Count docs whose class field matches any value, intersected with the
        // optional user-supplied query.
        TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
        BooleanQuery.Builder q = new BooleanQuery.Builder();
        q.add(new BooleanClause(
                new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))),
                BooleanClause.Occur.MUST));
        if (query != null) {
            q.add(query, BooleanClause.Occur.MUST);
        }
        indexSearcher.search(q.build(), classQueryCountCollector);
        docCount = classQueryCountCollector.getTotalHits();
    }
    return docCount;
}

From source file:SimpleNaiveBayesClassifier.java

License:Apache License

/**
 * Returns the number of documents of the input class (from the whole index or from
 * a subset) that contain the word (in a specific field, or in all the fields if
 * none is selected).
 *
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
    // Match the word in any of the configured text fields.
    BooleanQuery.Builder wordInAnyField = new BooleanQuery.Builder();
    for (String fieldName : textFieldNames) {
        wordInAnyField
                .add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD));
    }

    // Require both the word match and the class term; also apply the optional
    // user-supplied query as a further restriction.
    BooleanQuery.Builder countQuery = new BooleanQuery.Builder();
    countQuery.add(new BooleanClause(wordInAnyField.build(), BooleanClause.Occur.MUST));
    countQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        countQuery.add(query, BooleanClause.Occur.MUST);
    }

    // Count-only collector: no scores or documents are materialized.
    TotalHitCountCollector counter = new TotalHitCountCollector();
    indexSearcher.search(countQuery.build(), counter);
    return counter.getTotalHits();
}

From source file:IndexAndSearchOpenStreetMaps1D.java

License:Apache License

/**
 * Opens the benchmark index at /l/tmp/1dkd (suffix "_nf" when USE_NF is true) and
 * runs 100 iterations of a fixed grid of latitude range queries over the London
 * bounding box, printing per-iteration timing, the summed hit count, and the query
 * count. On the first iteration it also prints per-segment RAM usage.
 *
 * @throws IOException if opening the directory/reader or searching fails
 */
private static void queryIndex() throws IOException {
    Directory dir = FSDirectory.open(Paths.get("/l/tmp/1dkd" + (USE_NF ? "_nf" : "")));
    System.out.println("DIR: " + dir);
    IndexReader r = DirectoryReader.open(dir);
    System.out.println("maxDoc=" + r.maxDoc());

    IndexSearcher s = new IndexSearcher(r);

    //System.out.println("reader MB heap=" + (reader.ramBytesUsed()/1024/1024.));

    // London, UK latitude bounds. Note: only latitude is queried below; the
    // longitude bounds and per-step lon values were computed but never used
    // (only referenced by commented-out debug code) and have been removed.
    int STEPS = 5;
    double MIN_LAT = 51.0919106;
    double MAX_LAT = 51.6542719;
    for (int iter = 0; iter < 100; iter++) {
        long tStart = System.nanoTime();
        long totHits = 0;
        int queryCount = 0;
        for (int latStep = 0; latStep < STEPS; latStep++) {
            double lat = MIN_LAT + latStep * (MAX_LAT - MIN_LAT) / STEPS;
            // The lon loops only enumerate bounding-box combinations; they do not
            // affect the (latitude-only) query, just the number of queries run.
            for (int lonStep = 0; lonStep < STEPS; lonStep++) {
                for (int latStepEnd = latStep + 1; latStepEnd <= STEPS; latStepEnd++) {
                    double latEnd = MIN_LAT + latStepEnd * (MAX_LAT - MIN_LAT) / STEPS;
                    for (int lonStepEnd = lonStep + 1; lonStepEnd <= STEPS; lonStepEnd++) {

                        Query q;
                        if (USE_NF) {
                            // Legacy numeric-field encoding of the latitude range.
                            q = LegacyNumericRangeQuery.newIntRange("latnum", (int) (1000000. * lat),
                                    (int) (1000000. * latEnd), true, true);
                        } else {
                            // Points-based encoding of the latitude range.
                            q = IntPoint.newRangeQuery("lat", (int) (1000000. * lat),
                                    (int) (1000000. * latEnd));
                        }

                        // Count-only collector: the benchmark only needs hit totals.
                        TotalHitCountCollector c = new TotalHitCountCollector();
                        s.search(q, c);

                        totHits += c.getTotalHits();
                        queryCount++;
                    }
                }
            }
        }

        long tEnd = System.nanoTime();
        System.out.println("ITER: " + iter + " " + ((tEnd - tStart) / 1000000000.0) + " sec; totHits=" + totHits
                + "; " + queryCount + " queries");

        if (iter == 0) {
            // One-time report of heap usage per leaf segment.
            long bytes = 0;
            for (LeafReaderContext ctx : r.leaves()) {
                CodecReader cr = (CodecReader) ctx.reader();
                System.out.println(Accountables.toString(cr));
                bytes += cr.ramBytesUsed();
            }
            System.out.println("READER MB: " + (bytes / 1024. / 1024.));
            System.out.println("RAM: " + Accountables.toString((Accountable) r.leaves().get(0).reader()));
        }
    }

    IOUtils.close(r, dir);
}

From source file:SimpleNaiveBayesDocumentClassifier.java

License:Apache License

/**
 * Returns the number of documents of the input class ( from the whole index or from a subset)
 * that contains the word ( in a specific field or in all the fields if no one selected)
 *
 * @param word      the token produced by the analyzer
 * @param fieldName the field the word is coming from
 * @param term      the class term/*from ww w. j  a v  a 2  s.  co  m*/
 * @return number of documents of the input class
 * @throws java.io.IOException If there is a low-level I/O error
 */
private int getWordFreqForClass(String word, String fieldName, Term term) throws IOException {
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
    subQuery.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD));
    booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
    booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        booleanQuery.add(query, BooleanClause.Occur.MUST);
    }
    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
    return totalHitCountCollector.getTotalHits();
}

From source file:com.b2international.index.lucene.SearchWarmerFactory.java

License:Apache License

@Override
public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
    final IndexSearcher searcher = super.newSearcher(reader, previousReader);

    // Warm the freshly created searcher with a cheap counting search.
    // TODO: experiment with different queries (MatchAllDocs, a set of "typical" queries, etc.)
    final BooleanQuery.Builder warmingQuery = new BooleanQuery.Builder();
    warmingQuery.add(new TermQuery(new Term(EMPTY_STRING, EMPTY_STRING)), Occur.MUST);
    searcher.search(warmingQuery.build(), new TotalHitCountCollector());

    return searcher;
}

From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java

License:Apache License

/**
 * Marks every stored classification run whose status is COMPLETED, RUNNING,
 * SAVING_IN_PROGRESS or SCHEDULED as STALE, then commits the updated documents.
 *
 * @throws IOException if searching the index or persisting the updates fails
 */
public void invalidateClassificationRuns() throws IOException {

    // Match any run whose status should be invalidated.
    final Query statusQuery = Fields.newQuery().field(FIELD_STATUS, ClassificationStatus.COMPLETED.name())
            .field(FIELD_STATUS, ClassificationStatus.RUNNING.name())
            .field(FIELD_STATUS, ClassificationStatus.SAVING_IN_PROGRESS.name())
            .field(FIELD_STATUS, ClassificationStatus.SCHEDULED.name()).matchAny();

    // Restrict to ClassificationRun documents, combined with the status filter.
    final Query query = Fields.newQuery().field(FIELD_CLASS, ClassificationRun.class.getSimpleName())
            .and(statusQuery).matchAll();

    IndexSearcher searcher = null;

    try {

        searcher = manager.acquire();

        // First pass: count the matches so the follow-up search can fetch them all.
        final TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        final int totalHits = collector.getTotalHits();

        final int docsToRetrieve = Ints.min(searcher.getIndexReader().maxDoc(), totalHits);
        if (docsToRetrieve < 1) {
            return; // nothing to invalidate
        }

        // Second pass: fetch the hits in index order (no scoring needed).
        final TopDocs docs = searcher.search(query, docsToRetrieve, Sort.INDEXORDER, false, false);
        final ScoreDoc[] scoreDocs = docs.scoreDocs;

        final ObjectReader reader = objectMapper.reader(ClassificationRun.class);
        for (int i = 0; i < scoreDocs.length; i++) {
            // Only the branch path and the serialized source are loaded per hit.
            final Document sourceDocument = searcher.doc(scoreDocs[i].doc,
                    ImmutableSet.of(FIELD_BRANCH_PATH, FIELD_SOURCE));

            final String branchPath = sourceDocument.get(FIELD_BRANCH_PATH);
            final String source = sourceDocument.get(FIELD_SOURCE);
            final ClassificationRun run = reader.readValue(source);

            run.setStatus(ClassificationStatus.STALE);

            // Updates are batched; a single commit follows the loop.
            upsertClassificationRunNoCommit(branchPath, run);
        }

        commit();

    } finally {
        if (null != searcher) {
            manager.release(searcher);
        }
    }
}

From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java

License:Apache License

/**
 * Runs the given query and deserializes each matching document's stored
 * FIELD_SOURCE payload (via objectMapper) into an instance of {@code sourceClass},
 * applying offset/limit paging.
 *
 * @param query       the query to execute
 * @param sourceClass the target type for deserializing each hit's source
 * @param sort        the sort order applied to the hits
 * @param offset      number of leading hits to skip
 * @param limit       maximum number of results to return
 * @return the deserialized results (possibly empty), never {@code null}
 * @throws IOException if searching or deserialization fails
 */
private <T> List<T> search(final Query query, final Class<? extends T> sourceClass, Sort sort, final int offset,
        final int limit) throws IOException {
    IndexSearcher searcher = null;

    try {

        searcher = manager.acquire();

        // First pass: count total matches so the retrieval size can be clamped.
        final TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        final int totalHits = collector.getTotalHits();

        // Saturating offset+limit avoids int overflow; then clamp by index size and hit count.
        final int saturatedSum = Ints.saturatedCast((long) offset + limit);
        final int docsToRetrieve = Ints.min(saturatedSum, searcher.getIndexReader().maxDoc(), totalHits);
        final ImmutableList.Builder<T> resultBuilder = ImmutableList.builder();

        if (docsToRetrieve < 1) {
            return resultBuilder.build(); // no hits within the requested page
        }

        // Second pass: fetch the top docsToRetrieve hits in the requested order.
        final TopDocs docs = searcher.search(query, docsToRetrieve, sort, false, false);
        final ScoreDoc[] scoreDocs = docs.scoreDocs;

        // Deserialize each hit's stored source, starting from the requested offset.
        final ObjectReader reader = objectMapper.reader(sourceClass);
        for (int i = offset; i < docsToRetrieve && i < scoreDocs.length; i++) {
            final Document sourceDocument = searcher.doc(scoreDocs[i].doc, ImmutableSet.of(FIELD_SOURCE));
            final String source = sourceDocument.get(FIELD_SOURCE);
            final T deserializedSource = reader.readValue(source);
            resultBuilder.add(deserializedSource);
        }

        return resultBuilder.build();

    } finally {

        if (null != searcher) {
            manager.release(searcher);
        }
    }
}

From source file:com.b2international.snowowl.snomed.api.impl.ClassificationRunIndex.java

License:Apache License

/**
 * Counts the documents matching the given query.
 *
 * @param query the query to count hits for
 * @return the total number of matching documents
 * @throws IOException if the index search fails
 */
private int getHitCount(final Query query) throws IOException {
    IndexSearcher searcher = null;
    try {
        searcher = manager.acquire();
        // Count-only collector: no scores or documents are materialized.
        final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        searcher.search(query, hitCounter);
        return hitCounter.getTotalHits();
    } finally {
        // Always hand the searcher back to the manager, even on failure.
        if (searcher != null) {
            manager.release(searcher);
        }
    }
}

From source file:com.hrstc.lucene.queryexpansion.PatentClassCodeBasedQueryExpansion.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each field
 * separately: qm = alpha * query + ( beta / relevantDocsCount ) * Sum ( rel
 * docs vector )
 *
 * @param query the patent query to expand
 *
 * @return expandedQuery
 *
 * @throws IOException if searching the class-code index fails
 * @throws ParseException if a per-field query string cannot be parsed
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //********************************************************************
    //**************** Get the set of definition codes *******************
    //********************************************************************
    // Count every document matching the class codes, then retrieve all of them
    // (at least one) to form the pseudo-relevance-feedback (PRF) set.
    TotalHitCountCollector collector = new TotalHitCountCollector();
    Query codesQuery = GenerateClassCodesQuery.generateQuery(query.getFullClassCodes());
    classCodesSearcher.search(codesQuery, collector);
    IndexReader ir = classCodesSearcher.getIndexReader();
    TopDocs hits = classCodesSearcher.search(codesQuery, Math.max(1, collector.getTotalHits())); // Compute PRF set
    Query expandedQuery = null;
    ClassCodeBasedQueryExpansion queryExpansion = new ClassCodeBasedQueryExpansion(hits, ir, parameters,
            Nbr_Terms);
    // Field 0 is the filter field; expand each remaining searchable field.
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        // BUGFIX: the original condition was (i != 4 || i != 6), a tautology that
        // never skipped anything; fields 4 and 6 are meant to be excluded.
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 && i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            // NOTE(review): bQueryFields is built but never used afterwards; it is
            // retained because the parse() calls below can throw ParseException,
            // which callers may depend on. Consider removing after confirmation.
            BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            if (query.isFilter()) {
                Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                        new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
                bQueryFields.add(filter, BooleanClause.Occur.MUST);
            }
            if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
                bQueryFields.add(q, BooleanClause.Occur.MUST);
            }

            if (expandedQuery == null) {
                // First expandable field: run the class-code based expansion proper.
                expandedQuery = queryExpansion.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                // Subsequent fields: re-target the previously expanded terms onto field i,
                // preserving each term's boost.
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
        }
    }
    if (query.isFilter()) {
        // Mandatory filter on field 0, boosted per the query's configuration.
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}

From source file:com.hrstc.lucene.queryexpansion.PatentRocchioQueryExpansion.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each fields
 * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel
 * docs vector )/*from   w  ww  .  jav a 2  s. c  o  m*/
 *
 * @param query
 *
 * @return expandedQuery
 *
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //*****************************************************************
    //**************** Compute the PRF for field (i)******************* 
    //*****************************************************************
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    IndexReader ir = searcher.getIndexReader();
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set
    //                System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + ".");
    Query expandedQuery = null;
    RocchioQueryExpansion queryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source, Nbr_Docs,
            Nbr_Terms);
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            //                BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            //                if (query.isFilter()) {
            //                    Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
            //                            new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
            //                    bQueryFields.add(filter, BooleanClause.Occur.MUST);
            //                }
            //                if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
            //                    bQueryFields.add(q, BooleanClause.Occur.MUST);
            //                }                
            if (expandedQuery == null) {
                expandedQuery = queryExpansion.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
            //                System.err.println("Expanded Query: " + expandedQuery);
            //                hits = searcher.search(expandedQuery, 100);
            //                System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + ".");
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    //        TopDocs hits = searcher.search(bQuery, 100);
    //                System.err.println(hits.totalHits + " total matching documents.");
    return bQuery;
}