Example usage for org.apache.lucene.search BooleanQuery setMaxClauseCount

Introduction

This page collects example usages of the static method org.apache.lucene.search.BooleanQuery.setMaxClauseCount, taken from open-source projects.

Prototype

public static void setMaxClauseCount(int maxClauseCount)

Source Link

Document

Set the maximum number of clauses permitted per BooleanQuery.

Usage

From source file:it.unipd.dei.ims.falcon.ranking.SegmentQuery.java

License:Apache License

/**
 * Builds an empty {@link SegmentQuery}.
 * <p>
 * A {@link SegmentQuery} is a {@link org.apache.lucene.search.BooleanQuery}
 * that holds at most {@code segmentLength}
 * {@link org.apache.lucene.search.BooleanClause}s, where
 * {@code segmentLength} is the number of hashes in one segment of the query.
 *
 * @param segmentLength number of hashes in a segment of the query
 */
public SegmentQuery(int segmentLength) {
    // Initialise the underlying Lucene boolean query; the flag is handed
    // straight to the superclass constructor.
    super(true);
    // Lucene ships with a limit of 1024 clauses per BooleanQuery. Replace it
    // with the segment length, since a segment may contain that many hashes.
    // The setter is static, so the new limit is not specific to this instance.
    BooleanQuery.setMaxClauseCount(segmentLength);
}

From source file:lucene.LuceneCandidateProvider.java

License:Apache License

/**
 * Runs {@code query} against the index and returns the retrieved entries.
 * <p>
 * Before parsing, the static {@link BooleanQuery} clause limit is raised to
 * twice the number of tokens in the query if it is currently lower.
 *
 * @param queryNum query number (not used by this method)
 * @param query    query text; it is tokenised with {@code mSpaceSplit} only to
 *                 estimate the clause count, and parsed as a whole
 * @param maxQty   maximum number of hits requested from the searcher
 * @return one {@link ResEntry} (document id and score) per hit, ordered by
 *         {@link Arrays#sort(Object[])}
 * @throws Exception if parsing or searching fails
 */
public ResEntry[] getCandidates(int queryNum, String query, int maxQty) throws Exception {
    // Only the number of tokens matters here, so count them.
    int tokenQty = 0;
    for (String ignored : mSpaceSplit.split(query)) {
        tokenQty++;
    }

    // This a heuristic, but it should work fine in many cases:
    // allow two clauses per token, and never lower the current limit.
    if (BooleanQuery.getMaxClauseCount() < 2 * tokenQty) {
        BooleanQuery.setMaxClauseCount(2 * tokenQty);
    }

    Query parsedQuery = mParser.parse(query);
    ScoreDoc[] foundDocs = mSearcher.search(parsedQuery, maxQty).scoreDocs;

    // Convert every hit into an (id, score) entry.
    ResEntry[] results = new ResEntry[foundDocs.length];
    for (int i = 0; i < foundDocs.length; i++) {
        ScoreDoc hit = foundDocs[i];
        Document hitDoc = mSearcher.doc(hit.doc);
        results[i] = new ResEntry(hitDoc.get(UtilConst.FIELD_ID), hit.score);
    }

    Arrays.sort(results);
    return results;
}

From source file:net.hillsdon.reviki.web.dispatching.impl.DispatcherServlet.java

License:Apache License

/**
 * Initialises the servlet: builds and starts the application session, keeps
 * its dispatcher, and raises the static Lucene {@link BooleanQuery} clause
 * limit to 4096.
 *
 * @param config servlet configuration; it and its servlet context are
 *               registered as components of the container
 * @throws ServletException if the root session class cannot be loaded
 */
@Override
public void init(final ServletConfig config) throws ServletException {
    super.init(config);
    // The session implementation is loaded by name because referencing it
    // directly would create a package cycle. The plan is to move the class
    // name into web.xml; until then this keeps the cycle count at zero.
    final String sessionImplName = "net.hillsdon.reviki.di.impl.ApplicationSessionImpl";
    try {
        final ApplicationSession session = new PicoBuilder().build()
                .addComponent(Class.forName(sessionImplName))
                .addComponent(config)
                .addComponent(config.getServletContext())
                .getComponent(ApplicationSession.class);
        session.start();
        _dispatcher = session.getDispatcher();

        // The default of 1024 clauses proved too small for searches on common
        // terms (e.g. "story"), so it was increased in r1202. 4096 was tested
        // with a search returning 2500 results and should be more than enough
        // for the internal wikis at the moment.
        BooleanQuery.setMaxClauseCount(4096);
    } catch (ClassNotFoundException e) {
        throw new ServletException("Root session class not found", e);
    }
}

From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java

License:Open Source License

public VisualWordsImageSearcher(int numMaxHits, Similarity similarity, String fieldName) {
    this.similarity = similarity;
    this.numMaxHits = numMaxHits;
    this.fieldName = fieldName;
    qp = new QueryParser(LuceneUtils.LUCENE_VERSION, fieldName,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    BooleanQuery.setMaxClauseCount(10000);
}

From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java

License:Open Source License

public VisualWordsImageSearcher(int numMaxHits, String fieldName) {
    this.numMaxHits = numMaxHits;
    this.fieldName = fieldName;
    qp = new QueryParser(LuceneUtils.LUCENE_VERSION, fieldName,
            new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    BooleanQuery.setMaxClauseCount(10000);
}

From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java

License:Open Source License

public VisualWordsImageSearcher(int numMaxHits, Similarity similarity, String fieldName) {
    this.similarity = similarity;
    this.numMaxHits = numMaxHits;
    this.fieldName = fieldName;
    qp = new QueryParser(fieldName, new WhitespaceAnalyzer());
    BooleanQuery.setMaxClauseCount(10000);
}

From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java

License:Open Source License

public VisualWordsImageSearcher(int numMaxHits, String fieldName) {
    this.numMaxHits = numMaxHits;
    this.fieldName = fieldName;
    qp = new QueryParser(fieldName, new WhitespaceAnalyzer());
    BooleanQuery.setMaxClauseCount(10000);
}

From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java

/**
 * Main search method, for internal use.
 * <p>
 * Parses {@code q}, rewrites it against the index reader, runs the search
 * (restricted to the search scope when one is set) and highlights the
 * contents of each hit. The parsed and rewritten query is stored in
 * {@code query} and the highlight count in {@code matchedItemCount}.
 *
 * @param q query string
 * @return a list of highlighted search result items, one per returned hit
 * @throws SearchException wrapping any exception raised while parsing,
 *                         searching or highlighting
 */
private List<SearchResultItem> internalSearch(String q) throws SearchException {
    IndexSearcher is = null;
    try {
        is = new IndexSearcher(zekrIndexReader.indexReader);

        QueryParser parser = QueryParserFactory.create(Version.LUCENE_CURRENT, QuranTextIndexer.CONTENTS_FIELD,
                analyzer);

        // allow search terms like "*foo" with leading star
        parser.setAllowLeadingWildcard(true);

        // if this line is not set, highlighter doesn't work in wildcard queries while query.rewrite() is done.
        // and sorting also doesn't work correctly for wildcard queries.
        parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);

        // Apply the configured clause limit before parsing: the limit is a static
        // setting that is enforced whenever clauses are added to a BooleanQuery,
        // so it has to be in place for the parse as well as for the rewrite below.
        BooleanQuery.setMaxClauseCount(maxClauseCount);

        logger.debug("Parse query.");
        query = parser.parse(q);

        logger.debug("Rewrite query.");
        query = query.rewrite(zekrIndexReader.indexReader); // required to expand search terms

        logger.debug("Searching for: " + query.toString());
        TopFieldDocs tops = null;
        is.setDefaultFieldSortScoring(true, true);
        if (searchScope != null && searchScope.getScopeItems().size() > 0) {
            String scopeQuery = makeSearchScope();
            logger.debug("Scope is: " + scopeQuery);
            tops = is.search(query, new QuranRangeFilter(searchScope), maxSearchResult, sortResultOrder);
        } else {
            tops = is.search(query, new QueryWrapperFilter(query), maxSearchResult, sortResultOrder);
        }

        logger.debug("Highlight search result.");
        Highlighter highlighter = new Highlighter(highlightFormatter, new QueryScorer(query));

        int total = Math.min(maxSearchResult, tops.totalHits);
        List<SearchResultItem> res = new ArrayList<SearchResultItem>(total);
        // The hit array does not change while iterating, so read it once.
        ScoreDoc[] sd = tops.scoreDocs;
        for (int i = 0; i < total; i++) {
            Document doc = is.doc(sd[i].doc);
            final String contents = doc.get(QuranTextIndexer.CONTENTS_FIELD);
            final IQuranLocation location = new QuranLocation(doc.get(QuranTextIndexer.LOCATION_FIELD));
            TokenStream tokenStream = analyzer.tokenStream(QuranTextIndexer.CONTENTS_FIELD,
                    new StringReader(contents));

            // Up to 100 best fragments of the contents, joined with "...".
            String resultStr = highlighter.getBestFragments(tokenStream, contents, 100, "...");
            res.add(new SearchResultItem(resultStr, location));
        }
        matchedItemCount = highlightFormatter.getHighlightCount();
        return res;
    } catch (Exception e) {
        throw new SearchException(e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Best-effort close: a failure here must not mask the search
                // result or the exception already being propagated.
            }
        }
    }
}

From source file:nicta.com.au.failureanalysis.optimalquery.OptPatentQuery.java

/**
 * Builds the Lucene query for this patent query.
 * <p>
 * Every non-empty query text from index 1 on whose boost is non-zero is
 * parsed and added as a SHOULD clause of an inner boolean query; that inner
 * query is a MUST clause of the result. When {@code filter} is set, the
 * query at index 0 is parsed against {@code fields[0]} and added as a further
 * MUST clause. The static {@link BooleanQuery} clause limit is set to
 * {@link Integer#MAX_VALUE}.
 *
 * @return the combined boolean query
 * @throws ParseException           if one of the query texts cannot be parsed
 * @throws IllegalArgumentException if {@code queries} and {@code fields}
 *                                  differ in length
 */
public Query parse() throws ParseException {
    if (queries.length != fields.length) {
        throw new IllegalArgumentException("queries.length != fields.length");
    }
    BooleanQuery combined = new BooleanQuery();
    BooleanQuery perField = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    for (int i = 1; i < fields.length; i++) {
        final String text = queries[i];
        // Skip missing or empty query texts and fields with a zero boost.
        if (text == null || text.equals("") || boosts.get(fields[i]) == 0) {
            continue;
        }

        // Queries 4 and 6 are parsed against the field of the preceding index.
        final String targetField = (i == 4 || i == 6) ? fields[i - 1] : fields[i];
        final QueryParser fieldParser = new QueryParser(Version.LUCENE_48, targetField,
                new StandardAnalyzer(Version.LUCENE_48));

        final Query fieldQuery = fieldParser.parse(text);
        // The boost is always looked up under the field's own name.
        fieldQuery.setBoost(boosts.get(fields[i]));

        // Leave out boolean queries that ended up with no clauses.
        final boolean emptyBoolean = fieldQuery instanceof BooleanQuery
                && ((BooleanQuery) fieldQuery).getClauses().length <= 0;
        if (!emptyBoolean) {
            perField.add(fieldQuery, BooleanClause.Occur.SHOULD);
        }
    }

    if (filter) {
        final QueryParser filterParser = new QueryParser(Version.LUCENE_48, fields[0],
                new StandardAnalyzer(Version.LUCENE_48));
        final Query filterQuery = filterParser.parse(queries[0]);
        filterQuery.setBoost(boosts.get(fields[0]));
        combined.add(filterQuery, BooleanClause.Occur.MUST);
    }
    combined.add(perField, BooleanClause.Occur.MUST);
    return combined;
}

From source file:nicta.com.au.patent.queryexpansion.PatentMMRQueryExpansion.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each fields
 * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel
 * docs vector )/*w ww.  j av a  2  s . c  o  m*/
 *
 * @param query
 *
 * @return expandedQuery
 *
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //*****************************************************************
    //**************** Compute the PRF for field (i)******************* 
    //*****************************************************************
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set

    Map<String, Float> parameters = new HashMap<>();
    parameters.put(RocchioQueryExpansion.ROCCHIO_ALPHA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_BETA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.DECAY_FLD, (float) 0.0);
    Map<String, Float> rocchioVector = null;
    if (model.equals("tfidf")) {
        RocchioQueryExpansion rocchioQueryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source,
                Nbr_Docs, Nbr_Terms);
        rocchioVector = rocchioQueryExpansion.getRocchioVector(PatentQuery.getFields()[source]);
    }

    //                System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + ".");
    Query expandedQuery = null;
    MMRQueryExpansion qe = new MMRQueryExpansion(hits, ir, rocchioVector, MMRQE_ALPHA, MMRQE_LAMBDA,
            PatentQuery.getFields()[source], Nbr_Docs, Nbr_Terms);

    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            //                BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            //                if (query.isFilter()) {
            //                    Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
            //                            new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
            //                    bQueryFields.add(filter, BooleanClause.Occur.MUST);
            //                }
            //                if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
            //                    bQueryFields.add(q, BooleanClause.Occur.MUST);
            //                }
            if (expandedQuery == null) {
                expandedQuery = qe.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
            //                System.err.println("Expanded Query: " + expandedQuery);
            //                hits = searcher.search(expandedQuery, 100);
            //                System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + ".");
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    //        TopDocs hits = searcher.search(bQuery, 100);
    //                System.err.println(hits.totalHits + " total matching documents.");
    return bQuery;
}