Example usage for org.apache.lucene.search BooleanQuery setMaxClauseCount

List of usage examples for org.apache.lucene.search BooleanQuery setMaxClauseCount

Introduction

On this page you can find example usage for org.apache.lucene.search BooleanQuery setMaxClauseCount.

Prototype

public static void setMaxClauseCount(int maxClauseCount) 

Source Link

Document

Set the maximum number of clauses permitted per BooleanQuery.

Usage

From source file:edu.cmu.lti.oaqa.knn4qa.cand_providers.LuceneCandidateProvider.java

License:Apache License

@Override
public CandidateInfo getCandidates(int queryNum, Map<String, String> queryData, int maxQty) throws Exception {
    // Retrieves up to maxQty Lucene candidate entries for one query.
    // queryData must contain both ID_FIELD_NAME and TEXT_FIELD_NAME entries;
    // a missing entry results in an Exception naming the absent field.

    ArrayList<CandidateEntry> resArr = new ArrayList<CandidateEntry>();

    String queryID = queryData.get(ID_FIELD_NAME);
    if (null == queryID) {
        throw new Exception(
                String.format("Query id (%s) is undefined for query # %d", ID_FIELD_NAME, queryNum));
    }

    String text = queryData.get(TEXT_FIELD_NAME);
    if (null == text) {
        throw new Exception(String.format("Query (%s) is undefined for query # %d", TEXT_FIELD_NAME, queryNum));
    }

    String query = text.trim();

    // Count whitespace-separated tokens; no need to materialize them in a list
    // (the old code built a throwaway ArrayList only to read its size).
    int tokQty = 0;
    for (String s : mSpaceSplit.split(query)) {
        ++tokQty;
    }
    if (2 * tokQty > BooleanQuery.getMaxClauseCount()) {
        // This is a heuristic, but it should work fine in many cases.
        // NOTE(review): setMaxClauseCount mutates JVM-global static state without
        // synchronization here, while a sibling provider guards the same call with
        // a static lock -- confirm whether this needs the same guard.
        BooleanQuery.setMaxClauseCount(2 * tokQty);
    }

    int numFound = 0;

    if (!query.isEmpty()) {
        // QueryParser cannot be shared among threads!
        QueryParser parser = new QueryParser(TEXT_FIELD_NAME, mAnalyzer);
        parser.setDefaultOperator(QueryParser.OR_OPERATOR);

        Query queryParsed = parser.parse(query);

        TopDocs hits = mSearcher.search(queryParsed, maxQty);
        numFound = hits.totalHits;
        ScoreDoc[] scoreDocs = hits.scoreDocs;

        for (ScoreDoc oneHit : scoreDocs) {
            Document doc = mSearcher.doc(oneHit.doc);
            String id = doc.get(ID_FIELD_NAME);
            float score = oneHit.score;

            resArr.add(new CandidateEntry(id, score));
        }
    }

    CandidateEntry[] results = resArr.toArray(new CandidateEntry[resArr.size()]);
    Arrays.sort(results);

    return new CandidateInfo(numFound, results);
}

From source file:edu.cmu.lti.oaqa.knn4qa.cand_providers.TranRecSortByProb.java

License:Apache License

@Override
public CandidateInfo getCandidates(int queryNum, Map<String, String> queryData, int maxQty) throws Exception {
    // Retrieves up to maxQty candidate entries for one query, expanding each
    // query token with its top translation candidates (GIZA translation table)
    // before handing the expanded query to Lucene.
    ArrayList<CandidateEntry> resArr = new ArrayList<CandidateEntry>();

    String queryID = queryData.get(ID_FIELD_NAME);
    if (null == queryID) {
        throw new Exception(
                String.format("Query id (%s) is undefined for query # %d", ID_FIELD_NAME, queryNum));
    }

    String text = queryData.get(TEXT_FIELD_NAME);
    if (null == text) {
        throw new Exception(String.format("Query (%s) is undefined for query # %d", TEXT_FIELD_NAME, queryNum));
    }

    String origQuery = text.trim();
    int numFound = 0;

    if (!origQuery.isEmpty()) {
        // StringBuilder instead of StringBuffer: this buffer is method-local and
        // never shared between threads, so synchronization is pure overhead.
        StringBuilder queryToks = new StringBuilder();
        int tokQty = 0;

        for (String w : origQuery.split("\\s+"))
            if (!w.isEmpty()) {
                tokQty++;
                queryToks.append(w + " ");

                final WordEntry we = mFieldIndex.getWordEntry(w);
                if (we != null) {
                    GizaOneWordTranRecs rec0 = mAnswToQuestTran.getTranProbs(we.mWordId);
                    if (rec0 != null) {
                        // Sort translation candidates by probability, then append the
                        // mTopTranQty most likely ones (optionally probability-weighted).
                        TranRecSortByProb rec[] = new TranRecSortByProb[rec0.mDstIds.length];
                        for (int i = 0; i < rec0.mDstIds.length; ++i)
                            rec[i] = new TranRecSortByProb(rec0.mDstIds[i], rec0.mProbs[i]);
                        Arrays.sort(rec);
                        int qty = mTopTranQty;
                        for (int i = 0; i < Math.min(rec.length, qty); ++i) {
                            int dstId = rec[i].mDstWorId;
                            if (dstId != we.mWordId) {
                                queryToks.append(mFieldIndex.getWord(dstId)
                                        + (mUseWeights ? "^" + String.format("%.3f", rec[i].mProb) : "") + " ");
                                ++tokQty;
                            } else {
                                // If we skip a word, b/c it's the same as the query word
                                // we will get one more candidate so that exactly Math.min(mTopTranQty, rec.length)
                                // words were added
                                ++qty;
                            }
                        }
                    }
                }
            }

        synchronized (lock) { // this is a static lock, it will block all instance of this class
            if (tokQty > BooleanQuery.getMaxClauseCount()) {
                BooleanQuery.setMaxClauseCount(tokQty);
            }
        }

        String luceneQuery = queryToks.toString().trim();
        if (!luceneQuery.isEmpty()) {
            // QueryParser cannot be shared among threads!
            QueryParser parser = new QueryParser(TEXT_FIELD_NAME, mAnalyzer);
            parser.setDefaultOperator(QueryParser.OR_OPERATOR);

            Query queryParsed = parser.parse(luceneQuery);

            TopDocs hits = mSearcher.search(queryParsed, maxQty);
            numFound = hits.totalHits;
            ScoreDoc[] scoreDocs = hits.scoreDocs;

            for (ScoreDoc oneHit : scoreDocs) {
                Document doc = mSearcher.doc(oneHit.doc);
                String id = doc.get(ID_FIELD_NAME);
                float score = oneHit.score;

                resArr.add(new CandidateEntry(id, score));
            }
        }
    }

    CandidateEntry[] results = resArr.toArray(new CandidateEntry[resArr.size()]);
    Arrays.sort(results);

    return new CandidateInfo(numFound, results);
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

public List searchVariables(List<Long> studyIds, SearchTerm searchTerm) throws IOException {
    // Searches variable-level index documents matching a single search term,
    // optionally restricted to the given study ids. Returns variable ids
    // (or the study-id-filtered document intersection when studyIds != null).
    BooleanQuery searchQuery = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);

    if (studyIds != null) {
        searchQuery.add(orIdSearchTermClause(studyIds, "varStudyId"), BooleanClause.Occur.MUST);
    }

    if (searchTerm.getFieldName().equalsIgnoreCase("variable")) {
        BooleanQuery variableQuery = buildVariableQuery(searchTerm);
        // "=" means the clause must match; anything else means it must NOT match.
        BooleanClause.Occur occur = searchTerm.getOperator().equals("=") ? BooleanClause.Occur.MUST
                : BooleanClause.Occur.MUST_NOT;
        searchQuery.add(variableQuery, occur);
    }

    List<Document> variableResults = getHits(searchQuery);
    List<Long> variableIdResults = getVariableHitIds(variableResults);
    // TODO: 
    // Double-check if the intersectionVarDocResults() below - i.e., filtering
    // the hit list against the list of supplied study ids - is necessary at all. 
    // I would think not - because the study IDs were already added to the
    // search query, above. -- L.A.
    if (studyIds != null) {
        return intersectionVarDocResults(variableResults, studyIds);
    }
    return variableIdResults;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

public List searchVariables(List<Long> studyIds, List<SearchTerm> searchTerms, boolean varIdReturnValues)
        throws IOException {
    // Searches variable-level index documents matching ALL of the supplied
    // "variable" search terms, optionally restricted to the given study ids.
    // Returns variable ids when varIdReturnValues is true, study ids otherwise.
    BooleanQuery searchQuery = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    if (studyIds != null) {
        searchQuery.add(orIdSearchTermClause(studyIds, "varStudyId"), BooleanClause.Occur.MUST);
    }
    // Enhanced for-loop replaces the old raw-type Iterator + cast, which defeated
    // the generics on the searchTerms parameter.
    for (SearchTerm elem : searchTerms) {
        if (elem.getFieldName().equalsIgnoreCase("variable")) {
            BooleanQuery indexQuery = buildVariableQuery(elem);
            if (elem.getOperator().equals("=")) {
                searchQuery.add(indexQuery, BooleanClause.Occur.MUST);
            } else {
                searchQuery.add(indexQuery, BooleanClause.Occur.MUST_NOT);
            }
        }
    }
    List<Long> finalResults = null;
    // TODO: 
    // Double-check if the intersection(Var)DocResults() below - i.e., filtering
    // the hit list against the list of supplied study ids - is necessary at all. 
    // I would think not - because the study IDs were already added to the
    // search query, above. -- L.A.
    if (varIdReturnValues) {
        List<Document> variableResults = getHits(searchQuery);
        List<Long> variableIdResults = getVariableHitIds(variableResults);
        finalResults = studyIds != null ? intersectionVarDocResults(variableResults, studyIds)
                : variableIdResults;
    } else {
        List<Long> studyIdResults = getVariableHitStudyIds(searchQuery); // gets the study ids
        finalResults = studyIds != null ? intersectionResults(studyIdResults, studyIds) : studyIdResults;
    }
    return finalResults;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

public List searchFileMetadata(List<Long> studyIds, List<SearchTerm> searchTerms, boolean fileIdReturnValues)
        throws IOException {
    // Searches file-metadata index documents matching ALL of the supplied
    // file-metadata search terms, optionally restricted to the given study ids.
    // Returns file-metadata ids when fileIdReturnValues is true, study ids otherwise.
    BooleanQuery searchQuery = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    if (studyIds != null) {
        searchQuery.add(orIdSearchTermClause(studyIds, "id"), BooleanClause.Occur.MUST);
    }
    // Enhanced for-loop replaces the old raw-type Iterator + cast, which defeated
    // the generics on the searchTerms parameter.
    for (SearchTerm elem : searchTerms) {
        // Determine if this is a file-level metadata search term:
        if (isFileMetadataField(elem.getFieldName())) {
            BooleanQuery indexQuery = buildFileMetadataQuery(elem);
            logger.fine("INDEXER: filemetadata element query (native): " + indexQuery.toString());
            if (elem.getOperator().equals("=")) {
                // We only support "=" on file metadata, for now, anyway. 
                // -- L.A. 
                searchQuery.add(indexQuery, BooleanClause.Occur.MUST);
            } else {
                searchQuery.add(indexQuery, BooleanClause.Occur.MUST_NOT);
            }
        }
    }

    logger.fine("INDEXER: filemetadata combined query (native): " + searchQuery.toString());

    List<Long> finalResults = null;
    // TODO: 
    // Double-check if the intersection(File)DocResults() below - i.e., filtering
    // the hit list against the list of supplied study ids - is necessary at all. 
    // I would think not - because the study IDs were already added to the
    // search query, above. -- L.A.
    if (fileIdReturnValues) {
        List<Document> fileMetadataResults = getHits(searchQuery);
        List<Long> fileMetadataIdResults = getFileMetadataHitIds(fileMetadataResults);
        finalResults = studyIds != null ? intersectionFileDocResults(fileMetadataResults, studyIds)
                : fileMetadataIdResults;
    } else {
        List<Long> studyIdResults = getFileMetadataHitStudyIds(searchQuery); // gets the study ids
        finalResults = studyIds != null ? intersectionResults(studyIdResults, studyIds) : studyIdResults;
    }
    return finalResults;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

BooleanQuery orPhraseQuery(List<SearchTerm> orSearchTerms) {
    // ORs the supplied search terms together: multi-word values become sloppy
    // PhraseQuery clauses, single-word values become TermQuery clauses.
    BooleanQuery orTerms = new BooleanQuery();
    // setMaxClauseCount is static; call it through the class name rather than an
    // instance so the JVM-global effect is explicit (old code used orTerms.setMaxClauseCount).
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    // Enhanced for-loop replaces the old raw-type Iterator + cast.
    for (SearchTerm elem : orSearchTerms) {
        String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
        if (phrase.length > 1) {
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.setSlop(10); // allow up to 10 positions between phrase words

            for (int i = 0; i < phrase.length; i++) {
                phraseQuery.add(new Term(elem.getFieldName(), phrase[i].toLowerCase().trim()));
            }
            orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
        } else if (phrase.length == 1) {
            logger.fine("INDEXER: orPhraseQuery: search element value: " + phrase[0].toLowerCase().trim());
            Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim());
            logger.fine("INDEXER: orPhraseQuery: term value=" + t.text());
            TermQuery orQuery = new TermQuery(t);
            logger.fine("INDEXER: TermQuery orQuery (native): " + orQuery.toString());
            orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
        }
    }
    return orTerms;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

BooleanQuery orPhraseOrWildcardQuery(List<SearchTerm> orSearchTerms) {
    // ORs the supplied search terms together: multi-word values become sloppy
    // PhraseQuery clauses; single-word values become prefix WildcardQuery clauses
    // for prefix-searchable file-metadata fields, TermQuery clauses otherwise.
    BooleanQuery orTerms = new BooleanQuery();
    // setMaxClauseCount is static; call it through the class name rather than an
    // instance so the JVM-global effect is explicit (old code used orTerms.setMaxClauseCount).
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    // Enhanced for-loop replaces the old raw-type Iterator + cast.
    for (SearchTerm elem : orSearchTerms) {
        String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
        if (phrase.length > 1) {
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.setSlop(10); // allow up to 10 positions between phrase words

            for (int i = 0; i < phrase.length; i++) {
                phraseQuery.add(new Term(elem.getFieldName(), phrase[i].toLowerCase().trim()));
            }
            orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
        } else if (phrase.length == 1) {
            logger.fine("INDEXER: wildcardQuery: search element value: " + phrase[0].toLowerCase().trim());
            if (isPrefixSearchableFileMetadataField(elem.getFieldName())) {
                Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim() + "*");
                logger.fine("INDEXER: wildcardQuery: term value=" + t.text());
                WildcardQuery wcQuery = new WildcardQuery(t);
                logger.fine("INDEXER: Term wildcardQuery (native): " + wcQuery.toString());
                orTerms.add(wcQuery, BooleanClause.Occur.SHOULD);
            } else {
                logger.fine("INDEXER: building PhraseQuery: search element value: "
                        + phrase[0].toLowerCase().trim());
                Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim());
                logger.fine("INDEXER: building PhraseQuery: term value=" + t.text());
                TermQuery orQuery = new TermQuery(t);
                logger.fine("INDEXER: TermQuery orQuery (native): " + orQuery.toString());
                orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
            }

        }
    }
    return orTerms;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

public BooleanQuery andSearchTermClause(List<SearchTerm> andSearchTerms) {
    // ANDs the supplied search terms together. "<" / ">" operators become open
    // range queries; the special field "any" delegates to buildAnyQuery; other
    // terms become phrase (partial match) or exact/negated term clauses.
    BooleanQuery andTerms = new BooleanQuery();
    // setMaxClauseCount is static; call it through the class name rather than an
    // instance so the JVM-global effect is explicit (old code used andTerms.setMaxClauseCount).
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    // Enhanced for-loop replaces the old raw-type Iterator + cast. The unused
    // "Term begin"/"Term end" locals and the dead commented-out RangeQuery
    // construction were removed.
    for (SearchTerm elem : andSearchTerms) {
        if (elem.getOperator().equals("<")) {
            // Unbounded below: match everything strictly less than the value.
            Query rQuery = new TermRangeQuery(elem.getFieldName(), null, elem.getValue().toLowerCase().trim(),
                    false, false);
            andTerms.add(rQuery, BooleanClause.Occur.MUST);
        } else if (elem.getOperator().equals(">")) {
            // Unbounded above: match everything strictly greater than the value.
            Query rQuery = new TermRangeQuery(elem.getFieldName(), elem.getValue().toLowerCase().trim(), null,
                    false, false);
            andTerms.add(rQuery, BooleanClause.Occur.MUST);
        } else if (elem.getFieldName().equalsIgnoreCase("any")) {
            // NOTE(review): this REPLACES andTerms, discarding any clauses
            // accumulated so far in this loop -- confirm that is intended.
            andTerms = buildAnyQuery(elem.getValue().toLowerCase().trim());
        } else {
            String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
            if (phrase.length > 1) {
                // The old code also constructed a PhraseQuery here that was never
                // added to the result; only the partialMatch clause survives.
                andTerms.add(partialMatch(elem, 10));
            } else if (phrase.length == 1) {
                Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim());
                TermQuery andQuery = new TermQuery(t);
                if (elem.getOperator().equals("=")) {
                    andTerms.add(andQuery, BooleanClause.Occur.MUST);
                } else if (elem.getOperator().equalsIgnoreCase("-")) {
                    andTerms.add(andQuery, BooleanClause.Occur.MUST_NOT);
                }
            }
        }

    }
    return andTerms;
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

BooleanQuery andQueryClause(List<BooleanQuery> andQueries) {
    // Combines all of the supplied sub-queries into one conjunction: every
    // sub-query becomes a MUST clause of the returned BooleanQuery.
    BooleanQuery conjunction = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(dvnMaxClauseCount);
    for (BooleanQuery subQuery : andQueries) {
        conjunction.add(new BooleanClause(subQuery, BooleanClause.Occur.MUST));
    }
    return conjunction;
}

From source file:edu.isi.pfindr.learn.search.LuceneSearchEngine.java

License:Apache License

/**
 * Searches the Lucene index with the (escaped) query string plus its stemmed and
 * expanded variants across multiple content fields, and returns a map from each
 * hit's original content ("orgContent") to its best (first-seen, i.e. highest)
 * score. Parse/IO errors are logged to stderr and yield a partial/empty map.
 */
public static Map<String, Double> search(String queryString, String descriptionExpandedNotStemmed,
        String descriptionExpandedStemmed) {

    Map<String, Double> searchResultMap = new HashMap<String, Double>();
    // Escape special characters in Lucene
    String originalDefinitionEscaped = LUCENE_PATTERN.matcher(queryString).replaceAll(REPLACEMENT_STRING_ESCAPE)
            .toLowerCase();
    descriptionExpandedNotStemmed = LUCENE_PATTERN.matcher(descriptionExpandedNotStemmed)
            .replaceAll(REPLACEMENT_STRING_ESCAPE).toLowerCase();
    descriptionExpandedStemmed = LUCENE_PATTERN.matcher(descriptionExpandedStemmed)
            .replaceAll(REPLACEMENT_STRING_ESCAPE).toLowerCase();

    try {
        String originalDefinitionStemmedQuery = CleanDataUtil
                .preprocessStemAndTokenize(queryString.toLowerCase()).trim();
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
        Query query;

        // Get the top hits
        TopScoreDocCollector collector = TopScoreDocCollector.create(100000, true);
        indexSearcher = getIndexSearcher();

        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

        if (!originalDefinitionStemmedQuery.isEmpty()) {
            originalDefinitionStemmedQuery = LUCENE_PATTERN.matcher(originalDefinitionStemmedQuery)
                    .replaceAll(REPLACEMENT_STRING_ESCAPE);
            // Field-parallel query variants: exact phrase boosted ^8, stemmed ^3.
            String[] fields = new String[] { "content", "contentStemmed", "contentExpanded",
                    "contentExpandedStemmed" };
            String[] queries = new String[] {
                    "\"" + originalDefinitionEscaped.trim().toLowerCase() + "\"^8 "
                            + originalDefinitionEscaped.trim().toLowerCase(),
                    originalDefinitionStemmedQuery + "^3", descriptionExpandedNotStemmed,
                    descriptionExpandedStemmed };
            query = MultiFieldQueryParser.parse(Version.LUCENE_30, queries, fields, analyzer);

        } else {
            QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);

            query = queryParser.parse("\"" + originalDefinitionEscaped.trim().toLowerCase() + "\"^8 "
                    + originalDefinitionEscaped.trim().toLowerCase());
        }

        indexSearcher.search(query, collector);
        ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
        // BUG FIX: iterate over the docs actually collected. getTotalHits() can
        // exceed scoreDocs.length (the collector caps at 100000), and the old
        // hitCount-bounded loop would then throw ArrayIndexOutOfBoundsException.
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            String content = doc.get("orgContent");
            // Keep the first (best) score per distinct content value;
            // Double.valueOf replaces the deprecated `new Double(...)`.
            if (!searchResultMap.containsKey(content)) {
                searchResultMap.put(content, Double.valueOf(scoreDoc.score));
            }
        }
    } catch (org.apache.lucene.queryParser.ParseException pe) {
        // Best-effort: a malformed query yields an empty result map.
        pe.printStackTrace();
    } catch (IOException ioe) {
        // Best-effort: index read failures yield a partial/empty result map.
        ioe.printStackTrace();
    }
    // NOTE(review): indexSearcher is intentionally left open (the old code had
    // its close commented out) -- presumably it is shared/reused via
    // getIndexSearcher(); confirm who is responsible for closing it.
    return searchResultMap;
}