Example usage for org.apache.lucene.search BooleanQuery setMaxClauseCount

Introduction

This page collects example usages of org.apache.lucene.search.BooleanQuery.setMaxClauseCount taken from open-source projects.

Prototype

public static void setMaxClauseCount(int maxClauseCount)

Source Link

Document

Set the maximum number of clauses permitted per BooleanQuery.

Usage

From source file:org.geotoolkit.lucene.index.LuceneIndexSearcher.java

License:Open Source License

/**
 * Runs a Lucene search for the given spatial query and returns the identifiers of the matching documents.
 *
 * <p>The textual part of the query is parsed with an {@code ExtendedQueryParser} (default field
 * {@code "title"}, default operator AND) and combined with the spatial filter according to the
 * query's logical operator (AND, OR or NOT). Sub-queries are executed recursively and merged
 * into the result. When the cache is enabled and already holds this query, the cached set is
 * returned without searching.</p>
 *
 * @param spatialQueryI The query to execute; it is cast to
 *                      {@code org.geotoolkit.lucene.filter.SpatialQuery}.
 *
 * @return The identifiers of the matching documents (a {@link LinkedHashSet} for a fresh search,
 *         or the cached set on a cache hit).
 * @throws SearchingException if the query string cannot be parsed or an I/O error occurs while searching.
 * @throws IllegalArgumentException if the logical operator is not AND, OR or NOT.
 */
public Set<String> doSearch(final SpatialQuery spatialQueryI) throws SearchingException {
    org.geotoolkit.lucene.filter.SpatialQuery spatialQuery = (org.geotoolkit.lucene.filter.SpatialQuery) spatialQueryI;
    try {
        final long start = System.currentTimeMillis();
        final Set<String> results = new LinkedHashSet<>();
        spatialQuery.applyRtreeOnFilter(rTree, envelopeOnly);

        // look for a cached query
        if (isCacheEnabled && cachedQueries.containsKey(spatialQuery)) {
            final Set<String> cachedResults = cachedQueries.get(spatialQuery);
            // report the size of the cached set ('results' is still empty at this point)
            LOGGER.log(logLevel, "returning result from cache ({0} matching documents)", cachedResults.size());
            return cachedResults;
        }

        // the number of documents in the index is used as the upper bound of every search below
        int maxRecords = (int) searcher.collectionStatistics("id").maxDoc();
        if (maxRecords == 0) {
            LOGGER.warning("The index seems to be empty.");
            // fall back to 1 so the searches below are still given a positive bound
            maxRecords = 1;
        }

        final String field = "title";
        String stringQuery = spatialQuery.getQuery();
        final QueryParser parser = new ExtendedQueryParser(field, analyzer, numericFields);
        parser.setDefaultOperator(Operator.AND);

        // remove term:* query
        stringQuery = removeOnlyWildchar(stringQuery);

        // escape '/' character
        stringQuery = stringQuery.replace("/", "\\/");

        // enable the leading wildcard mode when a field value starts with '*' or '?'
        // (directly, or after an opening '(' and/or a '+')
        if (stringQuery.indexOf(":*") != -1 || stringQuery.indexOf(":?") != -1
                || stringQuery.indexOf(":(*") != -1 || stringQuery.indexOf(":(+*") != -1
                || stringQuery.indexOf(":+*") != -1) {
            parser.setAllowLeadingWildcard(true);
            LOGGER.log(Level.FINER, "Allowing leading wildChar");
            // NOTE(review): setMaxClauseCount is a static setter, so this presumably lifts the
            // clause limit for every BooleanQuery in the JVM, not only for this search - confirm.
            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        }

        // turn off the mechanism that lower-cases expanded terms;
        // done for range queries only for now. TODO see if we need to set it every time
        if (stringQuery.contains(" TO ")) {
            parser.setLowercaseExpandedTerms(false);
        }
        final Query query;
        if (!stringQuery.isEmpty()) {
            query = parser.parse(stringQuery);
        } else {
            query = SIMPLE_QUERY;
        }
        LOGGER.log(Level.FINER, "QueryType:{0}", query.getClass().getName());
        final Filter filter = spatialQuery.getSpatialFilter();
        final LogicalFilterType operator = spatialQuery.getLogicalOperator();
        final Sort sort = spatialQuery.getSort();

        // build the human-readable description of the request for the log
        String sorted = "";
        if (sort != null) {
            sorted = "\norder by: " + sort.toString();
        }
        String f = "";
        if (filter != null) {
            f = '\n' + filter.toString();
        }
        String operatorValue = "";
        if (!(operator == LogicalFilterType.AND || (operator == LogicalFilterType.OR && filter == null))) {
            operatorValue = '\n' + SerialChainFilter.valueOf(operator);
        }
        LOGGER.log(logLevel, "Searching for: " + query.toString(field) + operatorValue + f + sorted
                + "\nmax records: " + maxRecords);

        // simple query with an AND (or an OR without any spatial filter): one search is enough
        if (operator == LogicalFilterType.AND || (operator == LogicalFilterType.OR && filter == null)) {
            final TopDocs docs;
            if (sort != null) {
                docs = searcher.search(query, filter, maxRecords, sort);
            } else {
                docs = searcher.search(query, filter, maxRecords);
            }
            for (ScoreDoc doc : docs.scoreDocs) {
                addToResult(results, doc.doc);
            }

            // for an OR we run the text query and the spatial filter separately and keep the union
        } else if (operator == LogicalFilterType.OR) {
            final TopDocs hits1;
            final TopDocs hits2;
            if (sort != null) {
                hits1 = searcher.search(query, null, maxRecords, sort);
                hits2 = searcher.search(SIMPLE_QUERY, spatialQuery.getSpatialFilter(), maxRecords, sort);
            } else {
                hits1 = searcher.search(query, maxRecords);
                hits2 = searcher.search(SIMPLE_QUERY, spatialQuery.getSpatialFilter(), maxRecords);
            }
            for (ScoreDoc doc : hits1.scoreDocs) {
                addToResult(results, doc.doc);
            }
            for (ScoreDoc doc : hits2.scoreDocs) {
                addToResult(results, doc.doc);
            }

            // for a NOT we collect the unwanted documents first, then keep every other document
        } else if (operator == LogicalFilterType.NOT) {
            final TopDocs hits1;
            if (sort != null) {
                hits1 = searcher.search(query, filter, maxRecords, sort);
            } else {
                hits1 = searcher.search(query, filter, maxRecords);
            }
            final Set<String> unWanteds = new LinkedHashSet<>();
            for (ScoreDoc doc : hits1.scoreDocs) {
                addToResult(unWanteds, doc.doc);
            }

            final TopDocs hits2;
            if (sort != null) {
                hits2 = searcher.search(SIMPLE_QUERY, null, maxRecords, sort);
            } else {
                hits2 = searcher.search(SIMPLE_QUERY, maxRecords);
            }
            for (ScoreDoc doc : hits2.scoreDocs) {
                final String id = identifiers.get(doc.doc);
                if (id != null && !unWanteds.contains(id)) {
                    results.add(id);
                }
            }

        } else {
            throw new IllegalArgumentException("unsupported logical Operator");
        }

        // if we have some sub-queries we execute them separately and merge the results
        if (spatialQuery.getSubQueries().size() > 0) {

            // an OR whose own query is SIMPLE_QUERY contributes nothing itself:
            // only the sub-query results are kept
            if (operator == LogicalFilterType.OR && query.equals(SIMPLE_QUERY)) {
                results.clear();
            }

            for (SpatialQuery sub : spatialQuery.getSubQueries()) {
                final Set<String> subResults = doSearch(sub);
                if (operator == LogicalFilterType.AND) {
                    // intersection: drop every result the sub-query did not return
                    final Set<String> toRemove = new HashSet<>();
                    for (String r : results) {
                        if (!subResults.contains(r)) {
                            toRemove.add(r);
                        }
                    }
                    results.removeAll(toRemove);
                } else if (operator == LogicalFilterType.OR) {
                    // union
                    results.addAll(subResults);

                } else {
                    LOGGER.warning("unimplemented case in doSearch");
                }
            }
        }

        // put the query in cache
        putInCache(spatialQuery, results);

        LOGGER.log(logLevel,
                results.size() + " total matching documents (" + (System.currentTimeMillis() - start) + "ms)");
        return results;
    } catch (ParseException ex) {
        throw new SearchingException("Parse Exception while performing lucene request", ex);
    } catch (IOException ex) {
        throw new SearchingException("IO Exception while performing lucene request", ex);
    }
}

From source file:org.getopt.luke.Luke.java

License:Apache License

/**
 * Create a Query instance that corresponds to values selected in the UI,
 * such as analyzer class name and arguments, and default field.
 *
 * <p>The BooleanQuery max clause count is read from the "bqMaxCount" widget. A value that
 * cannot be parsed, or that is lower than 1, is replaced by the default of 1024 and reported
 * through the status line.</p>
 *
 * @param queryString the query text; parsed either by the XML {@code CoreParser} (when the
 *        "ckXmlParser" option is selected) or by the classic {@code QueryParser}
 * @return the parsed query, or {@code null} when no analyzer could be created
 * @throws Exception if the query cannot be parsed
 */
public Query createQuery(String queryString) throws Exception {
    Object srchOpts = find("srchOptTabs");
    Analyzer analyzer = createAnalyzer(srchOpts);
    if (analyzer == null) {
        return null;
    }
    String defField = getDefaultField(srchOpts);
    QueryParser qp = new QueryParser(LV, defField, analyzer);
    Object ckXmlParser = find(srchOpts, "ckXmlParser");
    Object ckWild = find(srchOpts, "ckWild");
    Object ckPosIncr = find(srchOpts, "ckPosIncr");
    Object ckLoExp = find(srchOpts, "ckLoExp");
    Object cbDateRes = find(srchOpts, "cbDateRes");
    DateTools.Resolution resolution = Util.getResolution(getString(cbDateRes, "text"));
    Object cbOp = find(srchOpts, "cbOp");
    Object bqMaxCount = find(srchOpts, "bqMaxCount");
    int maxCount = 1024;
    try {
        maxCount = Integer.parseInt(getString(bqMaxCount, "text"));
    } catch (Exception e) {
        e.printStackTrace();
        showStatus("Invalid BooleanQuery max clause count, using default 1024");
    }
    // BooleanQuery.setMaxClauseCount rejects values below 1 with an IllegalArgumentException;
    // treat such input like any other invalid value instead of failing the whole query.
    if (maxCount < 1) {
        maxCount = 1024;
        showStatus("Invalid BooleanQuery max clause count, using default 1024");
    }
    QueryParser.Operator op;
    BooleanQuery.setMaxClauseCount(maxCount);
    String opString = getString(cbOp, "text");
    // anything other than "OR" (case-insensitive) selects AND
    if (opString.equalsIgnoreCase("OR")) {
        op = QueryParser.OR_OPERATOR;
    } else {
        op = QueryParser.AND_OPERATOR;
    }
    qp.setAllowLeadingWildcard(getBoolean(ckWild, "selected"));
    qp.setEnablePositionIncrements(getBoolean(ckPosIncr, "selected"));
    qp.setLowercaseExpandedTerms(getBoolean(ckLoExp, "selected"));
    qp.setDateResolution(resolution);
    qp.setDefaultOperator(op);
    if (getBoolean(ckXmlParser, "selected")) {
        // XML query syntax: the query string is handed to the CoreParser as UTF-8 bytes
        CoreParser cp = createParser(defField, analyzer);
        Query q = cp.parse(new ByteArrayInputStream(queryString.getBytes("UTF-8")));
        return q;
    } else {
        return qp.parse(queryString);
    }
}

From source file:org.hibernate.search.test.perf.SearcherThread.java

License:Open Source License

/**
 * Parses {@code queryString} against the default field "t" with expanded terms lower-cased.
 * When the parsed query is a {@code BooleanQuery}, the BooleanQuery max clause count is raised
 * to {@code Integer.MAX_VALUE} before the query is returned.
 *
 * @return the parsed query
 * @throws ParseException if {@code queryString} cannot be parsed
 */
private Query getQuery() throws ParseException {
    final QueryParser parser = new QueryParser(SearchTestCase.getTargetLuceneVersion(), "t",
            SearchTestCase.standardAnalyzer);
    parser.setLowercaseExpandedTerms(true);

    final Query parsed = parser.parse(queryString);
    if (!(parsed instanceof BooleanQuery)) {
        return parsed;
    }
    // the limit is lifted only after parsing, and only for boolean queries
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    return parsed;
}

From source file:org.hippoecm.repository.query.lucene.AuthorizationQuery.java

License:Apache License

/**
 * Builds the authorization query for the given subject.
 *
 * <p>A subject holding a {@code SystemPrincipal} gets a match-all query. For any other subject
 * the query is built by {@code initQuery} from the subject's facet-auth and authorization-filter
 * principals together with the names of its user and group principals.</p>
 *
 * @param subject        the subject whose principals drive the query
 * @param nsMappings     passed through to {@code initQuery}
 * @param indexingConfig passed through to {@code initQuery}
 * @param ntMgr          passed through to {@code initQuery}
 * @param session        must be an {@code InternalHippoSession}
 * @throws RepositoryException if {@code session} is not an {@code InternalHippoSession}
 */
public AuthorizationQuery(final Subject subject, final NamespaceMappings nsMappings,
        final ServicingIndexingConfiguration indexingConfig, final NodeTypeManager ntMgr, final Session session)
        throws RepositoryException {
    // Raise the boolean clause limit above the default of 1024 before anything else happens.
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    final boolean internalSession = session instanceof InternalHippoSession;
    if (!internalSession) {
        throw new RepositoryException("Session is not an instance of o.a.j.core.SessionImpl");
    }

    if (subject.getPrincipals(SystemPrincipal.class).isEmpty()) {
        // regular subject: collect group and user names, then build the restricted query
        final Set<String> groupNames = new HashSet<String>();
        for (GroupPrincipal group : subject.getPrincipals(GroupPrincipal.class)) {
            groupNames.add(group.getName());
        }
        final Set<String> userNames = new HashSet<String>();
        for (UserPrincipal user : subject.getPrincipals(UserPrincipal.class)) {
            userNames.add(user.getName());
        }

        final long startedAt = System.currentTimeMillis();
        this.query = initQuery(subject.getPrincipals(FacetAuthPrincipal.class),
                subject.getPrincipals(AuthorizationFilterPrincipal.class), userNames, groupNames,
                (InternalHippoSession) session, indexingConfig, nsMappings, ntMgr);
        final String elapsedMillis = String.valueOf(System.currentTimeMillis() - startedAt);
        log.info("Creating authorization query took {} ms. Query: {}", elapsedMillis, query);
    } else {
        // system subject: a single MUST clause matching all documents
        this.query = new BooleanQuery(true);
        this.query.add(new MatchAllDocsQuery(), Occur.MUST);
    }
}

From source file:org.hupo.psi.mi.psicquic.ws.IndexBasedPsicquicService.java

License:Apache License

public IndexBasedPsicquicService() {
    BooleanQuery.setMaxClauseCount(200 * 1000);
}

From source file:org.index.TermScore.java

/**
 * Builds one query per line of the configured tab-separated query file.
 *
 * <p>The file is taken from the {@code query.tsv.file} property; when that property is absent,
 * {@code query.wtsv.file} (the weighted tsv file) is used instead. Each line is split on tabs:
 * column 0 is the question id, column 1 a comma-separated list of labels, and column 2 the text
 * from which the terms are extracted ({@code getBagOfWords} for the plain file,
 * {@code getSelectedTerms} for the weighted one).</p>
 *
 * @return the queries, in file order
 * @throws IllegalStateException if neither query file property is set
 * @throws Exception if the file cannot be read or a line cannot be turned into a query
 */
public List<SOQuery> constructQueries() throws Exception {
    List<SOQuery> queryList = new LinkedList<SOQuery>();
    boolean allTerms = true;
    BooleanQuery.setMaxClauseCount(20000);

    String queryFile = prop.getProperty("query.tsv.file");
    if (queryFile == null) {
        // the weighted tsv files
        queryFile = prop.getProperty("query.wtsv.file");
        allTerms = false;
    }
    if (queryFile == null) {
        // fail with a clear message instead of a NullPointerException from FileReader
        throw new IllegalStateException(
                "Neither 'query.tsv.file' nor 'query.wtsv.file' is configured");
    }

    BufferedReader br = new BufferedReader(new FileReader(queryFile));
    try {
        String line;
        while ((line = br.readLine()) != null) {
            String[] tokens = line.split("\t");
            // the first token is the question id, the second is a list
            // of labels...
            String[] tagTerms = tokens[1].split("\\,");
            List<String> terms;
            if (allTerms) {
                terms = getBagOfWords(tokens[2]);
            } else {
                terms = getSelectedTerms(tokens[2]);
            }

            Query q = construct(terms, tagTerms);
            queryList.add(new SOQuery(Integer.parseInt(tokens[0]), q));
        }
    } finally {
        // always release the file handle, even when a line fails to parse
        br.close();
    }
    return queryList;
}

From source file:org.intermine.web.autocompletion.LuceneSearchEngine.java

License:GNU General Public License

/**
 * Perform the lucene search./*w w w  .  j av a2s .co m*/
 * @param queryString
 *            the string for what you search in the indexes
 * @param toSearch
 *            the field in which you search
 * @return Hits list of documents (search results)
 * @throws IOException
 *             IOException
 * @throws ParseException
 *             IOException
 */
public TopDocs performSearch(String queryString, String toSearch) throws IOException, ParseException {
    QueryParser parser = new QueryParser(Version.LUCENE_30, toSearch, analyzer);
    BooleanQuery.setMaxClauseCount(4096);

    if (!"".equals(queryString) && !queryString.trim().startsWith("*")) {

        Query query;

        if (queryString.endsWith(" ")) {
            queryString = queryString.substring(0, queryString.length() - 1);
        }

        String[] tmp;
        if (queryString.contains(" ")) {
            tmp = queryString.replaceAll(" +", " ").trim().split(" ");
            queryString = new String();

            for (int i = 0; i < tmp.length; i++) {
                queryString += tmp[i];
                if (i < tmp.length - 1) {
                    queryString += "* AND ";
                }
            }
        }
        query = parser.parse(queryString + "*");

        return indexSearch.search(query, 500); // FIXME: hardcoded maximum
                                               // number of results
    }

    return null;
}

From source file:org.intermine.web.autocompletion.LuceneSearchEngine.java

License:GNU General Public License

/**
 * Perform the search but only return n results.
 * @param queryS/*from   w  w  w .  j ava  2 s .co m*/
 *            the string for what you search in the indexes
 * @param toSearch
 *            the field in which you search
 * @param n
 *            first n results
 * @return array of ScoreDoc[] with n elements
 */
public String[] fastSearch(String queryS, String toSearch, int n) {

    QueryParser parser = new QueryParser(Version.LUCENE_30, toSearch, analyzer);
    BooleanQuery.setMaxClauseCount(4096);
    String status = "true";
    String[] results = null;

    if (!"".equals(queryS) && !queryS.trim().startsWith("*")) {
        Query query = null;
        if (queryS.endsWith(" ")) {
            queryS = queryS.substring(0, queryS.length() - 1);
        }

        String[] tmp;
        if (queryS.contains(" ")) {
            tmp = queryS.replaceAll(" +", " ").trim().split(" ");
            queryS = new String();

            for (int i = 0; i < tmp.length; i++) {
                queryS += tmp[i];
                if (i < tmp.length - 1) {
                    queryS += "* AND ";
                }
            }
        }

        try {
            query = parser.parse(queryS + "*");
            TopDocs topDoc = null;
            try {
                topDoc = indexSearch.search(query, null, n);
                ScoreDoc[] docs = topDoc.scoreDocs;

                results = new String[docs.length + 1];
                for (int i = 1; i < docs.length + 1; i++) {
                    try {
                        results[i] = indexSearch.doc(docs[i - 1].doc).get(toSearch);
                    } catch (IOException e) {
                        status = "No results! Please try again.";
                    }
                }
                results[0] = status;
            } catch (IOException e) {
                status = "Please type in more characters to get results.";
                results = new String[1];
                results[0] = status;
            } catch (Throwable e1) {
                status = "Please type in more characters to get results.";
                results = new String[1];
                results[0] = status;
            }
        } catch (ParseException e) {
            status = "No results! Please try again.";
        }

        return results;
    }

    return null;
}

From source file:org.jetbrains.idea.maven.server.embedder.Maven2ServerIndexerImpl.java

License:Apache License

/**
 * Runs {@code query} against the index registered under {@code indexId} and converts up to
 * {@code maxResult} hits into {@code MavenArtifactInfo} objects.
 *
 * @return the converted artifacts; an empty set when nothing matched or the query expanded
 *         to too many clauses
 * @throws MavenServerIndexerException wrapping any other failure
 */
@Override
public Set<MavenArtifactInfo> search(int indexId, Query query, int maxResult)
        throws MavenServerIndexerException {
    try {
        final IndexingContext context = getIndex(indexId);

        TopDocs hits = null;
        try {
            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
            hits = context.getIndexSearcher().search(query, null, maxResult);
        } catch (BooleanQuery.TooManyClauses ignored) {
            // thrown when a too wide wildcard is used on too big data; treated as "no hits"
        }

        final boolean nothingFound = hits == null || hits.scoreDocs.length == 0;
        if (nothingFound) {
            return Collections.emptySet();
        }

        final Set<MavenArtifactInfo> found = new HashSet<MavenArtifactInfo>();
        final int hitCount = hits.scoreDocs.length;
        for (int pos = 0; pos < hitCount; pos++) {
            final Document document = context.getIndexReader().document(hits.scoreDocs[pos].doc);
            final ArtifactInfo artifact = IndexUtils.constructArtifactInfo(document, context);
            if (artifact != null) {
                artifact.repository = getRepositoryPathOrUrl(context);
                found.add(Maven2ModelConverter.convertArtifactInfo(artifact));
            }
        }
        return found;
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}

From source file:org.jetbrains.idea.maven.server.Maven3ServerIndexerImpl.java

License:Apache License

/**
 * Runs {@code query} against the index registered under {@code indexId} and converts up to
 * {@code maxResult} hits into {@code MavenArtifactInfo} objects.
 *
 * @return the converted artifacts; an empty set when nothing matched or the query expanded
 *         to too many clauses
 * @throws MavenServerIndexerException wrapping any other failure
 */
@Override
public Set<MavenArtifactInfo> search(int indexId, Query query, int maxResult)
        throws RemoteException, MavenServerIndexerException {
    try {
        final IndexingContext context = getIndex(indexId);

        TopDocs hits = null;
        try {
            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
            hits = context.getIndexSearcher().search(query, null, maxResult);
        } catch (BooleanQuery.TooManyClauses ignored) {
            // thrown when a too wide wildcard is used on too big data; treated as "no hits"
        }

        final boolean nothingFound = hits == null || hits.scoreDocs.length == 0;
        if (nothingFound) {
            return Collections.emptySet();
        }

        final Set<MavenArtifactInfo> found = new HashSet<MavenArtifactInfo>();
        final int hitCount = hits.scoreDocs.length;
        for (int pos = 0; pos < hitCount; pos++) {
            final Document document = context.getIndexReader().document(hits.scoreDocs[pos].doc);
            final ArtifactInfo artifact = IndexUtils.constructArtifactInfo(document, context);
            if (artifact != null) {
                artifact.repository = getRepositoryPathOrUrl(context);
                found.add(MavenModelConverter.convertArtifactInfo(artifact));
            }
        }
        return found;
    } catch (Exception e) {
        throw new MavenServerIndexerException(wrapException(e));
    }
}