Example usage for org.apache.lucene.search BooleanQuery setMaxClauseCount

Introduction

This page collects example usages of org.apache.lucene.search.BooleanQuery.setMaxClauseCount.

Prototype

public static void setMaxClauseCount(int maxClauseCount)

Source Link

Document

Set the maximum number of clauses permitted per BooleanQuery.

Usage

From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery32.java

License:Open Source License

/**
 * Issues a random number of range searches (drawn with {@code TestUtil.nextInt(random(), 10, 20)})
 * and checks, for every inclusive/exclusive bound combination, that the trie-encoded numeric
 * range query and the classical term range query report the same hit count. The term counts of
 * both queries are handed to {@code checkTermCounts} per search and in total.
 *
 * @param precisionStep precision step under test; it is also appended to the field name
 * @throws Exception if a search or term enumeration fails
 */
private void testRandomTrieAndClassicRangeQuery(final int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    // {minInclusive, maxInclusive}: inclusive, exclusive, left exclusive, right exclusive
    final boolean[][] boundVariants = { { true, true }, { false, false }, { false, true }, { true, false } };
    int totalTermCountT = 0;
    int totalTermCountC = 0;
    final int num = TestUtil.nextInt(random(), 10, 20);

    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    for (int round = 0; round < num; round++) {
        final int first = (int) (random().nextDouble() * noDocs * distance) + startOffset;
        final int second = (int) (random().nextDouble() * noDocs * distance) + startOffset;
        final int lower = Math.min(first, second);
        final int upper = Math.max(first, second);

        /*
         * In SIREn, the numeric type and the precision step are prepended to the
         * indexed numeric terms.
         */
        final String termPrefix = NumericType.INT.toString() + precisionStep;
        final BytesRef lowerBytes = new BytesRef(termPrefix);
        final BytesRef upperBytes = new BytesRef(termPrefix);
        final BytesRef lBytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
        final BytesRef uBytes = new BytesRef(NumericUtils.BUF_SIZE_INT);
        NumericUtils.intToPrefixCoded(lower, 0, lBytes);
        NumericUtils.intToPrefixCoded(upper, 0, uBytes);
        lowerBytes.append(lBytes);
        upperBytes.append(uBytes);

        for (final boolean[] bounds : boundVariants) {
            final boolean minInclusive = bounds[0];
            final boolean maxInclusive = bounds[1];

            final MultiNodeTermQuery tq = (MultiNodeTermQuery) nmqInt(field, precisionStep, lower, upper,
                    minInclusive, maxInclusive).getQuery();
            final MultiNodeTermQuery cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, minInclusive,
                    maxInclusive);
            final TopDocs tTopDocs = index.searcher.search(dq(tq), 1);
            final TopDocs cTopDocs = index.searcher.search(dq(cq), 1);
            assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal",
                    cTopDocs.totalHits, tTopDocs.totalHits);

            final int termCountT = this.countTerms(tq);
            totalTermCountT += termCountT;
            final int termCountC = this.countTerms(cq);
            totalTermCountC += termCountC;
            this.checkTermCounts(precisionStep, termCountT, termCountC);
        }
    }

    this.checkTermCounts(precisionStep, totalTermCountT, totalTermCountC);
    if (VERBOSE && precisionStep != Integer.MAX_VALUE) {
        // four searches (one per bound variant) are issued per round
        final int searchCount = num * 4;
        System.out.println("Average number of terms during random search on '" + field + "':");
        System.out.println(" Numeric query: " + (((double) totalTermCountT) / searchCount));
        System.out.println(" Classical query: " + (((double) totalTermCountC) / searchCount));
    }
}

From source file:com.sindicetech.siren.search.node.TestNodeNumericRangeQuery64.java

License:Open Source License

/**
 * Issues a random number of range searches (drawn with {@code TestUtil.nextInt(random(), 10, 20)})
 * over long values and checks, for every inclusive/exclusive bound combination, that the
 * trie-encoded numeric range query and the classical term range query report the same hit count.
 * The term counts of both queries are handed to {@code checkTermCounts} per search and in total.
 *
 * @param precisionStep precision step under test; it is also appended to the field name
 * @throws Exception if a search or term enumeration fails
 */
private void testRandomTrieAndClassicRangeQuery(final int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    // {minInclusive, maxInclusive}: inclusive, exclusive, left exclusive, right exclusive
    final boolean[][] boundVariants = { { true, true }, { false, false }, { false, true }, { true, false } };
    int totalTermCountT = 0;
    int totalTermCountC = 0;
    final int num = TestUtil.nextInt(random(), 10, 20);

    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    for (int round = 0; round < num; round++) {
        final long first = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        final long second = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        final long lower = Math.min(first, second);
        final long upper = Math.max(first, second);

        /*
         * In SIREn, the numeric type and the precision step are prepended to the
         * indexed numeric terms.
         */
        final String termPrefix = NumericType.LONG.toString() + precisionStep;
        final BytesRef lowerBytes = new BytesRef(termPrefix);
        final BytesRef upperBytes = new BytesRef(termPrefix);
        final BytesRef lBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        final BytesRef uBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        NumericUtils.longToPrefixCoded(lower, 0, lBytes);
        NumericUtils.longToPrefixCoded(upper, 0, uBytes);
        lowerBytes.append(lBytes);
        upperBytes.append(uBytes);

        for (final boolean[] bounds : boundVariants) {
            final boolean minInclusive = bounds[0];
            final boolean maxInclusive = bounds[1];

            final MultiNodeTermQuery tq = (MultiNodeTermQuery) nmqLong(field, precisionStep, lower, upper,
                    minInclusive, maxInclusive).getQuery();
            final MultiNodeTermQuery cq = new NodeTermRangeQuery(field, lowerBytes, upperBytes, minInclusive,
                    maxInclusive);
            final TopDocs tTopDocs = index.searcher.search(dq(tq), 1);
            final TopDocs cTopDocs = index.searcher.search(dq(cq), 1);
            assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal",
                    cTopDocs.totalHits, tTopDocs.totalHits);

            final int termCountT = this.countTerms(tq);
            totalTermCountT += termCountT;
            final int termCountC = this.countTerms(cq);
            totalTermCountC += termCountC;
            this.checkTermCounts(precisionStep, termCountT, termCountC);
        }
    }

    this.checkTermCounts(precisionStep, totalTermCountT, totalTermCountC);
    if (VERBOSE && precisionStep != Integer.MAX_VALUE) {
        // four searches (one per bound variant) are issued per round
        final int searchCount = num * 4;
        System.out.println("Average number of terms during random search on '" + field + "':");
        System.out.println(" Numeric query: " + (((double) totalTermCountT) / searchCount));
        System.out.println(" Classical query: " + (((double) totalTermCountC) / searchCount));
    }
}

From source file:com.soebes.supose.core.search.SearchRepository.java

License:Open Source License

/**
 * Parses the given query line and returns every matching document, sorted by the
 * revision field and then by the filename field.
 * <p>
 * The opened reader and searcher are handed to {@code setReader}/{@code setSearcher};
 * this method does not close them.
 *
 * @param queryLine the query in query-parser syntax; leading wildcards are allowed
 * @return all hits for the query (an empty result when nothing matches), or
 *         {@code null} if opening the index, parsing or searching failed (the
 *         failure is logged)
 */
public TopDocs getQueryResult(String queryLine) {
    // Size of the probe search used to learn the total hit count.
    final int firstPageSize = 20;

    IndexReader reader = null;
    TopDocs result = null;
    try {

        reader = IndexReader.open(getIndexDirectory());
        setReader(reader);

        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        Searcher searcher = new IndexSearcher(reader);
        setSearcher(searcher);
        // Sort by revision first, then by filename.
        SortField[] sf = { new SortField(FieldNames.REVISION.toString()),
                new SortField(FieldNames.FILENAME.toString()) };
        Sort sort = new Sort(sf);
        // Here we define the default field for searching.
        QueryParser parser = new CustomQueryParser(FieldNames.CONTENTS.toString(), getAnalyzer());
        // We will allow using a wildcard at the beginning of the
        // expression.
        parser.setAllowLeadingWildcard(true);
        // Expanded terms (wildcard, prefix, fuzzy and range queries) are
        // converted to lowercase.
        parser.setLowercaseExpandedTerms(true);
        Query query = parser.parse(queryLine);
        LOGGER.info("Query: " + query.toString());
        // That's not the best idea...but currently i have not better
        // solution for this...
        // This is intended to get all results not only a limited number
        // results.
        TopDocs firstPage = searcher.search(query, null, firstPageSize, sort);
        if (firstPage.totalHits <= firstPageSize) {
            // The probe already holds every hit. This also avoids asking
            // Lucene for 0 documents when nothing matched, which newer
            // versions reject with an IllegalArgumentException.
            result = firstPage;
        } else {
            result = searcher.search(query, null, firstPage.totalHits, sort);
        }
    } catch (CorruptIndexException e) {
        LOGGER.error("Error: The index is corrupted: ", e);
    } catch (IOException e) {
        LOGGER.error("Error: IOException: ", e);
    } catch (Exception e) {
        LOGGER.error("Error: Something has gone wrong: ", e);
    }
    return result;
}

From source file:com.yahoo.bard.webservice.data.config.ConfigurationLoader.java

License:Apache License

/**
 * Constructor.//ww w.ja  va 2 s.co  m
 *
 * @param dimensionLoader  DimensionLoader to load dimensions from
 * @param metricLoader  MetricLoader to load metrics from
 * @param tableLoader  TableLoader to load tables from
 */
@Inject
public ConfigurationLoader(DimensionLoader dimensionLoader, MetricLoader metricLoader,
        TableLoader tableLoader) {
    DateTimeZone.setDefault(DateTimeZone.forID(TIMEZONE));

    // Set the max lucene query clauses as high as it can go
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    this.dimensionLoader = dimensionLoader;
    this.metricLoader = metricLoader;
    this.tableLoader = tableLoader;
}

From source file:dcu.com.ie.patent.queryreduction.PatentMagdyQueryReduction.java

License:Apache License

/**
 * Builds a reduced query from the pseudo-relevance-feedback (PRF) set of the given patent query.
 * <p>
 * The PRF set is every document matching {@code query.parse()}. For the first eligible field the
 * parsed field query is passed to {@code MagdyQueryReduction.reduceQuery}; for each later eligible
 * field the terms and boosts of the previous result are copied onto that field. The per-field
 * queries are combined as optional (SHOULD) clauses. If the patent query is flagged as a filter,
 * the parsed query of field 0 is added as a required clause.
 * <p>
 * A field is eligible when its query text is non-empty, its boost is non-zero and its index is
 * neither 4 nor 6.
 * <p>
 * Side effect: raises the global {@code BooleanQuery} clause limit to {@code Integer.MAX_VALUE}.
 *
 * @param query the patent query to reduce
 *
 * @return the combined boolean query
 *
 * @throws IOException if searching the index fails
 * @throws ParseException if one of the field queries cannot be parsed
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    // Compute the PRF set: count the hits first, then fetch all of them
    // (at least 1, because a search must request a positive number of documents).
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits()));

    Query expandedQuery = null;
    MagdyQueryReduction qe = new MagdyQueryReduction(hits, ir, PatentQuery.getFields()[source], Nbr_Docs,
            Nbr_Terms);

    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        // Fields 4 and 6 are skipped. The original condition "(i != 4 || i != 6)" was
        // always true, so the exclusion never took effect.
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            Query q = qp.parse(query.getQueries()[i]);
            if (expandedQuery == null) {
                expandedQuery = qe.reduceQuery(q, PatentQuery.getFields()[i]);
            } else {
                // Re-target the terms selected so far onto the current field, keeping their boosts.
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}

From source file:dcu.com.ie.synset.PatentSynSetQueryExpansion.java

/**
 * Expands the given patent query with entries looked up through {@code synset.getSynSeyList}.
 * <p>
 * For the first eligible field, every term of the parsed field query is kept and followed by
 * the entries returned for that term (requested with {@code Nbr_Terms}); each added term
 * inherits the original term's boost, multiplied by the entry's value when {@code weigth} is
 * set. For each later eligible field, the terms and boosts of the previous result are copied
 * onto that field. The per-field queries are combined as optional (SHOULD) clauses. If the
 * patent query is flagged as a filter, the parsed query of field 0 is added as a required clause.
 * <p>
 * A field is eligible when its query text is non-empty, its boost is non-zero and its index is
 * neither 4 nor 6.
 * <p>
 * Side effect: raises the global {@code BooleanQuery} clause limit to {@code Integer.MAX_VALUE}.
 *
 * @param query the patent query to expand
 * @return the combined boolean query
 * @throws ParseException if one of the field queries cannot be parsed
 * @throws IOException declared by the overridden method
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    Query expandedQuery = null;
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        // Fields 4 and 6 are skipped. The original condition "(i != 4 || i != 6)" was
        // always true, so the exclusion never took effect.
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            Query q = qp.parse(query.getQueries()[i]);
            if (expandedQuery == null) {
                // Normalise the parsed query to a BooleanQuery so its clauses can be iterated.
                BooleanQuery bq;
                if (q instanceof BooleanQuery) {
                    bq = ((BooleanQuery) q).clone();
                } else {
                    bq = new BooleanQuery();
                    bq.add(q, BooleanClause.Occur.SHOULD);
                }

                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    bq2.add(tq, BooleanClause.Occur.SHOULD);
                    List<Map.Entry<String, Double>> l = synset.getSynSeyList(tq.getTerm().text(), Nbr_Terms);
                    for (Map.Entry<String, Double> e : l) {
                        Term term = new Term(PatentQuery.getFields()[i], e.getKey());
                        TermQuery tq2 = new TermQuery(term);
                        float boost = tq.getBoost();
                        if (weigth) {
                            boost *= e.getValue().floatValue();
                        }
                        tq2.setBoost(boost);
                        bq2.add(tq2, BooleanClause.Occur.SHOULD);
                    }
                }
                expandedQuery = bq2;
            } else {
                // Re-target the terms collected so far onto the current field, keeping their boosts.
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);

        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}

From source file:de.ilias.services.lucene.search.RPCSearchHandler.java

License:Open Source License

/**
 * Multi field searcher/*from w w  w .  j  av a  2s.  c  o m*/
 * Searches in all defined fields.
 * @todo allow configuration of searchable fields.
 * 
 * 
 * @param clientKey
 * @param query
 */
public String search(String clientKey, String queryString, int pageNumber) {

    LuceneSettings luceneSettings;
    LocalSettings.setClientKey(clientKey);
    IndexSearcher searcher;
    FieldInfo fieldInfo;
    String rewrittenQuery;

    logger.info("Query is: " + queryString);

    try {

        long start = new java.util.Date().getTime();

        fieldInfo = FieldInfo.getInstance(LocalSettings.getClientKey());
        luceneSettings = LuceneSettings.getInstance(LocalSettings.getClientKey());

        // Append doctype
        searcher = SearchHolder.getInstance().getSearcher();

        // Rewrite query
        QueryRewriter rewriter = new QueryRewriter(QueryRewriter.MODE_SEARCH, queryString);
        rewrittenQuery = rewriter.rewrite();

        Vector<Occur> occurs = new Vector<Occur>();
        for (int i = 0; i < fieldInfo.getFieldSize(); i++) {
            occurs.add(BooleanClause.Occur.SHOULD);
        }

        MultiFieldQueryParser multiParser = new MultiFieldQueryParser(fieldInfo.getFieldsAsStringArray(),
                new StandardAnalyzer());

        if (luceneSettings.getDefaultOperator() == LuceneSettings.OPERATOR_AND) {
            multiParser.setDefaultOperator(Operator.AND);
        } else {
            multiParser.setDefaultOperator(Operator.OR);
        }

        BooleanQuery.setMaxClauseCount(10000);
        BooleanQuery query = (BooleanQuery) multiParser.parse(rewrittenQuery);
        logger.info("Max clauses allowed: " + BooleanQuery.getMaxClauseCount());

        //BooleanQuery query = (BooleanQuery) MultiFieldQueryParser.parse(rewrittenQuery,
        //      fieldInfo.getFieldsAsStringArray(),
        //      occurs.toArray(new Occur[0]),
        //      new StandardAnalyzer());

        for (Object f : fieldInfo.getFields()) {
            logger.info(((String) f).toString());
        }

        TopDocCollector collector = new TopDocCollector(1000);
        long s_start = new java.util.Date().getTime();
        searcher.search(query, collector);
        long s_end = new java.util.Date().getTime();
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        SearchResultWriter writer = new SearchResultWriter(hits);
        writer.setOffset(SearchHolder.SEARCH_LIMIT * (pageNumber - 1));
        writer.write();

        long end = new java.util.Date().getTime();
        logger.info("Total time: " + (end - start));
        logger.info("Query time: " + (s_end - s_start));

        return writer.toXML();
    } catch (ConfigurationException e) {
        logger.error(e);
    } catch (IOException e) {
        logger.warn(e);
    } catch (ParseException e) {
        logger.info(e);
    } catch (Exception e) {

        StringWriter writer = new StringWriter();
        e.printStackTrace(new PrintWriter(writer));
        logger.error(writer.toString());
    }
    return "";
}

From source file:de.innovationgate.wgpublisher.lucene.LuceneManager.java

License:Open Source License

public WGResultSet search(WGDatabase db, List<String> fields, String phrase, Map parameters, WGA wga)
        throws WGQueryException {

    if (wga == null) {
        wga = WGA.get(_core);/*from   w w w .j  a  va2  s. c o  m*/
    }

    // set max clause count for boolean queries
    BooleanQuery.setMaxClauseCount(_booleanQueryMaxClauseCount);

    if (this.isRebuildingIndex()) {
        throw new WGQueryException(phrase, "Lucene search temporary disabled. Rebuilding lucene index ...");
    }

    // Registering problem in that case but not cancelling the query, as this is old, expected behaviour. The query will just return no results.
    if (!_core.getLuceneManager().indexIsEnabled(db.getDbReference())) {
        _core.getProblemRegistry().addProblem(
                Problem.create(new TMLContext.WebTMLOccasion(), new DatabaseScope(db.getDbReference()),
                        "webtmlProblem.luceneIndexExpected", ProblemSeverity.LOW));
    }

    if (phrase == null || phrase.trim().equals("")) {
        return null;
    }

    try {
        BooleanQuery wholeQuery = new BooleanQuery();

        int max = WGACore.DEFAULT_QUERY_MAXRESULTS;
        Integer maxResults = (Integer) parameters.get(WGDatabase.QUERYOPTION_MAXRESULTS);
        if (maxResults != null) {
            if (maxResults == 0 || maxResults == -1) {
                max = Integer.MAX_VALUE;
            } else {
                max = maxResults;
            }
        }

        // handle dboption EXCLUDEDOCUMENT
        WGContent excludeContent = (WGContent) parameters.get(WGDatabase.QUERYOPTION_EXCLUDEDOCUMENT);
        if (excludeContent != null) {
            String uniqueKey = buildUniqueIndexKey(excludeContent.getDatabase().getDbReference(),
                    excludeContent.getDocumentKey());
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_UNIQUEKEY, uniqueKey)),
                    BooleanClause.Occur.MUST_NOT);
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_PARENTKEY, uniqueKey)),
                    BooleanClause.Occur.MUST_NOT);
        }

        // list of dbs to search in
        String searchScope = (String) parameters.get(LuceneManager.QUERYOPTION_SEARCHSCOPE);
        List searchDBKeys = new ArrayList();
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB)) {
            searchDBKeys.add(db.getDbReference());
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DOMAIN)) {
            Iterator<WGDatabase> dbs = _core
                    .getDatabasesForDomain((String) db.getAttribute(WGACore.DBATTRIB_DOMAIN)).iterator();
            while (dbs.hasNext()) {
                WGDatabase currentDB = dbs.next();
                if (wga.openDatabase(currentDB)) {
                    searchDBKeys.add(currentDB.getDbReference());
                }
            }
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_WGA)) {
            Iterator dbs = _core.getContentdbs().values().iterator();
            while (dbs.hasNext()) {
                WGDatabase currentDB = (WGDatabase) dbs.next();
                if (wga.openDatabase(currentDB)) {
                    searchDBKeys.add(currentDB.getDbReference());
                }
            }
        }
        if (searchScope.equals(LuceneManager.SEARCHSCOPE_DB_LIST)) {
            String dbListCSV = (String) parameters.get(QUERYOPTION_SEARCHDBKEYS);
            if (dbListCSV == null || dbListCSV.trim().equals("")) {
                throw new WGQueryException(phrase, "Search scope is 'dblist' but no db keys given.");
            } else {
                Iterator dbkeys = WGUtils.deserializeCollection(dbListCSV, ",").iterator();
                while (dbkeys.hasNext()) {
                    String dbkey = (String) dbkeys.next();
                    WGDatabase currentDB = wga.db(dbkey);
                    if (currentDB.isSessionOpen()) {
                        searchDBKeys.add(dbkey.trim().toLowerCase());
                    }
                }
            }
        }

        // Handle language selection;
        List<WGLanguage> languagesPriorityList = null;
        boolean filterLanguages = false;
        if (parameters.containsKey(WGDatabase.QUERYOPTION_LANGUAGES)) {
            List<WGLanguage> langs = (List<WGLanguage>) parameters.get(WGDatabase.QUERYOPTION_LANGUAGES);
            if (langs.size() > 1) {
                BooleanQuery langQuery = new BooleanQuery();
                for (WGLanguage lang : langs) {
                    langQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, lang.getName())),
                            BooleanClause.Occur.SHOULD);
                }
                wholeQuery.add(langQuery, BooleanClause.Occur.MUST);
                languagesPriorityList = langs;
                filterLanguages = true;
            } else if (langs.size() == 1) {
                wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, langs.get(0).getName())),
                        BooleanClause.Occur.MUST);
                languagesPriorityList = Collections.singletonList(langs.get(0));
            }
        } else if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYLANGUAGE)) {
            String language = (String) parameters.get(WGDatabase.QUERYOPTION_ONLYLANGUAGE);
            wholeQuery.add(new TermQuery(new Term(WGContent.META_LANGUAGE, language)),
                    BooleanClause.Occur.MUST);
            languagesPriorityList = Collections.singletonList(db.getLanguage(language));
        }

        if (languagesPriorityList == null) {
            languagesPriorityList = getLanguagesForSearchDBKeys(searchDBKeys);
            ;
        }

        // Handle visibility selection
        if (!parameters.containsKey(WGDatabase.QUERYOPTION_ENHANCE)
                || parameters.get(WGDatabase.QUERYOPTION_ENHANCE).equals(new Boolean(true))) {

            wholeQuery.add(new TermQuery(new Term(WGContent.META_VISIBLE, "true")), BooleanClause.Occur.MUST);

            String role = (String) parameters.get(WGDatabase.QUERYOPTION_ROLE);
            if (role != null) {
                if (!role.equalsIgnoreCase(WGContent.DISPLAYTYPE_NONE)) {
                    wholeQuery.add(new TermQuery(new Term("HIDDENIN" + role.toUpperCase(), "false")),
                            BooleanClause.Occur.MUST);
                }
            }
        }

        if (parameters.containsKey(WGDatabase.QUERYOPTION_ONLYRELEASED)) {
            wholeQuery.add(new TermQuery(new Term(WGContent.META_STATUS, WGContent.STATUS_RELEASE)),
                    BooleanClause.Occur.MUST);
        }

        // build dbQuery (OR combination of all searchDbs indexed by lucene)
        BooleanQuery dbQuery = new BooleanQuery();
        Iterator itSearchDBKeys = searchDBKeys.iterator();
        while (itSearchDBKeys.hasNext()) {
            String currentDBKey = (String) itSearchDBKeys.next();
            if (_indexedDbs.containsKey(currentDBKey)) {
                dbQuery.add(new TermQuery(new Term(INDEXFIELD_DBKEY, currentDBKey)),
                        BooleanClause.Occur.SHOULD);
            }
        }
        wholeQuery.add(dbQuery, BooleanClause.Occur.MUST);

        // Add parsed search phrase.
        // Search in allcontent for each language using the configured analyzer
        // if no analyzer is configured for a language search at least with one
        // default analyzer
        boolean searchWithDefaultAnalyzer = false;

        //if no languages found search at least with DefaultAnalyzer
        if (languagesPriorityList.size() <= 0) {
            searchWithDefaultAnalyzer = true;
        }

        // parse native options
        Sort sort = null;
        String sortFieldName = "";
        Operator defaultOperator = QueryParser.AND_OPERATOR;
        String nativeOptionsStr = (String) parameters.get(WGDatabase.QUERYOPTION_NATIVEOPTIONS);
        boolean includeVirtualContent = false;
        String doctype = DOCTYPE_CONTENT;
        if (nativeOptionsStr != null) {
            Iterator nativeOptions = WGUtils.deserializeCollection(nativeOptionsStr, ",", true).iterator();
            while (nativeOptions.hasNext()) {
                String option = (String) nativeOptions.next();
                if (option.startsWith("sort:")) {
                    sortFieldName = option.substring(5).trim();
                    boolean reverse = false;
                    if (sortFieldName.toLowerCase().endsWith("(asc)")) {
                        sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 5).trim();
                    } else if (sortFieldName.toLowerCase().endsWith("(desc)")) {
                        sortFieldName = sortFieldName.substring(0, sortFieldName.length() - 6).trim();
                        reverse = true;
                    }

                    if (sortFieldName.length() > 0) {
                        char first = sortFieldName.charAt(0);
                        if (first >= 'A' && first <= 'Z') {
                            // meta sort
                            sortFieldName = sortFieldName.toUpperCase();
                        } else {
                            // item sort
                            sortFieldName = sortFieldName.toLowerCase();
                        }
                    }

                    // sort order currently only german
                    sort = new Sort(new SortField(SORTITEM_PREFIX + sortFieldName, Locale.GERMANY, reverse));
                } else if (option.equalsIgnoreCase(NATIVE_QUERYOPTION_INCLUDEVIRTUALCONTENT)) {
                    includeVirtualContent = true;
                } else if (option.startsWith("doctype:")) {
                    doctype = option.substring("doctype:".length()).trim();
                } else if (option.startsWith("operator:")) {
                    String op = option.substring("operator:".length()).trim();
                    if (op.equalsIgnoreCase("or"))
                        defaultOperator = QueryParser.OR_OPERATOR;
                }

            }
        }

        if (!includeVirtualContent) {
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_ISVIRTUALCONTENT, String.valueOf(true))),
                    BooleanClause.Occur.MUST_NOT);
        }

        // handle doctype option
        // we cannot be sure that all documents in index already contains the field DOCTYPE (introduced with OpenWGA 7.1) therefore we have to perform some excludes
        if (doctype.equals(DOCTYPE_CONTENT)) {
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, DOCTYPE_ATTACHMENT)),
                    BooleanClause.Occur.MUST_NOT);
        } else if (!doctype.equals(DOCTYPE_ALL)) {
            wholeQuery.add(new TermQuery(new Term(INDEXFIELD_DOCTYPE, doctype)), BooleanClause.Occur.MUST);
        }

        //build phrase query                
        BooleanQuery phraseQuery = new BooleanQuery();
        phraseQuery.setBoost(10);
        Iterator languageList = languagesPriorityList.iterator();

        List<String> searchFields = new ArrayList<String>();
        Map<String, Float> searchBoosts = new HashMap<String, Float>();
        for (String field : fields) {
            String[] parts = field.split("\\^");
            searchFields.add(parts[0]);
            if (parts.length == 2) {
                searchBoosts.put(parts[0], Float.parseFloat(parts[1]));
            }
        }
        if (!searchFields.contains("allcontent"))
            searchFields.add("allcontent");
        if (!searchFields.contains("TITLE"))
            searchFields.add("TITLE");
        if (!searchFields.contains("DESCRIPTION"))
            searchFields.add("DESCRIPTION");
        if (!searchFields.contains("KEYWORDS"))
            searchFields.add("KEYWORDS");

        while (languageList.hasNext()) {
            WGLanguage languageItem = (WGLanguage) languageList.next();
            Analyzer analyzer = _core.getAnalyzerForLanguageCode(languageItem.getName().substring(0, 2));
            if (analyzer != null) {
                QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]),
                        analyzer, searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields);
                parser.setDefaultOperator(defaultOperator);
                Query query = parser.parse(phrase);
                if (filterLanguages) {
                    BooleanQuery testPhraseAndLangQuery = new BooleanQuery();
                    testPhraseAndLangQuery.add(query, BooleanClause.Occur.MUST);
                    testPhraseAndLangQuery.add(
                            new TermQuery(new Term(WGContent.META_LANGUAGE, languageItem.getName())),
                            BooleanClause.Occur.MUST);
                    phraseQuery.add(testPhraseAndLangQuery, BooleanClause.Occur.SHOULD);
                } else {
                    phraseQuery.add(query, BooleanClause.Occur.SHOULD);
                }
            } else {
                searchWithDefaultAnalyzer = true;
            }
        }

        if (searchWithDefaultAnalyzer) {
            QueryParser parser = new IndexingRuleBasedQueryParser(searchFields.toArray(new String[0]),
                    _core.getDefaultAnalyzer(), searchBoosts, _indexedDbs, searchDBKeys, _metaKeywordFields);
            parser.setDefaultOperator(defaultOperator);
            Query query = parser.parse(phrase);
            phraseQuery.add(query, BooleanClause.Occur.SHOULD);
        }
        //LOG.info(phraseQuery.toString());
        wholeQuery.add(phraseQuery, BooleanClause.Occur.MUST);

        TopDocs hits;
        //register executed query as output parameter
        parameters.put(WGDatabase.QUERYOPTION_RETURNQUERY, wholeQuery.toString());
        // simplify query and register as taginfo
        parameters.put(TAGINFO_SIMPLIFIEDQUERY, rewrite(wholeQuery));

        long timeBefore = System.currentTimeMillis();
        if (sort != null) {
            try {
                hits = search(wholeQuery, max, sort);
            } catch (NullPointerException e) {
                // lucene bug when sorting for non existing fields with Locale
                throw new WGQueryException(wholeQuery.toString(),
                        "Sortfield '" + sortFieldName + "' not indexed.");
            }
        } else {
            try {
                hits = search(wholeQuery, max, null);
            } catch (BooleanQuery.TooManyClauses e) {
                parameters.put(TAGINFO_UNSPECIFICQUERY, new Boolean(true));
                throw new WGQueryException(phrase,
                        "Too many BooleanClauses in query. "
                                + "Please use a more specific query or increase value of "
                                + "'booleanQueryMaxClauseCount' via WGAManager. Current value is '"
                                + this.getBooleanQueryMaxClauseCount() + "'.");
            }
        }

        long timeAfter = System.currentTimeMillis();
        long executionTime = timeAfter - timeBefore;

        LuceneResultSet resultSet;
        if (filterLanguages) {
            resultSet = new LuceneLanguageChoosingResultSet(hits, wga, parameters, wholeQuery, executionTime,
                    languagesPriorityList);
        } else {
            resultSet = new LuceneMultiDBResultSet(hits, wga, parameters, wholeQuery, executionTime);
        }

        // put resultset in per thread list
        List rsList = (List) _resultsetList.get();
        if (rsList == null) {
            rsList = new LinkedList();
            _resultsetList.set(rsList);
        }
        rsList.add(resultSet);

        return resultSet;
    } catch (org.apache.lucene.queryParser.ParseException e) {
        throw new WGQueryException("Unable to parse lucene query", e.getMessage(), e);
    } catch (Exception e) {
        LOG.error("Error executing lucene search: " + e.getClass().getName() + " - " + e.getMessage(), e);
        throw new WGQueryException(phrase, e.getClass().getName() + ": " + e.getMessage(), e);
    }
}

From source file:de.u808.simpleinquest.service.search.SearchManager.java

License:Apache License

/**
 * Searches the index for the given search string.
 *
 * <p>The lookup proceeds in this order:
 * <ol>
 *   <li>An empty search string yields a fresh result without hits.</li>
 *   <li>A result already stored in {@code searchCach} is returned as is.</li>
 *   <li>Hits found in {@code globalSearchCache} are attached to a fresh result,
 *       which is then stored in {@code searchCach}.</li>
 *   <li>Otherwise the string is parsed as a multi-field query (author, content,
 *       title; AND as default operator) and executed against the index
 *       searcher; the outcome is stored in both caches.</li>
 * </ol>
 * If no index searcher is available, a warning is logged and the result is
 * returned without hits.
 *
 * @param searchString the query text entered by the user
 * @return the result holder; its hits are only set when a search was executed
 *         or cached hits were found
 * @throws ParseException if the search string cannot be parsed into a query
 * @throws IOException if an underlying call fails with an I/O error
 */
public SearchResult search(String searchString) throws ParseException, IOException {
    SearchResult searchResult = new SearchResult();
    searchResult.setSearchString(searchString);
    searchResult.setSearchPerformed(true);

    // Nothing to look up for an empty search string.
    if (!StringUtils.isNotEmpty(searchString)) {
        return searchResult;
    }

    // First level: a complete result cached under this search string.
    if (searchCach.containsKey(searchString)) {
        return searchCach.get(searchString);
    }

    // Second level: raw hits kept in the global cache.
    Element cachedElement = globalSearchCache.getCache().get(searchString);
    if (cachedElement != null) {
        Hits cachedHits = (Hits) cachedElement.getObjectValue();
        searchResult.setHits(cachedHits);
        searchCach.put(searchString, searchResult);
        return searchResult;
    }

    if (indexSearchBean.getIndexSearcher() == null) {
        //TODO check lang
        log.warn("Index dos not exist! Returning null!");
        //TODO display Info
        return searchResult;
    }

    // Static setting: lifts the clause limit for every BooleanQuery.
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    String[] fields = { Indexer.AUTOR_FIELD_NAME, Indexer.CONTENT_FIELD_NAME,
            Indexer.TITLE_FIELD_NAME };
    QueryParser qp = new MultiFieldQueryParser(fields, new StandardAnalyzer());
    qp.setDefaultOperator(QueryParser.Operator.AND);
    Query query = qp.parse(searchString);

    Hits hits = indexSearchBean.getIndexSearcher().search(query);
    searchResult.setHits(hits);

    // Remember the outcome in both cache levels.
    searchCach.put(searchString, searchResult);
    globalSearchCache.getCache().put(new Element(searchString, hits));
    return searchResult;
}

From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.CollaborativeIndex.java

License:Apache License

/**
 * Opens the string index searcher and the info index searcher.
 *
 * <p>Each searcher is opened independently: a failure while opening one is
 * printed as a stack trace and does not prevent the attempt to open the other.
 * The field belonging to a failed searcher is not assigned by this call.
 *
 * @return this instance
 */
public CollaborativeIndex open() {
    try {
        stringSearcher = GetReader.getIndexSearcher(stringIndexName, stringLoad);
        // Raise the clause limit for search queries; this line is only reached
        // when the string searcher was opened without an exception.
        BooleanQuery.setMaxClauseCount(2500);
    } catch (Exception stringIndexFailure) {
        // Best effort: report the problem and continue with the info index.
        stringIndexFailure.printStackTrace();
    }

    try {
        infoSearcher = GetReader.getIndexSearcher(infoIndexName, infoLoad);
    } catch (Exception infoIndexFailure) {
        // Best effort: report the problem and still return this instance.
        infoIndexFailure.printStackTrace();
    }

    return this;
}