Example usage for org.apache.lucene.search BooleanQuery clauses

List of usage examples for org.apache.lucene.search BooleanQuery clauses

Introduction

In this page you can find the example usage for org.apache.lucene.search BooleanQuery clauses.

Prototype

List<BooleanClause> clauses()

To view the source code for org.apache.lucene.search BooleanQuery clauses, click the Source Link.

Usage

From source file:alix.lucene.MoreLikeThisQuery.java

License:Apache License

/**
 * Rewrites this query into a {@code BooleanQuery} generated by
 * {@code MoreLikeThis} from {@code likeText}.
 *
 * <p>When the boost differs from 1 the rewrite is delegated to the parent
 * implementation. Otherwise the clauses produced by {@code MoreLikeThis} are
 * copied into a new builder whose minimum-should-match is the clause count
 * multiplied by {@code percentTermsToMatch}, truncated to an int.
 *
 * @param reader the index reader handed to {@code MoreLikeThis}
 * @return the rewritten query
 * @throws IOException declared by the underlying rewrite and like calls
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (getBoost() != 1f) {
        return super.rewrite(reader);
    }

    final MoreLikeThis moreLikeThis = new MoreLikeThis(reader);
    moreLikeThis.setFieldNames(moreLikeFields);
    moreLikeThis.setAnalyzer(analyzer);
    moreLikeThis.setMinTermFreq(minTermFrequency);
    // A negative minDocFreq leaves the MoreLikeThis setting untouched.
    if (minDocFreq >= 0) {
        moreLikeThis.setMinDocFreq(minDocFreq);
    }
    moreLikeThis.setMaxQueryTerms(maxQueryTerms);
    moreLikeThis.setStopWords(stopWords);

    final BooleanQuery likeQuery = (BooleanQuery) moreLikeThis.like(fieldName, new StringReader(likeText));

    final BooleanQuery.Builder rebuilt = new BooleanQuery.Builder();
    rebuilt.setDisableCoord(likeQuery.isCoordDisabled());
    for (BooleanClause likeClause : likeQuery.clauses()) {
        rebuilt.add(likeClause);
    }

    // Require the configured fraction of the generated terms to match.
    final int requiredMatches = (int) (likeQuery.clauses().size() * percentTermsToMatch);
    rebuilt.setMinimumNumberShouldMatch(requiredMatches);
    return rebuilt.build();
}

From source file:bbejeck.nosql.lucene.LuceneQueryParsingTest.java

License:Apache License

/**
 * Checks that a numeric IN list parses into a term clause for {@code name}
 * plus a nested boolean query holding one {@code score} term per listed value.
 */
@Test
public void test_parse_in_listquery() throws Exception {
    QueryParseResults parsed = parseQueryAndFilter(
            "select foo from '/path/index/' where name='Beth' and score in (0, 50, 55)");
    BooleanClause[] topLevel = parsed.getBooleanQuery().getClauses();
    assertThat(topLevel.length, is(2));

    // The expected term text for the name clause is the lower-case form.
    TermQuery nameTerm = (TermQuery) topLevel[0].getQuery();
    assertThat(nameTerm.getTerm().field(), is("name"));
    assertThat(nameTerm.getTerm().text(), is("beth"));

    BooleanQuery inList = (BooleanQuery) topLevel[1].getQuery();
    assertThat(inList.clauses().size(), is(3));

    BooleanClause[] inListClauses = inList.getClauses();
    String[] expectedScores = { "0", "50", "55" };
    for (int i = 0; i < expectedScores.length; i++) {
        TermQuery scoreTerm = (TermQuery) inListClauses[i].getQuery();
        assertThat(scoreTerm.getTerm().field(), is("score"));
        assertThat(scoreTerm.getTerm().text(), is(expectedScores[i]));
    }
}

From source file:bbejeck.nosql.lucene.LuceneQueryParsingTest.java

License:Apache License

/**
 * Checks that an IN list of quoted values parses into a term clause for
 * {@code name} plus a nested boolean query holding one {@code score} term per
 * listed value.
 */
@Test
public void test_parse_in_term_listquery() throws Exception {
    QueryParseResults parsed = parseQueryAndFilter(
            "select foo from '/path/index/' where name='Beth' and score in ('0', '50', '55')");
    BooleanClause[] topLevel = parsed.getBooleanQuery().getClauses();
    assertThat(topLevel.length, is(2));

    // The expected term text for the name clause is the lower-case form.
    TermQuery nameTerm = (TermQuery) topLevel[0].getQuery();
    assertThat(nameTerm.getTerm().field(), is("name"));
    assertThat(nameTerm.getTerm().text(), is("beth"));

    BooleanQuery inList = (BooleanQuery) topLevel[1].getQuery();
    assertThat(inList.clauses().size(), is(3));

    BooleanClause[] inListClauses = inList.getClauses();
    String[] expectedScores = { "0", "50", "55" };
    for (int i = 0; i < expectedScores.length; i++) {
        TermQuery scoreTerm = (TermQuery) inListClauses[i].getQuery();
        assertThat(scoreTerm.getTerm().field(), is("score"));
        assertThat(scoreTerm.getTerm().text(), is(expectedScores[i]));
    }
}

From source file:bbejeck.nosql.lucene.LuceneQueryParsingTest.java

License:Apache License

/**
 * Checks that a parenthesised group of three AND-ed conditions parses into a
 * single nested boolean query that sits next to the leading term clause.
 */
@Test
public void test_parse_nested_query() throws Exception {
    QueryParseResults parsed = parseQueryAndFilter(
            "select foo from 'D:/some/path/' where a='1' and (b='2' and c='3' and d='4')");
    BooleanClause[] topLevel = parsed.getBooleanQuery().getClauses();
    assertThat(topLevel.length, is(2));

    TermQuery leadingTerm = (TermQuery) topLevel[0].getQuery();
    assertThat(leadingTerm.getTerm().field(), is("a"));
    assertThat(leadingTerm.getTerm().text(), is("1"));

    BooleanQuery group = (BooleanQuery) topLevel[1].getQuery();
    assertThat(group.clauses().size(), is(3));

    // Each row is {expected field, expected text} for the group clause at that index.
    BooleanClause[] groupClauses = group.getClauses();
    String[][] expectedTerms = { { "b", "2" }, { "c", "3" }, { "d", "4" } };
    for (int i = 0; i < expectedTerms.length; i++) {
        TermQuery groupTerm = (TermQuery) groupClauses[i].getQuery();
        assertThat(groupTerm.getTerm().field(), is(expectedTerms[i][0]));
        assertThat(groupTerm.getTerm().text(), is(expectedTerms[i][1]));
    }
}

From source file:bbejeck.nosql.lucene.LuceneQueryParsingTest.java

License:Apache License

/**
 * Checks that three levels of parenthesised nesting parse into a chain of
 * two-clause boolean queries: each level holds a term at index 0 and the next
 * nesting level at index 1, until the innermost pair of terms.
 */
@Test
public void test_parse_deeper_nested_query() throws Exception {
    QueryParseResults parsed = parseQueryAndFilter(
            "select foo from '/some/path/' where a='1' and (b='2' and (c='3' and (d='4' and e='5')))");

    // Overall query
    BooleanClause[] level = parsed.getBooleanQuery().getClauses();
    assertThat(level.length, is(2));

    // Walk the overall query, then the first and second nesting: term first, nested query second.
    String[][] outerTerms = { { "a", "1" }, { "b", "2" }, { "c", "3" } };
    for (String[] expected : outerTerms) {
        TermQuery head = (TermQuery) level[0].getQuery();
        assertThat(head.getTerm().field(), is(expected[0]));
        assertThat(head.getTerm().text(), is(expected[1]));

        BooleanQuery nested = (BooleanQuery) level[1].getQuery();
        assertThat(nested.clauses().size(), is(2));
        level = nested.getClauses();
    }

    // Third nesting: two plain terms.
    String[][] innermostTerms = { { "d", "4" }, { "e", "5" } };
    for (int i = 0; i < innermostTerms.length; i++) {
        TermQuery leaf = (TermQuery) level[i].getQuery();
        assertThat(leaf.getTerm().field(), is(innermostTerms[i][0]));
        assertThat(leaf.getTerm().text(), is(innermostTerms[i][1]));
    }
}

From source file:bbejeck.nosql.lucene.LuceneQueryParsingTest.java

License:Apache License

/**
 * Checks that three levels of nesting ending in an IN list parse into a chain
 * of two-clause boolean queries whose innermost level holds a term and a
 * three-term boolean query for the listed values.
 */
@Test
public void test_parse_deeper_nested_with_in_term_clause_query() throws Exception {
    QueryParseResults parsed = parseQueryAndFilter(
            "select foo from '/some/path/' where a='1' and (b='2' and (c='3' and (d='4' and e in (5,6,7))))");

    // Overall query
    BooleanClause[] level = parsed.getBooleanQuery().getClauses();
    assertThat(level.length, is(2));

    // Walk the overall query, then the first and second nesting: term first, nested query second.
    String[][] outerTerms = { { "a", "1" }, { "b", "2" }, { "c", "3" } };
    for (String[] expected : outerTerms) {
        TermQuery head = (TermQuery) level[0].getQuery();
        assertThat(head.getTerm().field(), is(expected[0]));
        assertThat(head.getTerm().text(), is(expected[1]));

        BooleanQuery nested = (BooleanQuery) level[1].getQuery();
        assertThat(nested.clauses().size(), is(2));
        level = nested.getClauses();
    }

    // Third nesting: a term followed by the IN list.
    TermQuery beforeInList = (TermQuery) level[0].getQuery();
    assertThat(beforeInList.getTerm().field(), is("d"));
    assertThat(beforeInList.getTerm().text(), is("4"));

    BooleanQuery inList = (BooleanQuery) level[1].getQuery();
    assertThat(inList.clauses().size(), is(3));

    BooleanClause[] inListClauses = inList.getClauses();
    String[] expectedValues = { "5", "6", "7" };
    for (int i = 0; i < expectedValues.length; i++) {
        TermQuery listTerm = (TermQuery) inListClauses[i].getQuery();
        assertThat(listTerm.getTerm().field(), is("e"));
        assertThat(listTerm.getTerm().text(), is(expectedValues[i]));
    }
}

From source file:biospectra.classify.Classifier.java

License:Apache License

/**
 * Builds a boolean query from the clauses produced by
 * {@code createQueryClauses}, requiring a fraction of them to match.
 *
 * @param analyzer passed through to {@code createQueryClauses}
 * @param field passed through to {@code createQueryClauses}
 * @param queryText passed through to {@code createQueryClauses}
 * @param minShouldMatch multiplied by the clause count and truncated to an
 *        int to obtain the minimum number of clauses that should match
 * @param queryGenerationAlgorithm passed through to {@code createQueryClauses}
 * @return a new query carrying the same clauses and coord setting
 */
protected BooleanQuery createQuery(KmerQueryAnalyzer analyzer, String field, String queryText,
        double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm) {
    final BooleanQuery generated = createQueryClauses(analyzer, field, queryText, queryGenerationAlgorithm);
    final int requiredMatches = (int) (minShouldMatch * generated.clauses().size());

    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.setDisableCoord(generated.isCoordDisabled());
    builder.setMinimumNumberShouldMatch(requiredMatches);
    for (BooleanClause generatedClause : generated.clauses()) {
        builder.add(generatedClause);
    }
    return builder.build();
}

From source file:com.esri.gpt.catalog.lucene.LuceneQueryAdapter.java

License:Apache License

/**
 * Executes a query against a Lucene index.
 *
 * <p>The discovery filter is adapted into a Lucene {@code BooleanQuery},
 * combined with access-control, isPartOf and schema filters, executed, and
 * the requested page of hits is written into the query's result object.
 * The method returns without searching when no filter was supplied and the
 * requested record count exceeds {@code QUERYALL_THRESHOLD}, or when adapting
 * the filter produced no Lucene clauses.
 *
 * @param discoveryQuery the query to execute
 * @throws DiscoveryException declared for callers; raised by collaborators
 * @throws ParseException declared for callers; raised by collaborators
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if the index cannot be read
 */
protected void executeQuery(DiscoveryQuery discoveryQuery)
        throws DiscoveryException, ParseException, CorruptIndexException, IOException {

    IndexSearcher searcher = null;
    try {

        // initialize
        searcher = getIndexAdapter().newSearcher();
        this.maxDoc = searcher.maxDoc();
        boolean bExecuteQuery = true;
        boolean bProcessHits = true;
        RequestContext reqContext = this.getIndexAdapter().getRequestContext();
        BooleanQuery rootQuery = new BooleanQuery();
        DiscoveryFilter discoveryFilter = discoveryQuery.getFilter();
        DiscoveryResult discoveryResult = discoveryQuery.getResult();
        Discoverables returnables = discoveryQuery.getReturnables();
        if ((returnables == null) || (returnables.size() == 0) || (discoveryFilter.getMaxRecords() <= 0)) {
            bProcessHits = false;
        }

        // CSW query provider options
        boolean isDublinCoreResponse = true;
        boolean isBriefResponse = false;
        boolean isSummaryResponse = false;
        QueryOptions cswQueryOptions = (QueryOptions) reqContext.getObjectMap()
                .get("com.esri.gpt.server.csw.provider.components.QueryOptions");

        // build the query (if no query was supplied, we'll query everything)
        LogicalClauseAdapter logicalAdapter = new LogicalClauseAdapter(this);
        LogicalClause rootClause = discoveryFilter.getRootClause();
        if ((rootClause == null) || (rootClause.getClauses().size() == 0)) {
            if (discoveryFilter.getMaxRecords() <= QUERYALL_THRESHOLD) {
                LOGGER.finer("No filter was supplied, querying all...");
                logicalAdapter.appendSelectAll(rootQuery);
            } else {
                LOGGER.finer("No filter was supplied, query will not be executed.");
                bExecuteQuery = false;
            }
        } else {
            logicalAdapter.adaptLogicalClause(rootQuery, rootClause);
            // Nothing to search for when the adapted filter produced no clauses.
            // (The previous test, "== null && size() > 0", could never be true
            // and would have thrown on a null clause list.)
            if ((rootQuery.clauses() == null) || (rootQuery.clauses().size() == 0)) {
                bExecuteQuery = false;
            }
        }
        if (!bExecuteQuery) {
            return;
        }

        // execute the query and process the hits if required

        // set the sort option (only needed when hits will be processed)
        Sort sortOption = null;
        if (bProcessHits && (searcher.maxDoc() > 0)) {
            sortOption = makeSortOption(discoveryQuery);
        }

        // filters
        Filter filter = null;

        // make the access control filter (skipped for administrators and unrestricted policies)
        MetadataAcl acl = new MetadataAcl(reqContext);
        AuthenticationStatus auth = reqContext.getUser().getAuthenticationStatus();
        boolean bAdmin = auth.getAuthenticatedRoles().hasRole("gptAdministrator");
        if (!bAdmin && !acl.isPolicyUnrestricted()) {
            String[] aclValues = acl.makeUserAcl();
            filter = new AclFilter(Storeables.FIELD_ACL, aclValues);
        }

        // isPartOf filter
        filter = IsPartOfFilter.make(reqContext, filter);

        // make the schema filter; it also decides the response flavour
        if (cswQueryOptions != null) {
            String schemaName = Val.chkStr(cswQueryOptions.getSchemaFilter());
            if (schemaName.length() > 0) {
                filter = new SchemaFilter(schemaName, filter);
                isDublinCoreResponse = cswQueryOptions.isDublinCoreResponse();
                if (!isDublinCoreResponse) {
                    String elementSetType = Val.chkStr(cswQueryOptions.getElementSetType());
                    if (elementSetType.equalsIgnoreCase("brief")) {
                        isBriefResponse = true;
                    } else if (elementSetType.equalsIgnoreCase("summary")) {
                        isSummaryResponse = true;
                    }
                }
            }
        }

        // determine the start/end positions (startRecord becomes a 0-based offset)
        int startRecord = discoveryFilter.getStartRecord() - 1;
        int maxRecords = discoveryFilter.getMaxRecords();
        if (startRecord < 0) {
            startRecord = 0;
        }
        int recordsPerPage = maxRecords;
        if (recordsPerPage <= 0) {
            recordsPerPage = 1;
        }
        int hitsToReturn = startRecord + recordsPerPage;
        int numDocs = 0;

        // execute the query
        LOGGER.finer("Executing Lucene Query:\n" + rootQuery);
        TopDocs topDocs = null;
        if (filter != null) {
            if (sortOption != null) {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn);
            }
        } else {
            if (sortOption != null) {
                // filter is null on this path; it is passed through unchanged
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, hitsToReturn);
            }
        }

        // determine the hit count
        int totalHits = topDocs.totalHits;
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if ((scoreDocs != null) && (scoreDocs.length > 0)) {
            numDocs = scoreDocs.length;
        }
        discoveryResult.setNumberOfHits(totalHits);
        LOGGER.finer("Total query hits: " + totalHits);

        if (startRecord > (totalHits - 1)) {
            bProcessHits = false;
        }
        if (maxRecords <= 0) {
            bProcessHits = false;
        }
        int nTotal = numDocs - startRecord;
        if (!bProcessHits) {
            return;
        }

        // warn if many records were requested
        if (nTotal >= TOOMANY_WARNING_THRESHOLD) {
            LOGGER.warning("A request to process " + nTotal
                    + " discovery records was recieved and will be exceuted.\n" + discoveryQuery.toString());
        }

        // process the hits, build the results
        LOGGER.finer("Processing " + nTotal + " records from: " + (startRecord + 1) + " to: " + numDocs);
        Storeable storeable;
        DiscoveredRecords records = discoveryResult.getRecords();
        IndexReader reader = searcher.getIndexReader();
        for (int i = startRecord; i < numDocs; i++) {
            ScoreDoc scoreDoc = scoreDocs[i];
            Document document = reader.document(scoreDoc.doc);
            DiscoveredRecord record = new DiscoveredRecord();

            if (isDublinCoreResponse) {
                // Dublin Core based responses: one field per returnable property
                for (Discoverable target : returnables) {
                    ArrayList<Object> values = new ArrayList<Object>();
                    storeable = (Storeable) target.getStorable();

                    if (storeable instanceof AnyTextProperty) {
                        // no field is added to the record for any-text properties
                        values = null;

                    } else if (storeable instanceof GeometryProperty) {
                        GeometryProperty geom = (GeometryProperty) storeable;
                        values.add(geom.readEnvelope(document));

                    } else if (target.getMeaning().getMeaningType().equals(PropertyMeaningType.XMLURL)) {
                        String uuid = document.get(Storeables.FIELD_UUID);
                        uuid = URLEncoder.encode(uuid, "UTF-8");
                        values.add("?getxml=" + uuid);

                    } else {
                        DatastoreField retrievalField = storeable.getRetrievalField();
                        Field[] fields = document.getFields(retrievalField.getName());
                        if (fields != null) {
                            for (Field f : fields) {
                                Object value = retrievalField.makeValueToReturn(f.stringValue());
                                values.add(value);
                            }
                        }
                    }

                    if (values != null) {
                        // an empty list still yields an (empty) array, as before
                        record.addField(target, values.toArray());
                    }
                }

            } else {
                // non Dublin Core based responses: return the stored XML that
                // matches the requested element set (brief, summary or full)
                Field field;
                if (isBriefResponse) {
                    field = document.getField(Storeables.FIELD_XML_BRIEF);
                } else if (isSummaryResponse) {
                    field = document.getField(Storeables.FIELD_XML_SUMMARY);
                } else {
                    field = document.getField(Storeables.FIELD_XML);
                }
                String responseXml = null;
                if (field != null) {
                    responseXml = field.stringValue();
                }
                record.setResponseXml(responseXml);
            }

            records.add(record);
        }
        int nPopulated = records.size();
        LOGGER.finer("Populated " + nPopulated + " records.");

    } finally {
        // always hand the searcher back, even when it was never opened (null)
        getIndexAdapter().closeSearcher(searcher);
    }
}

From source file:com.hrstc.lucene.queryexpansion.PatentClassCodeBasedQueryExpansion.java

License:Apache License

/**
 * Expands a patent query field by field using a pseudo-relevance-feedback
 * set retrieved from the class-code index.
 *
 * <p>The feedback set is every document in {@code classCodesSearcher} that
 * matches the query generated from the patent's full class codes. The first
 * eligible field is expanded through {@code ClassCodeBasedQueryExpansion};
 * each later eligible field reuses the previously expanded terms (text and
 * boost) re-targeted at that field. A field is eligible when its query text
 * is non-empty, its boost is non-zero, and its index is neither 4 nor 6.
 *
 * <p>Note: this call sets {@code BooleanQuery}'s maximum clause count to
 * {@code Integer.MAX_VALUE} through a static setter.
 *
 * @param query the patent query to expand
 * @return a boolean query requiring the OR-ed per-field expansions and, when
 *         {@code query.isFilter()} is true, the boosted field-0 query
 * @throws IOException if searching the class-code index fails
 * @throws ParseException if a field's query text cannot be parsed
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);

    // Get the set of definition codes: count the matches first so that the
    // second search can retrieve all of them (at least 1 is requested).
    TotalHitCountCollector collector = new TotalHitCountCollector();
    Query codesQuery = GenerateClassCodesQuery.generateQuery(query.getFullClassCodes());
    classCodesSearcher.search(codesQuery, collector);
    IndexReader ir = classCodesSearcher.getIndexReader();
    TopDocs hits = classCodesSearcher.search(codesQuery, Math.max(1, collector.getTotalHits())); // Compute PRF set

    Query expandedQuery = null;
    ClassCodeBasedQueryExpansion queryExpansion = new ClassCodeBasedQueryExpansion(hits, ir, parameters,
            Nbr_Terms);
    // Field 0 is handled separately below as the optional filter.
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        String field = PatentQuery.getFields()[i];
        String fieldText = query.getQueries()[i];
        // Fields 4 and 6 are excluded. The previous test "(i != 4 || i != 6)"
        // was always true, so the exclusion never took effect.
        boolean excludedField = (i == 4 || i == 6);
        if (fieldText != null && !fieldText.equals("") && !excludedField
                && query.getBoosts().get(field) != 0) {
            if (expandedQuery == null) {
                QueryParser qp = new QueryParser(Version.LUCENE_48, field,
                        new StandardAnalyzer(Version.LUCENE_48));
                Query q = qp.parse(fieldText);
                expandedQuery = queryExpansion.expandQuery(q, field);
            } else {
                // Parse anyway so that malformed field text is still reported.
                new QueryParser(Version.LUCENE_48, field, new StandardAnalyzer(Version.LUCENE_48))
                        .parse(fieldText);
                // Re-target the previously expanded terms at the current field.
                BooleanQuery retargeted = new BooleanQuery();
                for (BooleanClause bc : ((BooleanQuery) expandedQuery).clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    TermQuery tq2 = new TermQuery(new Term(field, tq.getTerm().text()));
                    tq2.setBoost(tq.getBoost());
                    retargeted.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = retargeted;
            }
            // Add the expanded query for this field, based on the PRF set.
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}

From source file:com.hrstc.lucene.queryexpansion.PatentRocchioQueryExpansion.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each fields
 * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel
 * docs vector )/*  w w w  .  j  a v a 2 s.c  o m*/
 *
 * @param query
 *
 * @return expandedQuery
 *
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //*****************************************************************
    //**************** Compute the PRF for field (i)******************* 
    //*****************************************************************
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    IndexReader ir = searcher.getIndexReader();
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set
    //                System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + ".");
    Query expandedQuery = null;
    RocchioQueryExpansion queryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source, Nbr_Docs,
            Nbr_Terms);
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            //                BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            //                if (query.isFilter()) {
            //                    Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
            //                            new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
            //                    bQueryFields.add(filter, BooleanClause.Occur.MUST);
            //                }
            //                if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
            //                    bQueryFields.add(q, BooleanClause.Occur.MUST);
            //                }                
            if (expandedQuery == null) {
                expandedQuery = queryExpansion.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
            //                System.err.println("Expanded Query: " + expandedQuery);
            //                hits = searcher.search(expandedQuery, 100);
            //                System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + ".");
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    //        TopDocs hits = searcher.search(bQuery, 100);
    //                System.err.println(hits.totalHits + " total matching documents.");
    return bQuery;
}