Example usage for org.apache.lucene.search BooleanQuery BooleanQuery

List of usage examples for org.apache.lucene.search BooleanQuery BooleanQuery

Introduction

On this page you can find example usages of org.apache.lucene.search.BooleanQuery's BooleanQuery() constructor.

Prototype

BooleanQuery()
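
This constructor creates an empty query to which clauses are then attached with add(Query, Occur). As a minimal sketch of the pattern used throughout the examples below (the field names "title" and "body" and the search terms are placeholders; the sketch assumes the older mutable BooleanQuery API shown on this page, which in current Lucene releases has been replaced by BooleanQuery.Builder):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class BooleanQuerySketch {
    public static Query buildQuery() {
        BooleanQuery query = new BooleanQuery();
        // MUST: matching documents have to contain "lucene" in the "title" field
        query.add(new TermQuery(new Term("title", "lucene")), Occur.MUST);
        // SHOULD: documents that also contain "search" in the "body" field score higher
        query.add(new TermQuery(new Term("body", "search")), Occur.SHOULD);
        return query;
    }
}

SHOULD clauses broaden the query (any one of them may match), while MUST clauses narrow it; most of the examples below combine the two in exactly this way.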

Usage

From source file:edu.uic.cs.t_verifier.index.synonym.WordNetSynonymEngine.java

License:Apache License

public String[] getSynonyms(String word) throws IOException {
    // List<String> synList = new ArrayList<String>();
    Set<String> synList = new TreeSet<String>();

    AllDocCollector collector = new AllDocCollector(); // collects every matching document, not just the top hits

    String stemmedWord = stemmer.stem(word);
    // System.out.print("\n>>>>> " + word + " " + stemmedWord);

    Query originalWordQuery = new TermQuery(new Term("word", word));
    Query stemmedWordQuery = new TermQuery(new Term("word", stemmedWord));
    BooleanQuery query = new BooleanQuery();
    query.add(originalWordQuery, Occur.SHOULD);
    query.add(stemmedWordQuery, Occur.SHOULD);

    searcher.search(query, collector);

    for (ScoreDoc hit : collector.getHits()) { // iterate over all hits
        Document doc = searcher.doc(hit.doc);

        String[] values = doc.getValues("syn");

        for (String syn : values) { // collect each synonym stored in the "syn" field
            synList.add(syn);
        }
    }

    return synList.toArray(new String[0]);
}

From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java

License:Open Source License

public Collection<String> searchConceptWithClause(IndexSearcher searcher, Query clause) {
    Collection<String> result = new HashSet<String>();
    try {
        Hits hits = searcher.search(clause);
        /********* add fuzzy query function here **************/
        if (hits == null || hits.length() == 0) {
            Set<Term> terms = new HashSet<Term>();
            clause.extractTerms(terms);
            //if clause query is a term query
            if (terms.size() != 0) {
                BooleanQuery query = new BooleanQuery();
                for (Term term : terms) {
                    query.add(new FuzzyQuery(term, 0.8f, 1), Occur.MUST);
                }
                hits = searcher.search(query);
                log.debug(query + " " + hits);
            }
        }
        /************************************************/

        for (int i = 0; i < hits.length(); i++) {
            Document doc = hits.doc(i);
            float score = hits.score(i);
            if (score >= SCHEMA_THRESHOLD) {
                String type = doc.get(Constant.TYPE_FIELD);
                if (type == null) {
                    System.err.println("type is null!");
                    continue;
                }

                if (type.equals(TypeUtil.CONCEPT)) {
                    result.add(doc.get(Constant.URI_FIELD));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java

License:Open Source License

public Collection<String> searchAttributesWithClause(IndexSearcher searcher, Query clause) {
    Collection<String> result = new HashSet<String>();
    try {
        Hits hits = searcher.search(clause);
        /********* add fuzzy query function here **************/
        if (hits == null || hits.length() == 0) {
            Set<Term> terms = new HashSet<Term>();
            clause.extractTerms(terms);
            //if clause query is a term query
            if (terms.size() != 0) {
                BooleanQuery query = new BooleanQuery();
                for (Term term : terms) {
                    query.add(new FuzzyQuery(term, 0.8f, 1), Occur.MUST);
                }
                hits = searcher.search(query);
                log.debug(query + " " + hits);
            }
        }
        /************************************************/

        for (int i = 0; i < hits.length(); i++) {
            Document doc = hits.doc(i);
            float score = hits.score(i);
            if (score >= SCHEMA_THRESHOLD) {
                String type = doc.get(Constant.TYPE_FIELD);
                if (type == null) {
                    System.err.println("type is null!");
                    continue;
                }

                if (type.equals(TypeUtil.ATTRIBUTE)) {
                    result.add(doc.get(Constant.URI_FIELD));
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java

License:Open Source License

private void searchEntitiesByAttributeVauleCompounds(IndexSearcher searcher,
        Map<String, Collection<String>> queries, Map<String, Collection<String>> attributes,
        Collection<String> concepts, Collection<KeywordElement> entities, int cutOff) {
    BooleanQuery entityQuery = new BooleanQuery();
    try {
        StandardAnalyzer analyzer = new StandardAnalyzer();
        for (String keywordForAttribute : queries.keySet()) {
            BooleanQuery attributeQuery = new BooleanQuery();
            Collection<String> attributeElements = attributes.get(keywordForAttribute);
            if (attributeElements != null && attributeElements.size() != 0)
                for (String attribute : attributeElements) {
                    QueryParser parser = new QueryParser(attribute, analyzer);
                    BooleanQuery bq = new BooleanQuery();
                    for (String value : queries.get(keywordForAttribute)) {
                        Query q = parser.parse(value);
                        if (q instanceof BooleanQuery) {
                            BooleanQuery bquery = (BooleanQuery) q;
                            for (BooleanClause clause : bquery.getClauses()) {
                                clause.setOccur(Occur.MUST);
                            }
                        }
                        bq.add(q, BooleanClause.Occur.MUST);
                    }
                    attributeQuery.add(bq, BooleanClause.Occur.SHOULD);
                }
            entityQuery.add(attributeQuery, BooleanClause.Occur.MUST);
        }
        if (concepts != null && concepts.size() != 0) {
            BooleanQuery typeQuery = new BooleanQuery();
            for (String type : concepts) {
                TermQuery tq = new TermQuery(new Term(Constant.CONCEPT_FIELD, type));
                typeQuery.add(tq, BooleanClause.Occur.MUST);
            }
            entityQuery.add(typeQuery, BooleanClause.Occur.MUST);
        }

        searchEntitiesWithClause(searcher, entityQuery, entities, cutOff);
    } catch (ParseException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordQueryParser.java

License:Open Source License

public Query parse(String query, String[] fields) throws ParseException {
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
        QueryParser qp = new QueryParser(fields[i], getAnalyzer());
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query q = qp.parse(query);
        bQuery.add(q, Occur.SHOULD);
    }
    return bQuery;
}

From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordQueryParser.java

License:Open Source License

public Query parse(String query) throws ParseException {
    BooleanQuery bQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
        QueryParser qp = new QueryParser(fields[i], getAnalyzer());
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query q = qp.parse(query);
        bQuery.add(q, Occur.SHOULD);
    }
    return bQuery;
}

From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordSearcher.java

License:Open Source License

private void searchEntitiesByAttributesAndValues(SortedSet<KeywordSegment> segements,
        Map<String, Collection<KeywordElement>> attributes,
        Map<KeywordElement, KeywordSegment> entities2Segement,
        Map<KeywordSegment, Collection<KeywordElement>> segments2Entities,
        Map<String, Collection<KeywordElement>> keywords2Entities) throws IOException {

    StandardAnalyzer analyzer = new StandardAnalyzer();

    Set<KeywordSegment> segementsWithResults = new HashSet<KeywordSegment>();
    for (String keywordForAttribute : attributes.keySet()) {
        for (KeywordElement attribute : attributes.get(keywordForAttribute)) {
            QueryParser parser = new QueryParser(Constant.CONTENT_FIELD, analyzer);
            parser.setDefaultOperator(QueryParser.AND_OPERATOR);

            for (KeywordSegment segement : segements) {
                try {
                    BooleanQuery q = new BooleanQuery();
                    Query valueQuery = parser.parse(segement.getQuery());
                    Query attributeQuery = new TermQuery(
                            new Term(Constant.ATTRIBUTE_FIELD, attribute.getUri()));
                    q.add(valueQuery, Occur.MUST);
                    q.add(attributeQuery, Occur.MUST);
                    boolean hasResults = searchEntitiesWithClause(q, segement, keywordForAttribute,
                            entities2Segement, segments2Entities, keywords2Entities);

                    if (hasResults)
                        segementsWithResults.add(segement);
                } catch (ParseException e) {
                    e.printStackTrace();
                    continue;
                }
            }
        }
    }
}

From source file:edu.virginia.cs.searcher.PostSearcher.java

/**
 * The main search function.
 *
 * @param searchQuery Set this object's attributes as needed.
 * @return the search result produced by running the combined query.
 */
private SearchResult search(SearchQuery searchQuery) {
    searchQuery.numResults(numOfResults);
    BooleanQuery combinedQuery = new BooleanQuery();
    for (String field : searchQuery.fields()) {
        QueryParser parser = new QueryParser(Version.LUCENE_46, field, analyzer);
        try {
            Query textQuery = parser.parse(parser.escape(searchQuery.queryText()));
            combinedQuery.add(textQuery, BooleanClause.Occur.MUST);
        } catch (ParseException exception) {
            exception.printStackTrace();
        }
    }
    return runSearch(combinedQuery, searchQuery);
}

From source file:engine.easy.search.RelevanceFeedBackUtil.java

License:Apache License

/**
 * Builds a Boolean OR query out of the "most frequent" terms in the given
 * term list and returns it. "Most frequent" is defined as the terms whose
 * frequencies are greater than or equal to the top-term cutoff
 * (AppConstants.TOP_TERM_CUT_OFF_FREQ) times the frequency of the top term,
 * where the cutoff is a number between 0 and 1.
 *
 * @param termlist the candidate terms, sorted here by descending frequency.
 * @param frequencyMap map from each term to its frequency.
 * @param numOf maximum number of top terms (only used by the commented-out truncation code below).
 * @return a Boolean OR query.
 * @throws Exception if one is thrown.
 */
private static Query computeTopTermQuery(List<String> termlist, Map<String, Integer> frequencyMap, int numOf)
        throws Exception {

    // sort the term map by frequency descending
    Collections.sort(termlist, new ReverseComparator(new ByValueComparator<String, Integer>(frequencyMap)));

    // retrieve the top terms based on topTermCutoff
    List<String> topTerms = new ArrayList<String>();
    float topFreq = -1.0F;
    for (String term : termlist) {
        if (topFreq < 0.0F) {
            // first term, capture the value
            topFreq = (float) frequencyMap.get(term);
            topTerms.add(term);
        } else {
            // not the first term, compute the ratio and discard if below
            // topTermCutoff score
            float ratio = (float) ((float) frequencyMap.get(term) / topFreq);
            if (ratio >= AppConstants.TOP_TERM_CUT_OFF_FREQ) {
                topTerms.add(term);
            } else {
                break;
            }
        }
    }
    //
    //      //Top results
    //      List<String> topArray = null;
    //      if (topTerms.size() > numOf) {
    //         topArray = new ArrayList<String>(numOf);
    //
    //         for (int position=0; position < numOf; position++) {
    //            topArray.add(topTerms.get(position));
    //         }
    //      }
    //      else {
    //         topArray = topTerms;
    //      }

    StringBuilder termBuf = new StringBuilder();
    BooleanQuery q = new BooleanQuery();
    for (String topTerm : topTerms) {
        termBuf.append(topTerm).append("(").append(frequencyMap.get(topTerm)).append(");");
        q.add(new TermQuery(new Term("CONTENT", topTerm)), Occur.SHOULD);
    }
    System.out.println(">>> top terms: " + termBuf.toString());
    System.out.println(">>> query: " + q.toString());
    return q;
}

From source file:es.pode.indexador.negocio.servicios.busqueda.SrvBuscadorServiceImpl.java

License:Open Source License

/**
 * @see es.pode.indexador.negocio.servicios.busqueda.SrvBuscadorServiceBase#handleSolicitudDocsCountArbolCurricular(ParamDocsCountVO)
 *
 * @param paramBusq ParamDocsCountVO holding the parameters accepted by a request for the number of documents at a node of a curricular tree.
 * @return ResultadosCountVO holding the result of the request for the total document count for a curricular tree node.
 */
protected ResultadosCountVO handleSolicitudDocsCount(ParamDocsCountVO paramBusq) throws Exception {
    //      logger.debug("SrvBuscadorServiceImpl - handleSolicitudDocsCount: AreaCurricular=" + array2String(paramBusq.getAreaCurricular())+ ", Tesauros="+array2String(paramBusq.getTesauros()));
    //      return NumTermsArbol.obtenerNumeroNodos(paramBusq.getAreaCurricular(),getIndexPathByLanguage(paramBusq.getIdiomaBusqueda()),(paramBusq.getAreaCurricular()!=null && paramBusq.getAreaCurricular().length > 0)?"areaCurricular":"tesauro");
    Directory directorioIndiceSimple = this.getIndexByLanguage(paramBusq.getIdiomaBusqueda());
    IndexSearcher searcher = new IndexSearcher(directorioIndiceSimple);
    ResultadosCountVO resultado = new ResultadosCountVO();
    searcher.setSimilarity(new DefaultSimilarity());
    int suma = 0;
    if (paramBusq.getAreaCurricular() != null && paramBusq.getAreaCurricular().length > 0) {
        resultado.setConteo(new Integer[paramBusq.getAreaCurricular().length]);
        for (int i = 0; i < paramBusq.getAreaCurricular().length; i++) {
            BooleanQuery andQuery = new BooleanQuery();
            andQuery.add(
                    getTermQuery(props.getProperty(CAMPO_AREA_CURRICULAR), paramBusq.getAreaCurricular()[i]),
                    BooleanClause.Occur.MUST);
            andQuery.add(getTermQuery(props.getProperty(CAMPO_ARBOL_CURRICULAR_VIGENTE),
                    paramBusq.getArbolCurricularVigente()), BooleanClause.Occur.MUST);
            Hits hits = searcher.search(andQuery);
            resultado.getConteo()[i] = new Integer(hits.length());
            suma = suma + resultado.getConteo()[i].intValue();
        }
        resultado.setDocumentosCount(new Integer(suma));
    } else {
        resultado = NumTermsArbol.obtenerNumeroNodos(paramBusq.getTesauros(),
                getIndexPathByLanguage(paramBusq.getIdiomaBusqueda()), "tesauro");
    }
    return resultado;
}