Usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()
BooleanQuery
From source file:edu.uic.cs.t_verifier.index.synonym.WordNetSynonymEngine.java
License:Apache License
public String[] getSynonyms(String word) throws IOException { // List<String> synList = new ArrayList<String>(); Set<String> synList = new TreeSet<String>(); AllDocCollector collector = new AllDocCollector(); // #A String stemmedWord = stemmer.stem(word); // System.out.print("\n>>>>> " + word + " " + stemmedWord); Query originalWordQuery = new TermQuery(new Term("word", word)); Query stemmedWordQuery = new TermQuery(new Term("word", stemmedWord)); BooleanQuery query = new BooleanQuery(); query.add(originalWordQuery, Occur.SHOULD); query.add(stemmedWordQuery, Occur.SHOULD); searcher.search(query, collector);/* w w w .j av a 2s .c o m*/ for (ScoreDoc hit : collector.getHits()) { // #B Document doc = searcher.doc(hit.doc); String[] values = doc.getValues("syn"); for (String syn : values) { // #C synList.add(syn); } } return synList.toArray(new String[0]); }
From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java
License:Open Source License
public Collection<String> searchConceptWithClause(IndexSearcher searcher, Query clause) { Collection<String> result = new HashSet<String>(); try {/*from w ww .ja v a 2 s . c o m*/ Hits hits = searcher.search(clause); /********* add fuzzy query funtion here **************/ if (hits == null || hits.length() == 0) { Set<Term> terms = new HashSet<Term>(); clause.extractTerms(terms); //if clause query is a term query if (terms.size() != 0) { BooleanQuery query = new BooleanQuery(); for (Term term : terms) { query.add(new FuzzyQuery(term, 0.8f, 1), Occur.MUST); } hits = searcher.search(query); log.debug(query + " " + hits); } } /************************************************/ for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); float score = hits.score(i); if (score >= SCHEMA_THRESHOLD) { String type = doc.get(Constant.TYPE_FIELD); if (type == null) { System.err.println("type is null!"); continue; } if (type.equals(TypeUtil.CONCEPT)) { result.add(doc.get(Constant.URI_FIELD)); } } } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java
License:Open Source License
public Collection<String> searchAttributesWithClause(IndexSearcher searcher, Query clause) { Collection<String> result = new HashSet<String>(); try {// w w w . j av a2 s .c o m Hits hits = searcher.search(clause); /********* add fuzzy query funtion here **************/ if (hits == null || hits.length() == 0) { Set<Term> terms = new HashSet<Term>(); clause.extractTerms(terms); //if clause query is a term query if (terms.size() != 0) { BooleanQuery query = new BooleanQuery(); for (Term term : terms) { query.add(new FuzzyQuery(term, 0.8f, 1), Occur.MUST); } hits = searcher.search(query); log.debug(query + " " + hits); } } /************************************************/ for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); float score = hits.score(i); if (score >= SCHEMA_THRESHOLD) { String type = doc.get(Constant.TYPE_FIELD); if (type == null) { System.err.println("type is null!"); continue; } if (type.equals(TypeUtil.ATTRIBUTE)) { result.add(doc.get(Constant.URI_FIELD)); } } } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:edu.unika.aifb.graphindex.searcher.entity.EntitySearcher.java
License:Open Source License
private void searchEntitiesByAttributeVauleCompounds(IndexSearcher searcher, Map<String, Collection<String>> queries, Map<String, Collection<String>> attributes, Collection<String> concepts, Collection<KeywordElement> entities, int cutOff) { BooleanQuery entityQuery = new BooleanQuery(); try {/*from w ww . j a v a 2s . co m*/ StandardAnalyzer analyzer = new StandardAnalyzer(); for (String keywordForAttribute : queries.keySet()) { BooleanQuery attributeQuery = new BooleanQuery(); Collection<String> attributeElements = attributes.get(keywordForAttribute); if (attributeElements != null && attributeElements.size() != 0) for (String attribute : attributeElements) { QueryParser parser = new QueryParser(attribute, analyzer); BooleanQuery bq = new BooleanQuery(); for (String value : queries.get(keywordForAttribute)) { Query q = parser.parse(value); if (q instanceof BooleanQuery) { BooleanQuery bquery = (BooleanQuery) q; for (BooleanClause clause : bquery.getClauses()) { clause.setOccur(Occur.MUST); } } bq.add(q, BooleanClause.Occur.MUST); } attributeQuery.add(bq, BooleanClause.Occur.SHOULD); } entityQuery.add(attributeQuery, BooleanClause.Occur.MUST); } if (concepts != null && concepts.size() != 0) { BooleanQuery typeQuery = new BooleanQuery(); for (String type : concepts) { TermQuery tq = new TermQuery(new Term(Constant.CONCEPT_FIELD, type)); typeQuery.add(tq, BooleanClause.Occur.MUST); } entityQuery.add(typeQuery, BooleanClause.Occur.MUST); } searchEntitiesWithClause(searcher, entityQuery, entities, cutOff); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordQueryParser.java
License:Open Source License
public Query parse(String query, String[] fields) throws ParseException { BooleanQuery bQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { QueryParser qp = new QueryParser(fields[i], getAnalyzer()); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query q = qp.parse(query); bQuery.add(q, Occur.SHOULD);//from ww w. j av a 2 s.c om } return bQuery; }
From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordQueryParser.java
License:Open Source License
/**
 * Parses a query string against every field held in {@code fields}.
 *
 * <p>The string is parsed once per field with AND as the default operator, and
 * the per-field queries are combined as alternatives (SHOULD).
 *
 * @param query the query string to parse
 * @return a boolean query holding one optional clause per field
 * @throws ParseException if the query string cannot be parsed for a field
 */
public Query parse(String query) throws ParseException {
    BooleanQuery anyField = new BooleanQuery();
    for (String field : fields) {
        QueryParser fieldParser = new QueryParser(field, getAnalyzer());
        fieldParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        anyField.add(fieldParser.parse(query), Occur.SHOULD);
    }
    return anyField;
}
From source file:edu.unika.aifb.graphindex.searcher.keyword.KeywordSearcher.java
License:Open Source License
/**
 * Searches entities for every combination of attribute and keyword segment.
 *
 * <p>For each attribute element and each segment, the segment's query string is
 * parsed against {@code Constant.CONTENT_FIELD} (AND as default operator) and
 * combined, both mandatory, with a term query on
 * {@code Constant.ATTRIBUTE_FIELD} for the attribute's URI. The combined query
 * is passed to {@code searchEntitiesWithClause}, which receives the three
 * result maps.
 *
 * <p>A segment whose query string fails to parse has the stack trace printed
 * and is skipped.
 *
 * @param segements the keyword segments to search with
 * @param attributes attribute elements, keyed by the keyword they came from
 * @param entities2Segement passed through to {@code searchEntitiesWithClause}
 * @param segments2Entities passed through to {@code searchEntitiesWithClause}
 * @param keywords2Entities passed through to {@code searchEntitiesWithClause}
 * @throws IOException declared by the original signature; presumably raised by
 *         the underlying search — confirm against {@code searchEntitiesWithClause}
 */
private void searchEntitiesByAttributesAndValues(SortedSet<KeywordSegment> segements,
        Map<String, Collection<KeywordElement>> attributes,
        Map<KeywordElement, KeywordSegment> entities2Segement,
        Map<KeywordSegment, Collection<KeywordElement>> segments2Entities,
        Map<String, Collection<KeywordElement>> keywords2Entities) throws IOException {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // The parser configuration does not depend on the attribute or the segment,
    // so a single instance serves every iteration.
    QueryParser parser = new QueryParser(Constant.CONTENT_FIELD, analyzer);
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    for (Map.Entry<String, Collection<KeywordElement>> entry : attributes.entrySet()) {
        String keywordForAttribute = entry.getKey();
        for (KeywordElement attribute : entry.getValue()) {
            for (KeywordSegment segement : segements) {
                try {
                    BooleanQuery q = new BooleanQuery();
                    Query valueQuery = parser.parse(segement.getQuery());
                    Query attributeQuery = new TermQuery(
                            new Term(Constant.ATTRIBUTE_FIELD, attribute.getUri()));
                    q.add(valueQuery, Occur.MUST);
                    q.add(attributeQuery, Occur.MUST);

                    // The boolean result was only ever stored in a set nobody read;
                    // the call is kept for its effect on the result maps.
                    searchEntitiesWithClause(q, segement, keywordForAttribute, entities2Segement,
                            segments2Entities, keywords2Entities);
                } catch (ParseException e) {
                    // Skip this segment and carry on with the next one.
                    e.printStackTrace();
                }
            }
        }
    }
}
From source file:edu.virginia.cs.searcher.PostSearcher.java
/** * The main search function./* w ww .j ava2s . c o m*/ * * @param searchQuery Set this object's attributes as needed. * @return */ private SearchResult search(SearchQuery searchQuery) { searchQuery.numResults(numOfResults); BooleanQuery combinedQuery = new BooleanQuery(); for (String field : searchQuery.fields()) { QueryParser parser = new QueryParser(Version.LUCENE_46, field, analyzer); try { Query textQuery = parser.parse(parser.escape(searchQuery.queryText())); combinedQuery.add(textQuery, BooleanClause.Occur.MUST); } catch (ParseException exception) { exception.printStackTrace(); } } return runSearch(combinedQuery, searchQuery); }
From source file:engine.easy.search.RelevanceFeedBackUtil.java
License:Apache License
/** * Computes a term frequency map for the specific terms at the specified location. * Builds a Boolean OR query out of the "most frequent" terms in the index * and returns it. "Most Frequent" is defined as the terms whose frequencies * are greater than or equal to the topTermCutoff * the frequency of the top * term, where the topTermCutoff is number between 0 and 1. * /*ww w. j a v a 2 s. com*/ * @param ramdir the directory where the index is created. * @return a Boolean OR query. * @throws Exception if one is thrown. */ private static Query computeTopTermQuery(List<String> termlist, Map<String, Integer> frequencyMap, int numOf) throws Exception { // sort the term map by frequency descending Collections.sort(termlist, new ReverseComparator(new ByValueComparator<String, Integer>(frequencyMap))); // retrieve the top terms based on topTermCutoff List<String> topTerms = new ArrayList<String>(); float topFreq = -1.0F; for (String term : termlist) { if (topFreq < 0.0F) { // first term, capture the value topFreq = (float) frequencyMap.get(term); topTerms.add(term); } else { // not the first term, compute the ratio and discard if below // topTermCutoff score float ratio = (float) ((float) frequencyMap.get(term) / topFreq); if (ratio >= AppConstants.TOP_TERM_CUT_OFF_FREQ) { topTerms.add(term); } else { break; } } } // // //Top results // List<String> topArray = null; // if (topTerms.size() > numOf) { // topArray = new ArrayList<String>(numOf); // // for (int position=0; position < numOf; position++) { // topArray.add(topTerms.get(position)); // } // } // else { // topArray = topTerms; // } StringBuilder termBuf = new StringBuilder(); BooleanQuery q = new BooleanQuery(); for (String topTerm : topTerms) { termBuf.append(topTerm).append("(").append(frequencyMap.get(topTerm)).append(");"); q.add(new TermQuery(new Term("CONTENT", topTerm)), Occur.SHOULD); } System.out.println(">>> top terms: " + termBuf.toString()); System.out.println(">>> query: " + q.toString()); return q; }
From source file:es.pode.indexador.negocio.servicios.busqueda.SrvBuscadorServiceImpl.java
License:Open Source License
/** * @see es.pode.indexador.negocio.servicios.busqueda.SrvBuscadorServiceBase#handleSolicitudDocsCountArbolCurricular(ParamDocsCountVO) * //from w w w . j a v a 2 s .c o m * @param ParamDocsCountVO VO que alberga los parametros que acepta una solicitud de numero de documentos para el nodo de un arbol curricular. * @return ResultadosCountVO VO que alberga el resultado de la solicitud de la suma de documentos para un nodo de arbol curricular. */ protected ResultadosCountVO handleSolicitudDocsCount(ParamDocsCountVO paramBusq) throws Exception { // logger.debug("SrvBuscadorServiceImpl - handleSolicitudDocsCount: AreaCurricular=" + array2String(paramBusq.getAreaCurricular())+ ", Tesauros="+array2String(paramBusq.getTesauros())); // return NumTermsArbol.obtenerNumeroNodos(paramBusq.getAreaCurricular(),getIndexPathByLanguage(paramBusq.getIdiomaBusqueda()),(paramBusq.getAreaCurricular()!=null && paramBusq.getAreaCurricular().length > 0)?"areaCurricular":"tesauro"); Directory directorioIndiceSimple = this.getIndexByLanguage(paramBusq.getIdiomaBusqueda()); IndexSearcher searcher = new IndexSearcher(directorioIndiceSimple); ResultadosCountVO resultado = new ResultadosCountVO(); searcher.setSimilarity(new DefaultSimilarity()); int suma = 0; if (paramBusq.getAreaCurricular() != null && paramBusq.getAreaCurricular().length > 0) { resultado.setConteo(new Integer[paramBusq.getAreaCurricular().length]); for (int i = 0; i < paramBusq.getAreaCurricular().length; i++) { BooleanQuery andQuery = new BooleanQuery(); andQuery.add( getTermQuery(props.getProperty(CAMPO_AREA_CURRICULAR), paramBusq.getAreaCurricular()[i]), BooleanClause.Occur.MUST); andQuery.add(getTermQuery(props.getProperty(CAMPO_ARBOL_CURRICULAR_VIGENTE), paramBusq.getArbolCurricularVigente()), BooleanClause.Occur.MUST); Hits hits = searcher.search(andQuery); resultado.getConteo()[i] = new Integer(hits.length()); suma = suma + resultado.getConteo()[i].intValue(); } resultado.setDocumentosCount(new Integer(suma)); } else { 
resultado = NumTermsArbol.obtenerNumeroNodos(paramBusq.getTesauros(), getIndexPathByLanguage(paramBusq.getIdiomaBusqueda()), "tesauro"); } return resultado; }