List of usage examples for the org.apache.lucene.search.BooleanClause constructor BooleanClause(Query, Occur)
public BooleanClause(Query query, Occur occur)
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/** * count the number of documents in the index having at least a value for the 'class' field * * @return the no. of documents having a value for the 'class' field * @throws IOException if accessing to term vectors or search fails *//*from www . j a va 2 s.c o m*/ protected int countDocsWithClass() throws IOException { int docCount = MultiFields.getTerms(this.leafReader, this.classFieldName).getDocCount(); if (docCount == -1) { // in case codec doesn't support getDocCount TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector(); BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add(new BooleanClause( new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST)); if (query != null) { q.add(query, BooleanClause.Occur.MUST); } indexSearcher.search(q.build(), classQueryCountCollector); docCount = classQueryCountCollector.getTotalHits(); } return docCount; }
From source file:SimpleNaiveBayesClassifier.java
License:Apache License
/**
 * Counts the documents of the given class (from the whole index, or from the subset selected by
 * {@code query} when it is set) that contain the word in any of the configured text fields.
 *
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class that contain the word
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
    // One optional clause per text field: the word may appear in any of them.
    BooleanQuery.Builder wordInAnyField = new BooleanQuery.Builder();
    for (String textFieldName : textFieldNames) {
        TermQuery wordInField = new TermQuery(new Term(textFieldName, word));
        wordInAnyField.add(new BooleanClause(wordInField, BooleanClause.Occur.SHOULD));
    }

    // Both the word clause group and the class term are required.
    BooleanQuery.Builder wordAndClass = new BooleanQuery.Builder();
    wordAndClass.add(new BooleanClause(wordInAnyField.build(), BooleanClause.Occur.MUST));
    wordAndClass.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
    if (query != null) {
        wordAndClass.add(query, BooleanClause.Occur.MUST);
    }

    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    indexSearcher.search(wordAndClass.build(), hitCounter);
    return hitCounter.getTotalHits();
}
From source file:KNearestNeighbourDocumentClassifier.java
License:Apache License
/** * Returns the top k results from a More Like This query based on the input document * * @param document the document to use for More Like This search * @return the top results for the MLT query * @throws IOException If there is a low-level I/O error *///from w w w. j av a 2s .c om private TopDocs knnSearch(Document document) throws IOException { BooleanQuery.Builder mltQuery = new BooleanQuery.Builder(); for (String fieldName : textFieldNames) { String boost = null; if (fieldName.contains("^")) { String[] field2boost = fieldName.split("\\^"); fieldName = field2boost[0]; boost = field2boost[1]; } String[] fieldValues = document.getValues(fieldName); if (boost != null) { mlt.setBoost(true); mlt.setBoostFactor(Float.parseFloat(boost)); } mlt.setAnalyzer(field2analyzer.get(fieldName)); for (String fieldContent : fieldValues) { mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD)); } mlt.setBoost(false); } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); if (query != null) { mltQuery.add(query, BooleanClause.Occur.MUST); } return indexSearcher.search(mltQuery.build(), k); }
From source file:KNearestNeighborClassifier.java
License:Apache License
private TopDocs knnSearch(String text) throws IOException { BooleanQuery.Builder mltQuery = new BooleanQuery.Builder(); for (String fieldName : textFieldNames) { String boost = null;/* w ww. j a v a 2 s .com*/ mlt.setBoost(true); //terms boost actually helps in MLT queries if (fieldName.contains("^")) { String[] field2boost = fieldName.split("\\^"); fieldName = field2boost[0]; boost = field2boost[1]; } if (boost != null) { mlt.setBoostFactor(Float.parseFloat(boost));//if we have a field boost, we add it } mltQuery.add( new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD)); mlt.setBoostFactor(1);// restore neutral boost for next field } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); if (query != null) { mltQuery.add(query, BooleanClause.Occur.MUST); } return indexSearcher.search(mltQuery.build(), k); }
From source file:SimpleNaiveBayesDocumentClassifier.java
License:Apache License
/** * Returns the number of documents of the input class ( from the whole index or from a subset) * that contains the word ( in a specific field or in all the fields if no one selected) * * @param word the token produced by the analyzer * @param fieldName the field the word is coming from * @param term the class term/*from w w w .j a va 2 s . c o m*/ * @return number of documents of the input class * @throws java.io.IOException If there is a low-level I/O error */ private int getWordFreqForClass(String word, String fieldName, Term term) throws IOException { BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); BooleanQuery.Builder subQuery = new BooleanQuery.Builder(); subQuery.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD)); booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST)); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); if (query != null) { booleanQuery.add(query, BooleanClause.Occur.MUST); } TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector(); indexSearcher.search(booleanQuery.build(), totalHitCountCollector); return totalHitCountCollector.getTotalHits(); }
From source file:bbejeck.nosql.lucene.LuceneQueryBuilder.java
License:Apache License
public BooleanClause build() { if (occur == null) { occur = BooleanClause.Occur.MUST; }// w w w.j av a2 s. c o m if (booleanClauses == null || booleanClauses.isEmpty()) { return new BooleanClause(queryType.query(field, text), occur); } BooleanQuery booleanQuery = LuceneQueryFunctions.toBooleanQuery.apply(booleanClauses); return new BooleanClause(booleanQuery, occur); }
From source file:com.aistor.modules.cms.service.ArticleService.java
License:Open Source License
/** * // w w w . j av a 2 s . co m */ public Page<Article> search(Page<Article> page, String q) { // ? BooleanQuery query = articleDao.getFullTextQuery(q, "title", "keywords", "desciption", "articleData.content"); // ? BooleanQuery queryFilter = articleDao.getFullTextQuery( new BooleanClause(new TermQuery(new Term("status", Article.STATUS_RELEASE)), Occur.MUST)); // ? Sort sort = new Sort(new SortField("updateDate", SortField.DOC, true)); // articleDao.search(page, query, queryFilter, sort); // articleDao.keywordsHighlight(query, page.getList(), "desciption", "articleData.content"); return page; }
From source file:com.appeligo.amazon.AmazonSearcher.java
License:Apache License
public AmazonItem getProgramPurchases(String programId) { Searcher searcher = getProductSearcher(); BooleanQuery query = new BooleanQuery(); query.add(new BooleanClause(new TermQuery(new Term("programId", programId)), Occur.MUST)); query.add(new BooleanClause(new TermQuery(new Term("type", "product")), Occur.MUST)); AmazonItem item = null;// www.j a v a2 s. c om try { Hits hits = searcher.search(query); if (hits.length() > 0) { item = createItem(hits.doc(0)); } } catch (IOException e) { if (log.isWarnEnabled()) { log.warn("Error searching for program purchase: " + programId, e); } } return item; }
From source file:com.bewsia.script.LuceneHandler.java
License:Open Source License
public int count(String kind, Query query, Filter filter, Sort sort, int max) { int tag = 0;//from ww w .ja v a2 s .c o m try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST)); if (query != null) { boolQuery.add(new BooleanClause(query, Occur.MUST)); } TopDocs td = null; if (filter != null && sort != null) { td = searcher.search(boolQuery, filter, max, sort); } else if (filter != null) { td = searcher.search(boolQuery, filter, max); } else if (sort != null) { td = searcher.search(boolQuery, max, sort); } else { td = searcher.search(boolQuery, max); } tag = td.totalHits; searcher.close(); reader.close(); } catch (Exception e) { } return tag; }
From source file:com.bewsia.script.LuceneHandler.java
License:Open Source License
public List<SEntity> search(String kind, Query query, Filter filter, Sort sort, int max) { List<SEntity> tag = new ArrayList<SEntity>(); try {//from ww w. j a v a2s . co m IndexReader reader = IndexReader.open(FSDirectory.open(new File(dirIndex))); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new BooleanClause(new TermQuery(new Term(SEntity.KIND, kind)), Occur.MUST)); if (query != null) { boolQuery.add(new BooleanClause(query, Occur.MUST)); } TopDocs td = null; if (filter != null && sort != null) { td = searcher.search(boolQuery, filter, max, sort); } else if (filter != null) { td = searcher.search(boolQuery, filter, max); } else if (sort != null) { td = searcher.search(boolQuery, max, sort); } else { td = searcher.search(boolQuery, max); } for (int i = 0; i < td.totalHits; i++) { SEntity item = new SEntity(this); Document doc = searcher.doc(td.scoreDocs[i].doc); item.setSchema(doc.get(SEntity.SCHEMA)); read(item, doc); tag.add(item); } searcher.close(); reader.close(); } catch (Exception e) { } return tag; }