List of usage examples for org.apache.lucene.search.BooleanQuery.clauses()
Method: List<BooleanClause> clauses(), returning the list of clauses in this query.
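Most of the examples below use the mutable BooleanQuery API of Lucene 4.x and earlier, where clauses() returns the live List<BooleanClause> backing the query, so it can be iterated and edited in place (getClauses() instead copies the clauses into a new array). A minimal sketch of that pattern; the field names and terms are made up for illustration:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class ClausesDemo {
    public static void main(String[] args) {
        BooleanQuery bq = new BooleanQuery();
        bq.add(new TermQuery(new Term("title", "lucene")), BooleanClause.Occur.MUST);
        bq.add(new TermQuery(new Term("title", "search")), BooleanClause.Occur.SHOULD);

        // clauses() exposes the query's own clause list; unlike getClauses(),
        // which returns an array copy, this list can be mutated in place.
        for (BooleanClause clause : bq.clauses()) {
            System.out.println(clause.getOccur() + " " + clause.getQuery());
        }
    }
}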
From source file:nicta.com.au.patent.pac.search.RewriteQuery.java
public Query rewrite(String queryid, PatentQuery query) throws ParseException {
    BooleanQuery bQuery = (BooleanQuery) query.parse();
    BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
            .getQuery();
    BooleanQuery bQueryFinal = new BooleanQuery();
    BooleanQuery bQuery3 = bQuery2.clone();
    try {
        FileInputStream fstream = new FileInputStream(termsImpactFilename);
        // Get the object of DataInputStream
        DataInputStream in = new DataInputStream(fstream);
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
            String str;
            while ((str = br.readLine()) != null) {
                if (str.startsWith("#")) {
                    continue;
                }
                if (str.trim().length() == 0) {
                    continue;
                }
                StringTokenizer st = new StringTokenizer(str);
                String id = st.nextToken();
                String term = st.nextToken();
                double oldav = Double.parseDouble(st.nextToken());
                double av = Double.parseDouble(st.nextToken());
                double impact = Double.parseDouble(st.nextToken());
                int y = Integer.parseInt(st.nextToken());
                if (queryid.equals(id) && y == 0) {
                    for (BooleanClause bC : bQuery3.clauses()) {
                        TermQuery tq = (TermQuery) bC.getQuery();
                        if (term.startsWith(tq.getTerm().text())) {
                            // clauses() is the live list, so this removes the clause
                            // from bQuery3 itself; break immediately after mutating.
                            bQuery3.clauses().remove(bC);
                            break;
                        }
                    }
                }
            }
        }
    } catch (IOException | NumberFormatException e) {
        e.printStackTrace();
    }
    BooleanQuery bq = new BooleanQuery();
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { // skip fields 4 and 6
            BooleanQuery bq2 = new BooleanQuery();
            for (BooleanClause bc : bQuery3.clauses()) {
                TermQuery tq = (TermQuery) bc.getQuery();
                Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                TermQuery tq2 = new TermQuery(term);
                tq2.setBoost(tq.getBoost());
                bq2.add(tq2, BooleanClause.Occur.SHOULD);
            }
            bq.add(bq2, BooleanClause.Occur.SHOULD);
        }
    }
    bQueryFinal.add(bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
    bQueryFinal.add(bq, BooleanClause.Occur.MUST);
    return bQueryFinal;
}
From source file:nicta.com.au.patent.pac.terms.impact.FeaturesSelection.java
public void iterateOverQueryTerms() throws ParseException, Exception {
    long start = System.currentTimeMillis();
    int l = 0;
    System.out.println(
            "queryid\tremovedBooleanClause\ttf\tln_tf\tidf\ttfidf\ttLength\tratioTerm\tnbrUniqTerms\tqSize\tscq\tSCS\tictf\tQC\tclarity\tfreqInTitle\tratioInTitle\tfreqDescription\tratioInDescription\tfreqClaims\tratioInClaims");
    for (Map.Entry<String, PatentDocument> e : topics.getTopics().entrySet()) {
        l++;
        String queryid = e.getKey();
        PatentDocument pt = e.getValue();
        long start2 = System.currentTimeMillis();
        PatentQuery query = new PatentQuery(pt, boosts, filter, stopWords);
        BooleanQuery bQuery = (BooleanQuery) query.parse();
        if (bQuery.getClauses().length != 2 || !(bQuery.getClauses()[1].getQuery() instanceof BooleanQuery)
                || ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses().length == 0
                || !(((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                        .getQuery() instanceof BooleanQuery)) {
            continue;
        }
        BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                .getQuery();
        for (int i = 0; i < bQuery2.clauses().size(); i++) {
            BooleanQuery bQueryFinal = new BooleanQuery();
            BooleanQuery bQuery3 = bQuery2.clone();
            // Leave-one-out: remove the i-th clause from the cloned query's live list
            BooleanClause removedBooleanClause = bQuery3.clauses().remove(i);
            bQueryFinal.add(bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
            bQueryFinal.add(bQuery3, BooleanClause.Occur.MUST);
            //***************************
            // Get features
            //***************************
            IndexReader ir = searcher.getIndexSearch().getIndexReader();
            TermQuery term = (TermQuery) removedBooleanClause.getQuery();
            double tf = removedBooleanClause.getQuery().getBoost(); // Term frequency
            double ln_tf = Math.log(1 + tf); // Log of the term frequency
            int totalTF = ir.docFreq(term.getTerm());
            int docs = ir.getDocCount(term.getTerm().field());
            double idf = 0;
            if (totalTF != 0) {
                idf = Math.log10((double) docs / (totalTF)); // Inverse document frequency
            }
            double tfidf = ln_tf * idf; // TF-IDF
            int tLength = term.getTerm().text().length(); // Term length
            int qSize = 0;
            if (term.getTerm().field().endsWith(PatentDocument.Title)) {
                qSize = query.getTitleSize(); // Query size
            } else if (term.getTerm().field().endsWith(PatentDocument.Abstract)) {
                qSize = query.getAbstractSize();
            } else if (term.getTerm().field().endsWith(PatentDocument.Description)) {
                qSize = query.getDescriptionSize();
            } else if (term.getTerm().field().endsWith(PatentDocument.Claims)) {
                qSize = query.getClaimsSize();
            }
            double ratioTerm = (double) tf / qSize;
            int nbrUniqTerms = bQuery2.getClauses().length;
            long totalTermFreq = ir.totalTermFreq(term.getTerm());
            double ln_totalTermFreq = Math.log(1 + totalTermFreq);
            double scq = ln_totalTermFreq * idf;
            double freqInTitle = query.getFreqInTitle(term.getTerm().text());
            double ratioInTitle = (double) freqInTitle / query.getTitleSize();
            double freqAbstract = query.getFreqInAbstract(term.getTerm().text());
            double ratioInAbstract = (double) freqAbstract / query.getAbstractSize();
            double freqDescription = query.getFreqInDescription(term.getTerm().text());
            double ratioInDescription = (double) freqDescription / query.getDescriptionSize();
            double freqClaims = query.getFreqInClaims(term.getTerm().text());
            double ratioInClaims = (double) freqClaims / query.getClaimsSize();
            double Pcoll = (double) totalTermFreq / ir.getSumTotalTermFreq(term.getTerm().field());
            double SCS = 0;
            double ictf = 0;
            List<TermFreqVector> docsTermVector = getDocsTerms(searcher.search(term), term.getTerm().field());
            double a1 = 0;
            for (TermFreqVector vec : docsTermVector) {
                a1 += Math.sqrt((double) vec.getFreq(term.getTerm().text()) / vec.numberOfTerms());
            }
            double clarity = 0;
            if (totalTermFreq != 0) {
                SCS = ratioTerm * Log2(ratioTerm / Pcoll); // Simplified Clarity Score
                ictf = Math.log10((double) docs / (totalTermFreq)); // Inverse collection term frequency
                clarity = a1 * Log2(a1 / Pcoll);
            }
            double QC = totalTF / (double) docs; // Query scope
            //***************************
            System.out.println(queryid + "\t" + removedBooleanClause + "\t" + tf + "\t" + ln_tf + "\t" + idf
                    + "\t" + tfidf + "\t" + tLength + "\t" + ratioTerm + "\t" + nbrUniqTerms + "\t" + qSize
                    + "\t" + scq + "\t" + SCS + "\t" + ictf + "\t" + QC + "\t" + clarity + "\t" + freqInTitle
                    + "\t" + ratioInTitle + "\t" + freqDescription + "\t" + ratioInDescription + "\t"
                    + freqClaims + "\t" + ratioInClaims);
        }
        long end2 = System.currentTimeMillis();
        // System.err.println(bQuery2.clauses().size() + " terms processed in " + Functions.getTimer(end2 - start2) + ".");
    }
    long end = System.currentTimeMillis();
    long millis = (end - start);
    System.err.println("#Global Execution time: " + Functions.getTimer(millis) + ".");
}
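Both this example and the next rely on the same leave-one-out idiom: clone the parsed query, remove the i-th clause from the live list returned by clauses(), and evaluate what remains. BooleanQuery.clone() copies the clause list, so the removal never touches the original query. A minimal standalone sketch of just that idiom, with made-up patent field names:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class LeaveOneOutDemo {
    public static void main(String[] args) {
        BooleanQuery full = new BooleanQuery();
        full.add(new TermQuery(new Term("claims", "rotor")), BooleanClause.Occur.SHOULD);
        full.add(new TermQuery(new Term("claims", "blade")), BooleanClause.Occur.SHOULD);
        full.add(new TermQuery(new Term("claims", "turbine")), BooleanClause.Occur.SHOULD);

        for (int i = 0; i < full.clauses().size(); i++) {
            BooleanQuery reduced = full.clone();                  // clone copies the clause list
            BooleanClause removed = reduced.clauses().remove(i);  // mutate only the clone
            System.out.println("without " + removed + " -> " + reduced);
        }
    }
}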
From source file:nicta.com.au.patent.pac.terms.impact.TermsImpact.java
public void iterateOverQueryTerms() throws ParseException, Exception {
    long start = System.currentTimeMillis();
    int l = 0;
    for (Map.Entry<String, PatentDocument> e : topics.getTopics().entrySet()) {
        l++;
        String queryid = e.getKey();
        PatentDocument pt = e.getValue();
        System.err.print(l + "- " + queryid + " -> " + pt.getUcid() + ": ");
        long start2 = System.currentTimeMillis();
        PatentQuery query = new PatentQuery(pt, boosts, filter, stopWords);
        TopDocs hitsAll = searcher.search(query.parse());
        int j = 0;
        if (hitsAll.totalHits == 0) {
            System.out.println(queryid + " allTerms " + " Q0 XXXXXXXXXX 1 0.0 STANDARD");
        }
        for (ScoreDoc scoreDoc : hitsAll.scoreDocs) {
            j++;
            Document doc = searcher.getIndexSearch().doc(scoreDoc.doc);
            System.out.println(queryid + " allTerms " + " Q0 " + doc.get(PatentDocument.FileName).substring(3)
                    + " " + j + " " + scoreDoc.score + " STANDARD");
        }
        BooleanQuery bQuery = (BooleanQuery) query.parse();
        if (bQuery.getClauses().length != 2 || !(bQuery.getClauses()[1].getQuery() instanceof BooleanQuery)) {
            continue;
        }
        if (((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses().length == 0
                || !(((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                        .getQuery() instanceof BooleanQuery)) {
            continue;
        }
        BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                .getQuery();
        for (int i = 0; i < bQuery2.clauses().size(); i++) {
            BooleanQuery bQueryFinal = new BooleanQuery();
            BooleanQuery bQueryFinal2 = new BooleanQuery();
            BooleanQuery bQuery3 = bQuery2.clone();
            // Leave-one-out: remove the i-th clause from the cloned query's live list
            BooleanClause bClause = bQuery3.clauses().remove(i);
            for (int k = 1; k < PatentQuery.getFields().length; k++) {
                if (query.getQueries()[k] != null && !query.getQueries()[k].equals("") && k != 4 && k != 6
                        && query.getBoosts().get(PatentQuery.getFields()[k]) != 0) { // skip fields 4 and 6
                    BooleanQuery bq = bQuery3.clone();
                    BooleanQuery bq2 = new BooleanQuery();
                    for (BooleanClause bc : bq.clauses()) {
                        TermQuery tq = (TermQuery) bc.getQuery();
                        Term term = new Term(PatentQuery.getFields()[k], tq.getTerm().text());
                        TermQuery tq2 = new TermQuery(term);
                        tq2.setBoost(tq.getBoost());
                        bq2.add(tq2, BooleanClause.Occur.SHOULD);
                    }
                    bQueryFinal2.add(bq2, BooleanClause.Occur.SHOULD);
                }
            }
            bQueryFinal.add(bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
            bQueryFinal.add(bQueryFinal2, BooleanClause.Occur.MUST);
            TopDocs hits = searcher.search(bQueryFinal);
            j = 0;
            //***************************
            // Get features
            //***************************
            TermQuery term = (TermQuery) bClause.getQuery();
            double tf = bClause.getQuery().getBoost(); // Term frequency
            int totalTF = searcher.getIndexSearch().getIndexReader().docFreq(term.getTerm());
            int docs = searcher.getIndexSearch().getIndexReader().getDocCount(term.getTerm().field());
            double idf = Math.log10((double) docs / (totalTF + 1)); // Inverse document frequency
            int tLength = term.getTerm().text().length(); // Term length
            int qSize = bQuery2.getClauses().length; // Query size
            //***************************
            if (hits.totalHits == 0) {
                System.out.println(queryid + " " + bClause + " " + " Q0 XXXXXXXXXX 1 0.0 STANDARD");
            }
            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                j++;
                Document doc = searcher.getIndexSearch().doc(scoreDoc.doc);
                System.out.println(queryid + " " + bClause + " Q0 "
                        + doc.get(PatentDocument.FileName).substring(3) + " " + j + " " + scoreDoc.score
                        + " STANDARD");
            }
        }
        long end2 = System.currentTimeMillis();
        System.err.println(
                bQuery2.clauses().size() + " terms processed in " + Functions.getTimer(end2 - start2) + ".");
    }
    long end = System.currentTimeMillis();
    long millis = (end - start);
    System.err.println("#Global Execution time: " + Functions.getTimer(millis) + ".");
}
From source file:nicta.com.au.patent.queryexpansion.PatentMMRQueryExpansion.java
License:Apache License
/**
 * Performs Rocchio's query expansion with pseudo-relevance feedback for each field
 * separately: qm = alpha * query + (beta / relevantDocsCount) * Sum(rel docs vector)
 *
 * @param query
 * @return expandedQuery
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    // Compute the PRF set for field (i)
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits()));
    Map<String, Float> parameters = new HashMap<>();
    parameters.put(RocchioQueryExpansion.ROCCHIO_ALPHA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_BETA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.DECAY_FLD, (float) 0.0);
    Map<String, Float> rocchioVector = null;
    if (model.equals("tfidf")) {
        RocchioQueryExpansion rocchioQueryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source,
                Nbr_Docs, Nbr_Terms);
        rocchioVector = rocchioQueryExpansion.getRocchioVector(PatentQuery.getFields()[source]);
    }
    Query expandedQuery = null;
    MMRQueryExpansion qe = new MMRQueryExpansion(hits, ir, rocchioVector, MMRQE_ALPHA, MMRQE_LAMBDA,
            PatentQuery.getFields()[source], Nbr_Docs, Nbr_Terms);
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { // skip fields 4 and 6
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            Query q = qp.parse(query.getQueries()[i]);
            if (expandedQuery == null) {
                expandedQuery = qe.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                // Re-map the expanded clauses onto field i, preserving boosts
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            // Add the expanded query computed from the PRF set
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}
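The inner loop above (clone the query, then rebuild every TermQuery clause against a different field while preserving its boost) recurs in several of the examples on this page. Factored out as a standalone helper, it looks like the following sketch; this assumes, as the examples do, that every clause wraps a TermQuery (Lucene 4.x mutable API):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public final class QueryFieldRemapper {
    // Rebuilds a flat BooleanQuery of TermQuery clauses against a new field,
    // preserving each clause's boost; all clauses are re-added as SHOULD.
    static BooleanQuery remapField(BooleanQuery source, String newField) {
        BooleanQuery remapped = new BooleanQuery();
        for (BooleanClause bc : source.clauses()) {
            TermQuery tq = (TermQuery) bc.getQuery();
            TermQuery copy = new TermQuery(new Term(newField, tq.getTerm().text()));
            copy.setBoost(tq.getBoost());
            remapped.add(copy, BooleanClause.Occur.SHOULD);
        }
        return remapped;
    }
}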
From source file:nicta.com.au.patent.queryreduction.PatentMMRQueryReduction.java
License:Apache License
/**
 * Performs MMR-based query reduction with pseudo-relevance feedback for each field
 * separately.
 *
 * @param query
 * @return expandedQuery
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    // Compute the PRF set for field (i)
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits()));
    Query expandedQuery = null;
    MMRQueryReduction qe = new MMRQueryReduction(hits, ir, MMRQE_LAMBDA, PatentQuery.getFields()[source],
            Nbr_Docs, Nbr_Terms);
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) { // skip fields 4 and 6
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            Query q = qp.parse(query.getQueries()[i]);
            if (expandedQuery == null) {
                expandedQuery = qe.reduceQuery(q, PatentQuery.getFields()[i]);
            } else {
                // Re-map the reduced clauses onto field i, preserving boosts
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            // Add the reduced query computed from the PRF set
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    return bQuery;
}
From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java
License:Apache License
static boolean isNegative(Query q) {
    if (!(q instanceof BooleanQuery))
        return false;
    BooleanQuery bq = (BooleanQuery) q;
    List<BooleanClause> clauses = bq.clauses();
    if (clauses.size() == 0)
        return false;
    for (BooleanClause clause : clauses) {
        if (!clause.isProhibited())
            return false;
    }
    return true;
}
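A query whose clauses are all prohibited cannot match anything on its own, which is why dismax-style parsers detect this case and typically pair such a query with a MatchAllDocsQuery. A self-contained sketch of the same check (a hypothetical standalone rewrite of the helper above, Lucene 4.x API):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class NegativeQueryCheck {
    // True when the query is a non-empty BooleanQuery whose every clause is MUST_NOT.
    static boolean isNegative(Query q) {
        if (!(q instanceof BooleanQuery)) {
            return false;
        }
        BooleanQuery bq = (BooleanQuery) q;
        if (bq.clauses().isEmpty()) {
            return false;
        }
        for (BooleanClause clause : bq.clauses()) {
            if (!clause.isProhibited()) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        BooleanQuery bq = new BooleanQuery();
        bq.add(new TermQuery(new Term("body", "spam")), BooleanClause.Occur.MUST_NOT);
        System.out.println(isNegative(bq)); // true: would need a MatchAllDocsQuery partner
    }
}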
From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java
License:Open Source License
@SuppressWarnings("unchecked") protected Query getFieldQueryImpl(String field, String queryText, AnalysisMode analysisMode, LuceneFunction luceneFunction) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or noth // TODO: Untokenised columns with functions require special handling if (luceneFunction != LuceneFunction.FIELD) { throw new UnsupportedOperationException( "Field queries are not supported on lucene functions (UPPER, LOWER, etc)"); }/*from w w w . j av a2 s. c o m*/ // if the incoming string already has a language identifier we strip it iff and addit back on again String localePrefix = ""; String toTokenise = queryText; if (queryText.startsWith("{")) { int position = queryText.indexOf("}"); String language = queryText.substring(0, position + 1); Locale locale = new Locale(queryText.substring(1, position)); String token = queryText.substring(position + 1); boolean found = false; if (!locale.toString().isEmpty()) { for (Locale current : Locale.getAvailableLocales()) { if (current.toString().equalsIgnoreCase(locale.toString())) { found = true; break; } } } if (found) { localePrefix = language; toTokenise = token; } else { toTokenise = token; } } String testText = toTokenise; boolean requiresMLTokenDuplication = false; String localeString = null; if (field.startsWith(PROPERTY_FIELD_PREFIX) && (localePrefix.length() == 0)) { if ((queryText.length() > 0) && (queryText.charAt(0) == '\u0000')) { int position = queryText.indexOf("\u0000", 1); testText = queryText.substring(position + 1); requiresMLTokenDuplication = true; localeString = queryText.substring(1, position); } } // find the positions of any escaped * and ? and ignore them Set<Integer> wildcardPoistions = getWildcardPositions(testText); TokenStream source; if ((localePrefix.length() == 0) || (wildcardPoistions.size() > 0) || (analysisMode == AnalysisMode.IDENTIFIER)) { source = getAnalyzer().tokenStream(field, new StringReader(toTokenise), analysisMode); } else { source = getAnalyzer().tokenStream(field, new StringReader( "\u0000" + localePrefix.substring(1, localePrefix.length() - 1) + "\u0000" + toTokenise), analysisMode); localePrefix = ""; } ArrayList<org.apache.lucene.analysis.Token> list = new ArrayList<org.apache.lucene.analysis.Token>(); org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token(); org.apache.lucene.analysis.Token nextToken; int positionCount = 0; boolean severalTokensAtSamePosition = false; while (true) { try { nextToken = source.next(reusableToken); } catch (IOException e) { nextToken = null; } if (nextToken == null) break; list.add((org.apache.lucene.analysis.Token) nextToken.clone()); if (nextToken.getPositionIncrement() != 0) positionCount += nextToken.getPositionIncrement(); else severalTokensAtSamePosition = true; } try { source.close(); } catch (IOException e) { // ignore } // add any alpha numeric wildcards that have been missed // Fixes most stop word and wild card issues for (int index = 0; index < testText.length(); index++) { char current = testText.charAt(index); if (((current == '*') || (current == '?')) && wildcardPoistions.contains(index)) { StringBuilder pre = new StringBuilder(10); if (index == 0) { // "*" and "?" 
at the start boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= 0) && (0 < test.endOffset())) { found = true; break; } } if (!found && (testText.length() == 1)) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(0, 0); newToken.setTermBuffer(""); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } else if (index > 0) { // Add * and ? back into any tokens from which it has been removed boolean tokenFound = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= index) && (index < test.endOffset())) { if (requiresMLTokenDuplication) { String termText = new String(test.termBuffer(), 0, test.termLength()); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); if (index >= test.startOffset() + token.length()) { test.setTermBuffer(language + token + current); } } else { if (index >= test.startOffset() + test.termLength()) { test.setTermBuffer(test.term() + current); } } tokenFound = true; break; } } if (!tokenFound) { for (int i = index - 1; i >= 0; i--) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { pre.insert(0, c); } } else { break; } } if (pre.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token( index - pre.length(), index); newToken.setTermBuffer(pre.toString()); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? 
defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } StringBuilder post = new StringBuilder(10); if (index > 0) { for (int i = index + 1; i < testText.length(); i++) { char c = testText.charAt(i); if (Character.isLetterOrDigit(c)) { boolean found = false; for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token test = list.get(j); if ((test.startOffset() <= i) && (i < test.endOffset())) { found = true; break; } } if (found) { break; } else { post.append(c); } } else { break; } } if (post.length() > 0) { // Add new token followed by * not given by the tokeniser org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(index + 1, index + 1 + post.length()); newToken.setTermBuffer(post.toString()); newToken.setType("ALPHANUM"); if (requiresMLTokenDuplication) { Locale locale = I18NUtil.parseLocale(localeString); MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null ? defaultSearchMLAnalysisMode : searchParameters.getMlAnalaysisMode(); MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode); Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken); if (it != null) { int count = 0; while (it.hasNext()) { list.add(it.next()); count++; if (count > 1) { severalTokensAtSamePosition = true; } } } } // content else { list.add(newToken); } } } } } Collections.sort(list, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); if (dif != 0) { return dif; } else { return o2.getPositionIncrement() - o1.getPositionIncrement(); } } }); // Combined * and ? 
based strings - should redo the tokeniser // Build tokens by position LinkedList<LinkedList<org.apache.lucene.analysis.Token>> tokensByPosition = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); LinkedList<org.apache.lucene.analysis.Token> currentList = null; for (org.apache.lucene.analysis.Token c : list) { if (c.getPositionIncrement() == 0) { if (currentList == null) { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); } currentList.add(c); } else { currentList = new LinkedList<org.apache.lucene.analysis.Token>(); tokensByPosition.add(currentList); currentList.add(c); } } // Build all the token sequences and see which ones get strung together LinkedList<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokensAtPosition : tokensByPosition) { if (allTokenSequences.size() == 0) { for (org.apache.lucene.analysis.Token t : tokensAtPosition) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.add(t); allTokenSequences.add(newEntry); } } else { LinkedList<LinkedList<org.apache.lucene.analysis.Token>> newAllTokeSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); FOR_FIRST_TOKEN_AT_POSITION_ONLY: for (org.apache.lucene.analysis.Token t : tokensAtPosition) { boolean tokenFoundSequence = false; for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.addAll(tokenSequence); if (newEntry.getLast().endOffset() <= t.startOffset()) { newEntry.add(t); tokenFoundSequence = true; } newAllTokeSequences.add(newEntry); } if (false == tokenFoundSequence) { LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>(); newEntry.add(t); newAllTokeSequences.add(newEntry); } // Limit the max number of permutations we consider if (newAllTokeSequences.size() > 64) { break FOR_FIRST_TOKEN_AT_POSITION_ONLY; } } allTokenSequences = newAllTokeSequences; } } // build the uniquie LinkedList<LinkedList<org.apache.lucene.analysis.Token>> fixedTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) { LinkedList<org.apache.lucene.analysis.Token> fixedTokenSequence = new LinkedList<org.apache.lucene.analysis.Token>(); fixedTokenSequences.add(fixedTokenSequence); org.apache.lucene.analysis.Token replace = null; for (org.apache.lucene.analysis.Token c : tokenSequence) { if (replace == null) { StringBuilder prefix = new StringBuilder(); for (int i = c.startOffset() - 1; i >= 0; i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { prefix.insert(0, test); } else { break; } } String pre = prefix.toString(); if (requiresMLTokenDuplication) { String termText = new String(c.termBuffer(), 0, c.termLength()); int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(language + pre + token); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String termText = new 
String(c.termBuffer(), 0, c.termLength()); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(pre + termText); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } else { StringBuilder prefix = new StringBuilder(); StringBuilder postfix = new StringBuilder(); StringBuilder builder = prefix; for (int i = c.startOffset() - 1; i >= replace.endOffset(); i--) { char test = testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { builder.insert(0, test); } else { builder = postfix; postfix.setLength(0); } } String pre = prefix.toString(); String post = postfix.toString(); // Does it bridge? if ((pre.length() > 0) && (replace.endOffset() + pre.length()) == c.startOffset()) { String termText = new String(c.termBuffer(), 0, c.termLength()); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); @SuppressWarnings("unused") String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset()); replace.setTermBuffer(replaceTermText + pre + token); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } else { int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset()); replace.setTermBuffer(replaceTermText + pre + termText); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); } } else { String termText = new String(c.termBuffer(), 0, c.termLength()); if (requiresMLTokenDuplication) { int position = termText.indexOf("}"); String language = termText.substring(0, position + 1); String token = termText.substring(position + 1); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replace.startOffset(), replace.endOffset() + post.length()); last.setTermBuffer(replaceTermText + post); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(language + pre + token); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } else { String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token( replace.startOffset(), replace.endOffset() + post.length()); last.setTermBuffer(replaceTermText + post); last.setType(replace.type()); last.setPositionIncrement(replace.getPositionIncrement()); fixedTokenSequence.add(last); replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(), c.endOffset()); replace.setTermBuffer(pre + termText); replace.setType(c.type()); replace.setPositionIncrement(c.getPositionIncrement()); } } } } // finish last if (replace != null) { StringBuilder postfix = new StringBuilder(); if ((replace.endOffset() >= 0) && (replace.endOffset() < testText.length())) { for (int i = replace.endOffset(); i < testText.length(); i++) { char test = 
testText.charAt(i); if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) { postfix.append(test); } else { break; } } } String post = postfix.toString(); int oldPositionIncrement = replace.getPositionIncrement(); String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength()); replace = new org.apache.lucene.analysis.Token(replace.startOffset(), replace.endOffset() + post.length()); replace.setTermBuffer(replaceTermText + post); replace.setType(replace.type()); replace.setPositionIncrement(oldPositionIncrement); fixedTokenSequence.add(replace); } } // rebuild fixed list ArrayList<org.apache.lucene.analysis.Token> fixed = new ArrayList<org.apache.lucene.analysis.Token>(); for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { for (org.apache.lucene.analysis.Token token : tokenSequence) { fixed.add(token); } } // reorder by start position and increment Collections.sort(fixed, new Comparator<org.apache.lucene.analysis.Token>() { public int compare(Token o1, Token o2) { int dif = o1.startOffset() - o2.startOffset(); if (dif != 0) { return dif; } else { return o1.getPositionIncrement() - o2.getPositionIncrement(); } } }); // make sure we remove any tokens we have duplicated @SuppressWarnings("rawtypes") OrderedHashSet unique = new OrderedHashSet(); unique.addAll(fixed); fixed = new ArrayList<org.apache.lucene.analysis.Token>(unique); list = fixed; // add any missing locales back to the tokens if (localePrefix.length() > 0) { for (int j = 0; j < list.size(); j++) { org.apache.lucene.analysis.Token currentToken = list.get(j); String termText = new String(currentToken.termBuffer(), 0, currentToken.termLength()); currentToken.setTermBuffer(localePrefix + termText); } } if (list.size() == 0) return null; else if (list.size() == 1) { nextToken = list.get(0); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); if (termText.contains("*") || termText.contains("?")) { return newWildcardQuery( new Term(field, getLowercaseExpandedTerms() ? termText.toLowerCase() : termText)); } else { return newTermQuery(new Term(field, termText)); } } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = newBooleanQuery(true); for (int i = 0; i < list.size(); i++) { Query currentQuery; nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); if (termText.contains("*") || termText.contains("?")) { currentQuery = newWildcardQuery(new Term(field, getLowercaseExpandedTerms() ? 
termText.toLowerCase() : termText)); } else { currentQuery = newTermQuery(new Term(field, termText)); } q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; } // Consider if we can use a multi-phrase query (e.g for synonym use rather then WordDelimiterFilterFactory) else if (canUseMultiPhraseQuery(fixedTokenSequences)) { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(internalSlop); ArrayList<Term> multiTerms = new ArrayList<Term>(); int position = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { addWildcardTerms(multiTerms, term); } else { multiTerms.add(term); } if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) { if (getEnablePositionIncrements()) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } checkTermCount(field, queryText, mpq); multiTerms.clear(); } position += nextToken.getPositionIncrement(); } if (getEnablePositionIncrements()) { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0]), position); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }, position); // } } else { if (multiTerms.size() > 0) { mpq.add(multiTerms.toArray(new Term[0])); } // else // { // mpq.add(new Term[] { new Term(field, "\u0000") }); // } } checkTermCount(field, queryText, mpq); return mpq; } // Word delimiter factory and other odd things generate complex token patterns // Smart skip token sequences with small tokens that generate toomany wildcards // Fall back to the larger pattern // e.g Site1* will not do (S ite 1*) or (Site 1*) if 1* matches too much (S ite1*) and (Site1*) will still be OK // If we skip all (for just 1* in the input) this is still an issue. else { boolean skippedTokens = false; BooleanQuery q = newBooleanQuery(true); TOKEN_SEQUENCE: for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(internalSlop); int position = 0; for (int i = 0; i < tokenSequence.size(); i++) { nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if (getEnablePositionIncrements()) { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { mpq.add(getMatchingTerms(field, term), position); } else { mpq.add(new Term[] { term }, position); } if (exceedsTermCount(mpq)) { // We could duplicate the token sequence without the failing wildcard expansion and try again ?? 
skippedTokens = true; continue TOKEN_SEQUENCE; } if (nextToken.getPositionIncrement() > 0) { position += nextToken.getPositionIncrement(); } else { position++; } } else { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { mpq.add(getMatchingTerms(field, term)); } else { mpq.add(term); } if (exceedsTermCount(mpq)) { skippedTokens = true; continue TOKEN_SEQUENCE; } } } q.add(mpq, BooleanClause.Occur.SHOULD); } if (skippedTokens && (q.clauses().size() == 0)) { throw new LuceneQueryParserException( "Query skipped all token sequences as wildcards generated too many clauses: " + field + " " + queryText); } return q; } } else { MultiPhraseQuery q = new MultiPhraseQuery(); q.setSlop(internalSlop); int position = 0; for (int i = 0; i < list.size(); i++) { nextToken = list.get(i); String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength()); Term term = new Term(field, termText); if (getEnablePositionIncrements()) { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { q.add(getMatchingTerms(field, term), position); } else { q.add(new Term[] { term }, position); } checkTermCount(field, queryText, q); if (nextToken.getPositionIncrement() > 0) { position += nextToken.getPositionIncrement(); } else { position++; } } else { if ((termText != null) && (termText.contains("*") || termText.contains("?"))) { q.add(getMatchingTerms(field, term)); } else { q.add(term); } checkTermCount(field, queryText, q); } } return q; } } }
From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java
License:Open Source License
protected Query buildDateTimeRange(String field, Calendar startIn, int startResolution, Calendar endIn,
        int endResolution, boolean includeLower, boolean includeUpper) throws ParseException {
    int minResolution = (startResolution <= endResolution) ? startResolution : endResolution;
    // Fix start and end dates and treat all as inclusive ranges
    Calendar start = Calendar.getInstance();
    start.setTime(startIn.getTime());
    if (!includeLower) {
        start.add(startResolution, 1);
    }
    Calendar end = Calendar.getInstance();
    end.setTime(endIn.getTime());
    if (!includeUpper) {
        end.add(endResolution, -1);
    }
    // Calendar comparison does not work for MAX .... joy
    if (start.get(Calendar.YEAR) > end.get(Calendar.YEAR)) {
        return createNoMatchQuery();
    } else if (start.get(Calendar.YEAR) == end.get(Calendar.YEAR)) {
        if (start.get(Calendar.MONTH) > end.get(Calendar.MONTH)) {
            return createNoMatchQuery();
        } else if (start.get(Calendar.MONTH) == end.get(Calendar.MONTH)) {
            if (start.get(Calendar.DAY_OF_MONTH) > end.get(Calendar.DAY_OF_MONTH)) {
                return createNoMatchQuery();
            } else if (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH)) {
                if (start.get(Calendar.HOUR_OF_DAY) > end.get(Calendar.HOUR_OF_DAY)) {
                    return createNoMatchQuery();
                } else if (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY)) {
                    if (start.get(Calendar.MINUTE) > end.get(Calendar.MINUTE)) {
                        return createNoMatchQuery();
                    } else if (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE)) {
                        if (start.get(Calendar.SECOND) > end.get(Calendar.SECOND)) {
                            return createNoMatchQuery();
                        } else if (start.get(Calendar.SECOND) == end.get(Calendar.SECOND)) {
                            if (start.get(Calendar.MILLISECOND) > end.get(Calendar.MILLISECOND)) {
                                return createNoMatchQuery();
                            } else if (start.get(Calendar.MILLISECOND) == end.get(Calendar.MILLISECOND)) {
                                // continue
                            }
                        }
                    }
                }
            }
        }
    }
    BooleanQuery query = new BooleanQuery();
    Query part;
    if ((minResolution > Calendar.YEAR) && (start.get(Calendar.YEAR) == end.get(Calendar.YEAR))) {
        part = new TermQuery(new Term(field, "YE" + start.get(Calendar.YEAR)));
        query.add(part, Occur.MUST);
        if ((minResolution > Calendar.MONTH) && (start.get(Calendar.MONTH) == end.get(Calendar.MONTH))) {
            part = new TermQuery(new Term(field, build2SF("MO", start.get(Calendar.MONTH))));
            query.add(part, Occur.MUST);
            if ((minResolution > Calendar.DAY_OF_MONTH)
                    && (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH))) {
                part = new TermQuery(new Term(field, build2SF("DA", start.get(Calendar.DAY_OF_MONTH))));
                query.add(part, Occur.MUST);
                if ((minResolution > Calendar.HOUR_OF_DAY)
                        && (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY))) {
                    part = new TermQuery(new Term(field, build2SF("HO", start.get(Calendar.HOUR_OF_DAY))));
                    query.add(part, Occur.MUST);
                    if ((minResolution > Calendar.MINUTE)
                            && (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE))) {
                        part = new TermQuery(new Term(field, build2SF("MI", start.get(Calendar.MINUTE))));
                        query.add(part, Occur.MUST);
                        if ((minResolution > Calendar.SECOND)
                                && (start.get(Calendar.SECOND) == end.get(Calendar.SECOND))) {
                            part = new TermQuery(new Term(field, build2SF("SE", start.get(Calendar.SECOND))));
                            query.add(part, Occur.MUST);
                            if (minResolution >= Calendar.MILLISECOND) {
                                if (start.get(Calendar.MILLISECOND) == end.get(Calendar.MILLISECOND)) {
                                    part = new TermQuery(
                                            new Term(field, build3SF("MS", start.get(Calendar.MILLISECOND))));
                                    query.add(part, Occur.MUST);
                                } else {
                                    part = new ConstantScoreRangeQuery(field,
                                            build3SF("MS", start.get(Calendar.MILLISECOND)),
                                            build3SF("MS", end.get(Calendar.MILLISECOND)), true, true);
                                    query.add(part, Occur.MUST);
                                }
                            } else {
                                return createNoMatchQuery();
                            }
                        } else {
                            // s + ms
                            BooleanQuery subQuery = new BooleanQuery();
                            Query subPart;
                            for (int i : new int[] { Calendar.MILLISECOND }) {
                                subPart = buildStart(field, start, Calendar.SECOND, i, startResolution);
                                if (subPart != null) {
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }
                            if (Calendar.SECOND < minResolution) {
                                if ((end.get(Calendar.SECOND) - start.get(Calendar.SECOND)) > 1) {
                                    subPart = new ConstantScoreRangeQuery(field,
                                            build2SF("SE", start.get(Calendar.SECOND)),
                                            build2SF("SE", end.get(Calendar.SECOND)), false, false);
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }
                            if (Calendar.SECOND == minResolution) {
                                if (start.get(Calendar.SECOND) == end.get(Calendar.SECOND)) {
                                    if (includeLower && includeUpper) {
                                        part = new TermQuery(
                                                new Term(field, build2SF("SE", start.get(Calendar.SECOND))));
                                        query.add(part, Occur.MUST);
                                    } else {
                                        return createNoMatchQuery();
                                    }
                                } else {
                                    subPart = new ConstantScoreRangeQuery(field,
                                            build2SF("SE", start.get(Calendar.SECOND)),
                                            build2SF("SE", end.get(Calendar.SECOND)), includeLower,
                                            includeUpper);
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }
                            for (int i : new int[] { Calendar.MILLISECOND }) {
                                subPart = buildEnd(field, end, Calendar.SECOND, i, endResolution);
                                if (subPart != null) {
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }
                            if (subQuery.clauses().size() > 0) {
                                query.add(subQuery, Occur.MUST);
                            }
                        }
                    } else {
                        // min + s + ms
                        BooleanQuery subQuery = new BooleanQuery();
                        Query subPart;
                        for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND }) {
                            subPart = buildStart(field, start, Calendar.MINUTE, i, startResolution);
                            if (subPart != null) {
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }
                        if (Calendar.MINUTE < minResolution) {
                            if ((end.get(Calendar.MINUTE) - start.get(Calendar.MINUTE)) > 1) {
                                subPart = new ConstantScoreRangeQuery(field,
                                        build2SF("MI", start.get(Calendar.MINUTE)),
                                        build2SF("MI", end.get(Calendar.MINUTE)), false, false);
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }
                        if (Calendar.MINUTE == minResolution) {
                            if (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE)) {
                                if (includeLower && includeUpper) {
                                    part = new TermQuery(
                                            new Term(field, build2SF("MI", start.get(Calendar.MINUTE))));
                                    query.add(part, Occur.MUST);
                                } else {
                                    return createNoMatchQuery();
                                }
                            } else {
                                subPart = new ConstantScoreRangeQuery(field,
                                        build2SF("MI", start.get(Calendar.MINUTE)),
                                        build2SF("MI", end.get(Calendar.MINUTE)), includeLower, includeUpper);
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }
                        for (int i : new int[] { Calendar.SECOND, Calendar.MILLISECOND }) {
                            subPart = buildEnd(field, end, Calendar.MINUTE, i, endResolution);
                            if (subPart != null) {
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }
                        if (subQuery.clauses().size() > 0) {
                            query.add(subQuery, Occur.MUST);
                        }
                    }
                } else {
                    // hr + min + s + ms
                    BooleanQuery subQuery = new BooleanQuery();
                    Query subPart;
                    for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE }) {
                        subPart = buildStart(field, start, Calendar.HOUR_OF_DAY, i, startResolution);
                        if (subPart != null) {
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    if (Calendar.HOUR_OF_DAY < minResolution) {
                        if ((end.get(Calendar.HOUR_OF_DAY) - start.get(Calendar.HOUR_OF_DAY)) > 1) {
                            subPart = new ConstantScoreRangeQuery(field,
                                    build2SF("HO", start.get(Calendar.HOUR_OF_DAY)),
                                    build2SF("HO", end.get(Calendar.HOUR_OF_DAY)), false, false);
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    if (Calendar.HOUR_OF_DAY == minResolution) {
                        if (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY)) {
                            if (includeLower && includeUpper) {
                                part = new TermQuery(
                                        new Term(field, build2SF("HO", start.get(Calendar.HOUR_OF_DAY))));
                                query.add(part, Occur.MUST);
                            } else {
                                return createNoMatchQuery();
                            }
                        } else {
                            subPart = new ConstantScoreRangeQuery(field,
                                    build2SF("HO", start.get(Calendar.HOUR_OF_DAY)),
                                    build2SF("HO", end.get(Calendar.HOUR_OF_DAY)), includeLower, includeUpper);
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    for (int i : new int[] { Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND }) {
                        subPart = buildEnd(field, end, Calendar.HOUR_OF_DAY, i, endResolution);
                        if (subPart != null) {
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    if (subQuery.clauses().size() > 0) {
                        query.add(subQuery, Occur.MUST);
                    }
                }
            } else {
                // day + hr + min + s + ms
                BooleanQuery subQuery = new BooleanQuery();
                Query subPart;
                for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE,
                        Calendar.HOUR_OF_DAY }) {
                    subPart = buildStart(field, start, Calendar.DAY_OF_MONTH, i, startResolution);
                    if (subPart != null) {
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }
                if (Calendar.DAY_OF_MONTH < minResolution) {
                    if ((end.get(Calendar.DAY_OF_MONTH) - start.get(Calendar.DAY_OF_MONTH)) > 1) {
                        subPart = new ConstantScoreRangeQuery(field,
                                build2SF("DA", start.get(Calendar.DAY_OF_MONTH)),
                                build2SF("DA", end.get(Calendar.DAY_OF_MONTH)), false, false);
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }
                if (Calendar.DAY_OF_MONTH == minResolution) {
                    if (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH)) {
                        if (includeLower && includeUpper) {
                            part = new TermQuery(
                                    new Term(field, build2SF("DA", start.get(Calendar.DAY_OF_MONTH))));
                            query.add(part, Occur.MUST);
                        } else {
                            return createNoMatchQuery();
                        }
                    } else {
                        subPart = new ConstantScoreRangeQuery(field,
                                build2SF("DA", start.get(Calendar.DAY_OF_MONTH)),
                                build2SF("DA", end.get(Calendar.DAY_OF_MONTH)), includeLower, includeUpper);
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }
                for (int i : new int[] { Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND,
                        Calendar.MILLISECOND }) {
                    subPart = buildEnd(field, end, Calendar.DAY_OF_MONTH, i, endResolution);
                    if (subPart != null) {
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }
                if (subQuery.clauses().size() > 0) {
                    query.add(subQuery, Occur.MUST);
                }
            }
        } else {
            // month + day + hr + min + s + ms
            BooleanQuery subQuery = new BooleanQuery();
            Query subPart;
            for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE,
                    Calendar.HOUR_OF_DAY, Calendar.DAY_OF_MONTH }) {
                subPart = buildStart(field, start, Calendar.MONTH, i, startResolution);
                if (subPart != null) {
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }
            if (Calendar.MONTH < minResolution) {
                if ((end.get(Calendar.MONTH) - start.get(Calendar.MONTH)) > 1) {
                    subPart = new ConstantScoreRangeQuery(field, build2SF("MO", start.get(Calendar.MONTH)),
                            build2SF("MO", end.get(Calendar.MONTH)), false, false);
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }
            if (Calendar.MONTH == minResolution) {
                if (start.get(Calendar.MONTH) == end.get(Calendar.MONTH)) {
                    if (includeLower && includeUpper) {
                        part = new TermQuery(new Term(field, build2SF("MO", start.get(Calendar.MONTH))));
                        query.add(part, Occur.MUST);
                    } else {
                        return createNoMatchQuery();
                    }
                } else {
                    subPart = new ConstantScoreRangeQuery(field, build2SF("MO", start.get(Calendar.MONTH)),
                            build2SF("MO", end.get(Calendar.MONTH)), includeLower, includeUpper);
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }
            for (int i : new int[] { Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE,
                    Calendar.SECOND, Calendar.MILLISECOND }) {
                subPart = buildEnd(field, end, Calendar.MONTH, i, endResolution);
                if (subPart != null) {
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }
            if (subQuery.clauses().size() > 0) {
                query.add(subQuery, Occur.MUST);
            }
        }
    } else {
        // year + month + day + hr + min + s + ms
        BooleanQuery subQuery = new BooleanQuery();
        Query subPart;
        for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE, Calendar.HOUR_OF_DAY,
                Calendar.DAY_OF_MONTH, Calendar.MONTH }) {
            subPart = buildStart(field, start, Calendar.YEAR, i, startResolution);
            if (subPart != null) {
                subQuery.add(subPart, Occur.SHOULD);
            }
        }
        if (Calendar.YEAR < minResolution) {
            if ((end.get(Calendar.YEAR) - start.get(Calendar.YEAR)) > 1) {
                subPart = new ConstantScoreRangeQuery(field, "YE" + start.get(Calendar.YEAR),
                        "YE" + end.get(Calendar.YEAR), false, false);
                subQuery.add(subPart, Occur.SHOULD);
            }
        }
        if (Calendar.YEAR == minResolution) {
            if (start.get(Calendar.YEAR) == end.get(Calendar.YEAR)) {
                if (includeLower && includeUpper) {
                    part = new TermQuery(new Term(field, "YE" + start.get(Calendar.YEAR)));
                    query.add(part, Occur.MUST);
                } else {
                    return createNoMatchQuery();
                }
            } else {
                subPart = new ConstantScoreRangeQuery(field, "YE" + start.get(Calendar.YEAR),
                        "YE" + end.get(Calendar.YEAR), includeLower, includeUpper);
                subQuery.add(subPart, Occur.SHOULD);
            }
        }
        for (int i : new int[] { Calendar.MONTH, Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE,
                Calendar.SECOND, Calendar.MILLISECOND }) {
            subPart = buildEnd(field, end, Calendar.YEAR, i, endResolution);
            if (subPart != null) {
                subQuery.add(subPart, Occur.SHOULD);
            }
        }
        if (subQuery.clauses().size() > 0) {
            query.add(subQuery, Occur.MUST);
        }
    }
    return query;
}
From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java
License:Open Source License
/**
 * Returns null if all clause words were filtered away by the analyzer.
 *
 * @param booleanQuery - initial BooleanQuery
 * @return BooleanQuery or <code>null</code> if booleanQuery has no clauses
 */
protected BooleanQuery getNonEmptyBooleanQuery(BooleanQuery booleanQuery) {
    if (booleanQuery.clauses().size() > 0) {
        return booleanQuery;
    } else {
        return null;
    }
}
From source file:org.alfresco.solr.query.PostFilterQuery.java
License:Open Source License
private void getPostFilters(Query q, List<PostFilter> postFilters) {
    if (q instanceof BooleanQuery) {
        BooleanQuery bq = (BooleanQuery) q;
        List<BooleanClause> clauses = bq.clauses();
        for (BooleanClause clause : clauses) {
            Query q1 = clause.getQuery();
            getPostFilters(q1, postFilters);
        }
    } else if (q instanceof ContextAwareQuery) {
        ContextAwareQuery cq = (ContextAwareQuery) q;
        getPostFilters(cq.getLuceneQuery(), postFilters);
    } else if (q instanceof PostFilter) {
        postFilters.add((PostFilter) q);
    } else if (q instanceof BoostQuery) {
        BoostQuery bq = (BoostQuery) q;
        getPostFilters(bq.getQuery(), postFilters);
    }
}
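The recursive descent above generalizes to any clause-level analysis of a query tree. A minimal sketch in the same spirit, written against the immutable Lucene 5+ builder API that this example's BoostQuery branch implies; the field names are illustrative:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class LeafTermCollector {
    // Walks nested BooleanQuery clauses, unwrapping BoostQuery,
    // and collects every leaf TermQuery it finds.
    static void collectTerms(Query q, List<TermQuery> out) {
        if (q instanceof BooleanQuery) {
            for (BooleanClause clause : ((BooleanQuery) q).clauses()) {
                collectTerms(clause.getQuery(), out);
            }
        } else if (q instanceof BoostQuery) {
            collectTerms(((BoostQuery) q).getQuery(), out);
        } else if (q instanceof TermQuery) {
            out.add((TermQuery) q);
        }
    }

    public static void main(String[] args) {
        Query q = new BooleanQuery.Builder()
                .add(new BoostQuery(new TermQuery(new Term("title", "lucene")), 2f),
                        BooleanClause.Occur.MUST)
                .add(new TermQuery(new Term("body", "clauses")), BooleanClause.Occur.SHOULD)
                .build();
        List<TermQuery> leaves = new ArrayList<>();
        collectTerms(q, leaves);
        leaves.forEach(System.out::println);
    }
}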