Example usage for org.apache.lucene.search BooleanQuery clauses

List of usage examples for org.apache.lucene.search BooleanQuery clauses

Introduction

In this page you can find the example usage for org.apache.lucene.search BooleanQuery clauses.

Prototype

List clauses

To view the source code for org.apache.lucene.search BooleanQuery clauses, click the Source Link below.

Click Source Link

Usage

From source file:nicta.com.au.patent.pac.search.RewriteQuery.java

/**
 * Rewrites a parsed patent query for the given topic: removes from the term
 * clauses any term flagged with y == 0 in the terms-impact file, then
 * re-expands the surviving terms across every searchable field.
 *
 * <p>Expected terms-impact file format (whitespace-separated, '#' starts a
 * comment line): {@code id term oldav av impact y}.
 *
 * @param queryid id of the topic whose flagged terms should be removed
 * @param query   the patent query to rewrite
 * @return a new BooleanQuery combining clause[0] of the original query (MUST)
 *         with the re-expanded term clauses (MUST)
 * @throws ParseException if the underlying query cannot be parsed
 */
public Query rewrite(String queryid, PatentQuery query) throws ParseException {
    BooleanQuery bQuery = (BooleanQuery) query.parse();
    // Assumes the parsed query shape is: clause[1] is a BooleanQuery whose
    // clause[0] holds the per-term clauses -- TODO confirm against
    // PatentQuery.parse().
    BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
            .getQuery();
    BooleanQuery bQueryFinal = new BooleanQuery();
    // Work on a clone so the original parsed query stays untouched.
    BooleanQuery bQuery3 = bQuery2.clone();
    // try-with-resources replaces the old FileInputStream/DataInputStream
    // chain (DataInputStream added nothing) and guarantees the stream closes.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(termsImpactFilename)))) {
        String str;
        while ((str = br.readLine()) != null) {
            // Skip comment and blank lines.
            if (str.startsWith("#") || str.trim().isEmpty()) {
                continue;
            }
            StringTokenizer st = new StringTokenizer(str);
            String id = st.nextToken();
            String term = st.nextToken();
            // The three numeric columns are parsed only to validate the line
            // format; their values are not used by this method.
            Double.parseDouble(st.nextToken()); // oldav
            Double.parseDouble(st.nextToken()); // av
            Double.parseDouble(st.nextToken()); // impact
            int y = Integer.parseInt(st.nextToken());
            if (queryid.equals(id) && y == 0) {
                // Remove the first clause whose term text is a prefix of the
                // flagged term; the immediate break means the iterator is
                // never advanced after the structural modification.
                for (BooleanClause bC : bQuery3.clauses()) {
                    TermQuery tq = (TermQuery) bC.getQuery();
                    if (term.startsWith(tq.getTerm().text())) {
                        bQuery3.clauses().remove(bC);
                        break;
                    }
                }
            }
        }
    } catch (IOException | NumberFormatException e) {
        // Best-effort: a missing or malformed impact file leaves the query
        // with all of its original terms.
        e.printStackTrace();
    }
    BooleanQuery bq = new BooleanQuery();
    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        // FIX: the original condition used (i != 4 || i != 6), which is
        // always true; the intent is clearly to skip fields 4 and 6.
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && i != 4 && i != 6
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            // Re-field every surviving term into field i, preserving boosts.
            BooleanQuery bq2 = new BooleanQuery();
            for (BooleanClause bc : bQuery3.clauses()) {
                TermQuery tq = (TermQuery) bc.getQuery();
                Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                TermQuery tq2 = new TermQuery(term);
                tq2.setBoost(tq.getBoost());
                bq2.add(tq2, BooleanClause.Occur.SHOULD);
            }
            bq.add(bq2, BooleanClause.Occur.SHOULD);
        }
    }
    bQueryFinal.add(bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
    bQueryFinal.add(bq, BooleanClause.Occur.MUST);

    return bQueryFinal;
}

From source file:nicta.com.au.patent.pac.terms.impact.FeaturesSelection.java

/**
 * Leave-one-out feature extraction: for every term clause of every topic's
 * parsed query, removes that clause and prints one tab-separated feature row
 * describing the removed term to stdout (a header row is printed first).
 *
 * <p>NOTE(review): freqAbstract/ratioInAbstract are computed below but never
 * printed, and the abstract columns are also absent from the header --
 * presumably intentional, but worth confirming.
 *
 * @throws ParseException if a topic's query cannot be parsed
 * @throws Exception      on index access or searcher errors
 */
public void iterateOverQueryTerms() throws ParseException, Exception {
    long start = System.currentTimeMillis();
    int l = 0; // count of topics processed (progress counter only)

    // Header row of the feature table.
    System.out.println(
            "queryid\tremovedBooleanClause\ttf\tln_tf\tidf\ttfidf\ttLength\tratioTerm\tnbrUniqTerms\tqSize\tscq\tSCS\tictf\tQC\tclarity\tfreqInTitle\tratioInTitle\tfreqDescription\tratioInDescription\tfreqClaims\tratioInClaims");
    for (Map.Entry<String, PatentDocument> e : topics.getTopics().entrySet()) {
        l++;
        String queryid = e.getKey();
        PatentDocument pt = e.getValue();
        //            System.err.print(l + "- " + queryid + " -> " + pt.getUcid() + ": ");
        long start2 = System.currentTimeMillis();
        PatentQuery query = new PatentQuery(pt, boosts, filter, stopWords);
        BooleanQuery bQuery = (BooleanQuery) query.parse();
        // Skip topics whose parsed query lacks the expected shape: exactly 2
        // clauses, with clause[1] a BooleanQuery whose clause[0] is itself a
        // BooleanQuery holding the per-term clauses.
        if (bQuery.getClauses().length != 2 || !(bQuery.getClauses()[1].getQuery() instanceof BooleanQuery)
                || ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses().length == 0
                || !(((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                        .getQuery() instanceof BooleanQuery)) {
            continue;
        }
        BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                .getQuery();
        for (int i = 0; i < bQuery2.clauses().size(); i++) {
            BooleanQuery bQueryFinal = new BooleanQuery();
            // Clone, then remove clause i -> the leave-one-out query.
            BooleanQuery bQuery3 = bQuery2.clone();
            BooleanClause removedBooleanClause = bQuery3.clauses().remove(i);
            bQueryFinal.add((Query) bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
            bQueryFinal.add(bQuery3, BooleanClause.Occur.MUST);
            //***************************
            // Features of the removed term
            //***************************
            IndexReader ir = searcher.getIndexSearch().getIndexReader();
            TermQuery term = (TermQuery) removedBooleanClause.getQuery();
            double tf = removedBooleanClause.getQuery().getBoost();// Term frequency (stored as the clause boost)
            double ln_tf = Math.log(1 + tf);// Log of the term frequency
            int totalTF = ir.docFreq(term.getTerm());
            int docs = ir.getDocCount(term.getTerm().field());
            double idf = 0;
            if (totalTF != 0) {
                idf = Math.log10((double) docs / (totalTF));// Inverse document frequency
            }
            double tfidf = ln_tf * idf;// TFIDF
            int tLength = term.getTerm().text().length();// Term length
            // Query size: size of the section the removed term's field
            // belongs to. NOTE(review): qSize stays 0 if the field matches no
            // known section, which would make ratioTerm infinite -- confirm
            // all fields map to one of the four sections.
            int qSize = 0;
            if (term.getTerm().field().endsWith(PatentDocument.Title)) {
                qSize = query.getTitleSize(); // Query size
            } else if (term.getTerm().field().endsWith(PatentDocument.Abstract)) {
                qSize = query.getAbstractSize(); // Query size
            } else if (term.getTerm().field().endsWith(PatentDocument.Description)) {
                qSize = query.getDescriptionSize(); // Query size
            } else if (term.getTerm().field().endsWith(PatentDocument.Claims)) {
                qSize = query.getClaimsSize(); // Query size
            }
            double ratioTerm = (double) tf / qSize;
            int nbrUniqTerms = bQuery2.getClauses().length;
            long totalTermFreq = ir.totalTermFreq(term.getTerm());
            double ln_totalTermFreq = Math.log(1 + totalTermFreq);
            double scq = ln_totalTermFreq * idf;// Collection-query similarity
            double freqInTitle = query.getFreqInTitle(term.getTerm().text());
            double ratioInTitle = (double) freqInTitle / query.getTitleSize();
            double freqAbstract = query.getFreqInAbstract(term.getTerm().text());
            double ratioInAbstract = (double) freqAbstract / query.getAbstractSize();
            double freqDescription = query.getFreqInDescription(term.getTerm().text());
            double ratioInDescription = (double) freqDescription / query.getDescriptionSize();
            double freqClaims = query.getFreqInClaims(term.getTerm().text());
            double ratioInClaims = (double) freqClaims / query.getClaimsSize();
            // Probability of the term in the whole collection (for SCS/clarity).
            double Pcoll = (double) totalTermFreq / ir.getSumTotalTermFreq(term.getTerm().field());
            double SCS = 0;
            double ictf = 0;
            List<TermFreqVector> docsTermVector = getDocsTerms(searcher.search(term), term.getTerm().field());
            double a1 = 0;
            for (TermFreqVector vec : docsTermVector) {
                a1 += Math.sqrt((double) vec.getFreq(term.getTerm().text()) / vec.numberOfTerms());
            }
            double clarity = 0;
            if (totalTermFreq != 0) {
                SCS = ratioTerm * Log2(ratioTerm / Pcoll);// Simplified Clarity Score
                ictf = Math.log10((double) docs / (totalTermFreq));// Inverse Collection Term Frequency
                clarity = a1 * Log2(a1 / Pcoll);
            }
            double QC = totalTF / (double) docs;// Query scope

            // One feature row per removed clause; column order must match the
            // header printed above.
            System.out.println(queryid + "\t" + removedBooleanClause + "\t" + tf + "\t" + ln_tf + "\t" + idf
                    + "\t" + tfidf + "\t" + tLength + "\t" + ratioTerm + "\t" + nbrUniqTerms + "\t" + qSize
                    + "\t" + scq + "\t" + SCS + "\t" + ictf + "\t" + QC + "\t" + clarity + "\t" + freqInTitle
                    + "\t" + ratioInTitle + "\t" + freqDescription + "\t" + ratioInDescription + "\t"
                    + freqClaims + "\t" + ratioInClaims);
        }
        long end2 = System.currentTimeMillis();
        //            System.err.println(bQuery2.clauses().size() + " terms processed in " + Functions.getTimer(end2 - start2) + ".");
    }
    long end = System.currentTimeMillis();
    long millis = (end - start);
    System.err.println("#Global Execution time: " + Functions.getTimer(millis) + ".");
}

From source file:nicta.com.au.patent.pac.terms.impact.TermsImpact.java

/**
 * Leave-one-out retrieval over query terms: for each topic, first runs the
 * full query ("allTerms" run), then re-runs it once per term clause with that
 * single clause removed, printing TREC-style result lines for every run to
 * stdout and progress/timing to stderr.
 *
 * @throws ParseException if a topic's query cannot be parsed
 * @throws Exception      on index access or searcher errors
 */
public void iterateOverQueryTerms() throws ParseException, Exception {
    long start = System.currentTimeMillis();
    int l = 0; // count of topics processed (progress counter only)
    for (Map.Entry<String, PatentDocument> e : topics.getTopics().entrySet()) {
        l++;
        String queryid = e.getKey();
        PatentDocument pt = e.getValue();
        System.err.print(l + "- " + queryid + " -> " + pt.getUcid() + ": ");
        long start2 = System.currentTimeMillis();
        PatentQuery query = new PatentQuery(pt, boosts, filter, stopWords);

        // Baseline run with all terms (stray ';' after this call removed).
        TopDocs hitsAll = searcher.search(query.parse());

        int j = 0;
        if (hitsAll.totalHits == 0) {
            // Placeholder line so every topic appears in the run file.
            System.out.println(queryid + " allTerms " + " Q0 XXXXXXXXXX 1 0.0 STANDARD");
        }
        for (ScoreDoc scoreDoc : hitsAll.scoreDocs) {
            j++;
            Document doc = searcher.getIndexSearch().doc(scoreDoc.doc);
            System.out.println(queryid + " allTerms " + " Q0 " + doc.get(PatentDocument.FileName).substring(3)
                    + " " + j + " " + scoreDoc.score + " STANDARD");
        }

        // Skip topics whose parsed query lacks the expected shape: clause[1]
        // must be a BooleanQuery whose clause[0] is itself a BooleanQuery
        // holding the per-term clauses.
        BooleanQuery bQuery = (BooleanQuery) query.parse();
        if (bQuery.getClauses().length != 2 || !(bQuery.getClauses()[1].getQuery() instanceof BooleanQuery)) {
            continue;
        }
        if (((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses().length == 0
                || !(((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                        .getQuery() instanceof BooleanQuery)) {
            continue;
        }

        BooleanQuery bQuery2 = (BooleanQuery) ((BooleanQuery) bQuery.getClauses()[1].getQuery()).getClauses()[0]
                .getQuery();
        for (int i = 0; i < bQuery2.clauses().size(); i++) {
            BooleanQuery bQueryFinal = new BooleanQuery();
            BooleanQuery bQueryFinal2 = new BooleanQuery();
            // Clone, then remove clause i -> the leave-one-out query.
            BooleanQuery bQuery3 = bQuery2.clone();
            BooleanClause bClause = bQuery3.clauses().remove(i);
            for (int k = 1; k < PatentQuery.getFields().length; k++) {
                // FIX: the original condition used (k != 4 || k != 6), which
                // is always true; the intent is clearly to skip fields 4 and 6.
                if (query.getQueries()[k] != null && !query.getQueries()[k].equals("") && k != 4 && k != 6
                        && query.getBoosts().get(PatentQuery.getFields()[k]) != 0) {
                    // Re-field the remaining terms into field k, keeping
                    // their boosts (redundant cast of bQuery3 removed).
                    BooleanQuery bq = bQuery3.clone();
                    BooleanQuery bq2 = new BooleanQuery();
                    for (BooleanClause bc : bq.clauses()) {
                        TermQuery tq = (TermQuery) bc.getQuery();
                        Term term = new Term(PatentQuery.getFields()[k], tq.getTerm().text());
                        TermQuery tq2 = new TermQuery(term);
                        tq2.setBoost(tq.getBoost());
                        bq2.add(tq2, BooleanClause.Occur.SHOULD);
                    }
                    bQueryFinal2.add(bq2, BooleanClause.Occur.SHOULD);
                }
            }
            bQueryFinal.add(bQuery.getClauses()[0].getQuery(), BooleanClause.Occur.MUST);
            bQueryFinal.add(bQueryFinal2, BooleanClause.Occur.MUST);

            TopDocs hits = searcher.search(bQueryFinal);
            j = 0;
            // Features of the removed term. NOTE(review): these locals are
            // computed but never printed here (they mirror the feature set in
            // FeaturesSelection) -- kept in case they are wired up later.
            TermQuery term = (TermQuery) bClause.getQuery();
            double tf = bClause.getQuery().getBoost();// Term frequency (stored as the clause boost)
            int totalTF = searcher.getIndexSearch().getIndexReader().docFreq(term.getTerm());
            int docs = searcher.getIndexSearch().getIndexReader().getDocCount(term.getTerm().field());
            double idf = Math.log10((double) docs / (totalTF + 1));// Inverse document frequency (+1 smoothing)
            int tLength = term.getTerm().text().length();// Term length
            int qSize = bQuery2.getClauses().length; // Query size

            if (hits.totalHits == 0) {
                // Placeholder line so the leave-one-out run is never empty.
                System.out.println(queryid + " " + bClause + " " + " Q0 XXXXXXXXXX 1 0.0 STANDARD");
            }

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                j++;
                Document doc = searcher.getIndexSearch().doc(scoreDoc.doc);
                System.out.println(
                        queryid + " " + bClause + " Q0 " + doc.get(PatentDocument.FileName).substring(3) + " "
                                + j + " " + scoreDoc.score + " STANDARD");
            }
        }
        long end2 = System.currentTimeMillis();
        System.err.println(
                bQuery2.clauses().size() + " terms processed in " + Functions.getTimer(end2 - start2) + ".");
    }
    long end = System.currentTimeMillis();
    long millis = (end - start);
    System.err.println("#Global Execution time: " + Functions.getTimer(millis) + ".");
}

From source file:nicta.com.au.patent.queryexpansion.PatentMMRQueryExpansion.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each fields
 * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel
 * docs vector )/*  w w  w . j  a  v a2  s . c  o m*/
 *
 * @param query
 *
 * @return expandedQuery
 *
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //*****************************************************************
    //**************** Compute the PRF for field (i)******************* 
    //*****************************************************************
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set

    Map<String, Float> parameters = new HashMap<>();
    parameters.put(RocchioQueryExpansion.ROCCHIO_ALPHA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_BETA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.ROCCHIO_GAMMA_FLD, (float) 1.0);
    parameters.put(RocchioQueryExpansion.DECAY_FLD, (float) 0.0);
    Map<String, Float> rocchioVector = null;
    if (model.equals("tfidf")) {
        RocchioQueryExpansion rocchioQueryExpansion = new RocchioQueryExpansion(hits, ir, parameters, source,
                Nbr_Docs, Nbr_Terms);
        rocchioVector = rocchioQueryExpansion.getRocchioVector(PatentQuery.getFields()[source]);
    }

    //                System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + ".");
    Query expandedQuery = null;
    MMRQueryExpansion qe = new MMRQueryExpansion(hits, ir, rocchioVector, MMRQE_ALPHA, MMRQE_LAMBDA,
            PatentQuery.getFields()[source], Nbr_Docs, Nbr_Terms);

    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            //                BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            //                if (query.isFilter()) {
            //                    Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
            //                            new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
            //                    bQueryFields.add(filter, BooleanClause.Occur.MUST);
            //                }
            //                if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
            //                    bQueryFields.add(q, BooleanClause.Occur.MUST);
            //                }
            if (expandedQuery == null) {
                expandedQuery = qe.expandQuery(q, PatentQuery.getFields()[i]);
            } else {
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
            //                System.err.println("Expanded Query: " + expandedQuery);
            //                hits = searcher.search(expandedQuery, 100);
            //                System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + ".");
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    //        TopDocs hits = searcher.search(bQuery, 100);
    //                System.err.println(hits.totalHits + " total matching documents.");
    return bQuery;
}

From source file:nicta.com.au.patent.queryreduction.PatentMMRQueryReduction.java

License:Apache License

/**
 * Performs Rocchio's query expansion with pseudo feedback for each fields
 * separatlly qm = alpha * query + ( beta / relevanDocsCount ) * Sum ( rel
 * docs vector )/*from ww  w.  ja  v a  2 s .  c om*/
 *
 * @param query
 *
 * @return expandedQuery
 *
 * @throws IOException
 * @throws ParseException
 */
@Override
public Query expandQuery(PatentQuery query) throws ParseException, IOException {
    IndexReader ir = searcher.getIndexReader();
    BooleanQuery bQuery = new BooleanQuery();
    BooleanQuery bQueryFieldsExpanded = new BooleanQuery();
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
    //*****************************************************************
    //**************** Compute the PRF for field (i)******************* 
    //*****************************************************************
    TotalHitCountCollector collector = new TotalHitCountCollector();
    searcher.search(query.parse(), collector);
    TopDocs hits = searcher.search(query.parse(), Math.max(1, collector.getTotalHits())); // Compute PRF set

    //                System.err.println(hits.totalHits + " total matching documents for field " + query.getFields()[i] + ".");
    Query expandedQuery = null;
    MMRQueryReduction qe = new MMRQueryReduction(hits, ir, MMRQE_LAMBDA, PatentQuery.getFields()[source],
            Nbr_Docs, Nbr_Terms);

    for (int i = 1; i < PatentQuery.getFields().length; i++) {
        if (query.getQueries()[i] != null && !query.getQueries()[i].equals("") && (i != 4 || i != 6)
                && query.getBoosts().get(PatentQuery.getFields()[i]) != 0) {
            QueryParser qp = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[i],
                    new StandardAnalyzer(Version.LUCENE_48));
            //                BooleanQuery bQueryFields = new BooleanQuery();// Contain a field to make the PRF field by field
            Query q = qp.parse(query.getQueries()[i]);
            //                if (query.isFilter()) {
            //                    Query filter = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
            //                            new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
            //                    bQueryFields.add(filter, BooleanClause.Occur.MUST);
            //                }
            //                if (!(q instanceof BooleanQuery) || ((BooleanQuery) q).getClauses().length > 0) {
            //                    bQueryFields.add(q, BooleanClause.Occur.MUST);
            //                }
            if (expandedQuery == null) {
                expandedQuery = qe.reduceQuery(q, PatentQuery.getFields()[i]);
            } else {
                BooleanQuery bq = ((BooleanQuery) expandedQuery).clone();
                BooleanQuery bq2 = new BooleanQuery();
                for (BooleanClause bc : bq.clauses()) {
                    TermQuery tq = (TermQuery) bc.getQuery();
                    Term term = new Term(PatentQuery.getFields()[i], tq.getTerm().text());
                    TermQuery tq2 = new TermQuery(term);
                    tq2.setBoost(tq.getBoost());
                    bq2.add(tq2, BooleanClause.Occur.SHOULD);
                }
                expandedQuery = bq2;
            }
            bQueryFieldsExpanded.add(expandedQuery, BooleanClause.Occur.SHOULD);// Compute the new expanded query based on PRF set
            //                System.err.println("Expanded Query: " + expandedQuery);
            //                hits = searcher.search(expandedQuery, 100);
            //                System.err.println(hits.totalHits + " total matching documents"+ query.getFields()[i] + ".");
        }
    }
    if (query.isFilter()) {
        Query q = new QueryParser(Version.LUCENE_48, PatentQuery.getFields()[0],
                new StandardAnalyzer(Version.LUCENE_48)).parse(query.getQueries()[0]);
        q.setBoost(query.getBoosts().get(PatentQuery.getFields()[0]));
        bQuery.add(q, BooleanClause.Occur.MUST);
    }
    bQuery.add(bQueryFieldsExpanded, BooleanClause.Occur.MUST);
    //        TopDocs hits = searcher.search(bQuery, 100);
    //                System.err.println(hits.totalHits + " total matching documents.");
    return bQuery;
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Returns true only when {@code q} is a BooleanQuery that has at least one
 * clause and every one of its clauses is prohibited (MUST_NOT) -- i.e. the
 * query is purely negative.
 */
static boolean isNegative(Query q) {
    if (!(q instanceof BooleanQuery)) {
        return false;
    }
    final List<BooleanClause> allClauses = ((BooleanQuery) q).clauses();
    if (allClauses.isEmpty()) {
        return false;
    }
    // Purely negative iff no clause is anything other than MUST_NOT.
    for (int idx = 0; idx < allClauses.size(); idx++) {
        if (!allClauses.get(idx).isProhibited()) {
            return false;
        }
    }
    return true;
}

From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java

License:Open Source License

@SuppressWarnings("unchecked")
protected Query getFieldQueryImpl(String field, String queryText, AnalysisMode analysisMode,
        LuceneFunction luceneFunction) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or noth

    // TODO: Untokenised columns with functions require special handling

    if (luceneFunction != LuceneFunction.FIELD) {
        throw new UnsupportedOperationException(
                "Field queries are not supported on lucene functions (UPPER, LOWER, etc)");
    }/*from  w w  w  . j av a2  s. c o m*/

    // if the incoming string already has a language identifier we strip it iff and addit back on again

    String localePrefix = "";

    String toTokenise = queryText;

    if (queryText.startsWith("{")) {
        int position = queryText.indexOf("}");
        String language = queryText.substring(0, position + 1);
        Locale locale = new Locale(queryText.substring(1, position));
        String token = queryText.substring(position + 1);
        boolean found = false;
        if (!locale.toString().isEmpty()) {
            for (Locale current : Locale.getAvailableLocales()) {
                if (current.toString().equalsIgnoreCase(locale.toString())) {
                    found = true;
                    break;
                }
            }
        }
        if (found) {
            localePrefix = language;
            toTokenise = token;
        } else {
            toTokenise = token;
        }
    }

    String testText = toTokenise;
    boolean requiresMLTokenDuplication = false;
    String localeString = null;
    if (field.startsWith(PROPERTY_FIELD_PREFIX) && (localePrefix.length() == 0)) {
        if ((queryText.length() > 0) && (queryText.charAt(0) == '\u0000')) {
            int position = queryText.indexOf("\u0000", 1);
            testText = queryText.substring(position + 1);
            requiresMLTokenDuplication = true;
            localeString = queryText.substring(1, position);
        }
    }

    // find the positions of any escaped * and ? and ignore them

    Set<Integer> wildcardPoistions = getWildcardPositions(testText);

    TokenStream source;
    if ((localePrefix.length() == 0) || (wildcardPoistions.size() > 0)
            || (analysisMode == AnalysisMode.IDENTIFIER)) {
        source = getAnalyzer().tokenStream(field, new StringReader(toTokenise), analysisMode);
    } else {
        source = getAnalyzer().tokenStream(field, new StringReader(
                "\u0000" + localePrefix.substring(1, localePrefix.length() - 1) + "\u0000" + toTokenise),
                analysisMode);
        localePrefix = "";
    }

    ArrayList<org.apache.lucene.analysis.Token> list = new ArrayList<org.apache.lucene.analysis.Token>();
    org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
    org.apache.lucene.analysis.Token nextToken;
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    while (true) {
        try {
            nextToken = source.next(reusableToken);
        } catch (IOException e) {
            nextToken = null;
        }
        if (nextToken == null)
            break;
        list.add((org.apache.lucene.analysis.Token) nextToken.clone());
        if (nextToken.getPositionIncrement() != 0)
            positionCount += nextToken.getPositionIncrement();
        else
            severalTokensAtSamePosition = true;
    }
    try {
        source.close();
    } catch (IOException e) {
        // ignore
    }

    // add any alpha numeric wildcards that have been missed
    // Fixes most stop word and wild card issues

    for (int index = 0; index < testText.length(); index++) {
        char current = testText.charAt(index);
        if (((current == '*') || (current == '?')) && wildcardPoistions.contains(index)) {
            StringBuilder pre = new StringBuilder(10);
            if (index == 0) {
                // "*" and "?" at the start

                boolean found = false;
                for (int j = 0; j < list.size(); j++) {
                    org.apache.lucene.analysis.Token test = list.get(j);
                    if ((test.startOffset() <= 0) && (0 < test.endOffset())) {
                        found = true;
                        break;
                    }
                }
                if (!found && (testText.length() == 1)) {
                    // Add new token followed by * not given by the tokeniser
                    org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(0, 0);
                    newToken.setTermBuffer("");
                    newToken.setType("ALPHANUM");
                    if (requiresMLTokenDuplication) {
                        Locale locale = I18NUtil.parseLocale(localeString);
                        MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null
                                ? defaultSearchMLAnalysisMode
                                : searchParameters.getMlAnalaysisMode();
                        MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode);
                        Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                        if (it != null) {
                            int count = 0;
                            while (it.hasNext()) {
                                list.add(it.next());
                                count++;
                                if (count > 1) {
                                    severalTokensAtSamePosition = true;
                                }
                            }
                        }
                    }
                    // content
                    else {
                        list.add(newToken);
                    }
                }
            } else if (index > 0) {
                // Add * and ? back into any tokens from which it has been removed

                boolean tokenFound = false;
                for (int j = 0; j < list.size(); j++) {
                    org.apache.lucene.analysis.Token test = list.get(j);
                    if ((test.startOffset() <= index) && (index < test.endOffset())) {
                        if (requiresMLTokenDuplication) {
                            String termText = new String(test.termBuffer(), 0, test.termLength());
                            int position = termText.indexOf("}");
                            String language = termText.substring(0, position + 1);
                            String token = termText.substring(position + 1);
                            if (index >= test.startOffset() + token.length()) {
                                test.setTermBuffer(language + token + current);
                            }
                        } else {
                            if (index >= test.startOffset() + test.termLength()) {
                                test.setTermBuffer(test.term() + current);
                            }
                        }
                        tokenFound = true;
                        break;
                    }
                }

                if (!tokenFound) {
                    for (int i = index - 1; i >= 0; i--) {
                        char c = testText.charAt(i);
                        if (Character.isLetterOrDigit(c)) {
                            boolean found = false;
                            for (int j = 0; j < list.size(); j++) {
                                org.apache.lucene.analysis.Token test = list.get(j);
                                if ((test.startOffset() <= i) && (i < test.endOffset())) {
                                    found = true;
                                    break;
                                }
                            }
                            if (found) {
                                break;
                            } else {
                                pre.insert(0, c);
                            }
                        } else {
                            break;
                        }
                    }
                    if (pre.length() > 0) {
                        // Add new token followed by * not given by the tokeniser
                        org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(
                                index - pre.length(), index);
                        newToken.setTermBuffer(pre.toString());
                        newToken.setType("ALPHANUM");
                        if (requiresMLTokenDuplication) {
                            Locale locale = I18NUtil.parseLocale(localeString);
                            MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null
                                    ? defaultSearchMLAnalysisMode
                                    : searchParameters.getMlAnalaysisMode();
                            MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode);
                            Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                            if (it != null) {
                                int count = 0;
                                while (it.hasNext()) {
                                    list.add(it.next());
                                    count++;
                                    if (count > 1) {
                                        severalTokensAtSamePosition = true;
                                    }
                                }
                            }
                        }
                        // content
                        else {
                            list.add(newToken);
                        }
                    }
                }
            }

            StringBuilder post = new StringBuilder(10);
            if (index > 0) {
                for (int i = index + 1; i < testText.length(); i++) {
                    char c = testText.charAt(i);
                    if (Character.isLetterOrDigit(c)) {
                        boolean found = false;
                        for (int j = 0; j < list.size(); j++) {
                            org.apache.lucene.analysis.Token test = list.get(j);
                            if ((test.startOffset() <= i) && (i < test.endOffset())) {
                                found = true;
                                break;
                            }
                        }
                        if (found) {
                            break;
                        } else {
                            post.append(c);
                        }
                    } else {
                        break;
                    }
                }
                if (post.length() > 0) {
                    // Add new token followed by * not given by the tokeniser
                    org.apache.lucene.analysis.Token newToken = new org.apache.lucene.analysis.Token(index + 1,
                            index + 1 + post.length());
                    newToken.setTermBuffer(post.toString());
                    newToken.setType("ALPHANUM");
                    if (requiresMLTokenDuplication) {
                        Locale locale = I18NUtil.parseLocale(localeString);
                        MLAnalysisMode mlAnalysisMode = searchParameters.getMlAnalaysisMode() == null
                                ? defaultSearchMLAnalysisMode
                                : searchParameters.getMlAnalaysisMode();
                        MLTokenDuplicator duplicator = new MLTokenDuplicator(locale, mlAnalysisMode);
                        Iterator<org.apache.lucene.analysis.Token> it = duplicator.buildIterator(newToken);
                        if (it != null) {
                            int count = 0;
                            while (it.hasNext()) {
                                list.add(it.next());
                                count++;
                                if (count > 1) {
                                    severalTokensAtSamePosition = true;
                                }
                            }
                        }
                    }
                    // content
                    else {
                        list.add(newToken);
                    }
                }
            }

        }
    }

    Collections.sort(list, new Comparator<org.apache.lucene.analysis.Token>() {

        public int compare(Token o1, Token o2) {
            int dif = o1.startOffset() - o2.startOffset();
            if (dif != 0) {
                return dif;
            } else {
                return o2.getPositionIncrement() - o1.getPositionIncrement();
            }
        }
    });

    // Combined * and ? based strings - should redo the tokeniser

    // Build tokens by position

    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> tokensByPosition = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();
    LinkedList<org.apache.lucene.analysis.Token> currentList = null;
    for (org.apache.lucene.analysis.Token c : list) {
        if (c.getPositionIncrement() == 0) {
            if (currentList == null) {
                currentList = new LinkedList<org.apache.lucene.analysis.Token>();
                tokensByPosition.add(currentList);
            }
            currentList.add(c);
        } else {
            currentList = new LinkedList<org.apache.lucene.analysis.Token>();
            tokensByPosition.add(currentList);
            currentList.add(c);
        }
    }

    // Build all the token sequences and see which ones get strung together

    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> allTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokensAtPosition : tokensByPosition) {
        if (allTokenSequences.size() == 0) {
            for (org.apache.lucene.analysis.Token t : tokensAtPosition) {
                LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                newEntry.add(t);
                allTokenSequences.add(newEntry);
            }
        } else {
            LinkedList<LinkedList<org.apache.lucene.analysis.Token>> newAllTokeSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();

            FOR_FIRST_TOKEN_AT_POSITION_ONLY: for (org.apache.lucene.analysis.Token t : tokensAtPosition) {
                boolean tokenFoundSequence = false;
                for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) {
                    LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                    newEntry.addAll(tokenSequence);
                    if (newEntry.getLast().endOffset() <= t.startOffset()) {
                        newEntry.add(t);
                        tokenFoundSequence = true;
                    }
                    newAllTokeSequences.add(newEntry);
                }
                if (false == tokenFoundSequence) {
                    LinkedList<org.apache.lucene.analysis.Token> newEntry = new LinkedList<org.apache.lucene.analysis.Token>();
                    newEntry.add(t);
                    newAllTokeSequences.add(newEntry);
                }
                // Limit the max number of permutations we consider
                if (newAllTokeSequences.size() > 64) {
                    break FOR_FIRST_TOKEN_AT_POSITION_ONLY;
                }
            }
            allTokenSequences = newAllTokeSequences;
        }
    }

    // build the unique token sequences

    LinkedList<LinkedList<org.apache.lucene.analysis.Token>> fixedTokenSequences = new LinkedList<LinkedList<org.apache.lucene.analysis.Token>>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : allTokenSequences) {
        LinkedList<org.apache.lucene.analysis.Token> fixedTokenSequence = new LinkedList<org.apache.lucene.analysis.Token>();
        fixedTokenSequences.add(fixedTokenSequence);
        org.apache.lucene.analysis.Token replace = null;
        for (org.apache.lucene.analysis.Token c : tokenSequence) {
            if (replace == null) {
                StringBuilder prefix = new StringBuilder();
                for (int i = c.startOffset() - 1; i >= 0; i--) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        prefix.insert(0, test);
                    } else {
                        break;
                    }
                }
                String pre = prefix.toString();
                if (requiresMLTokenDuplication) {
                    String termText = new String(c.termBuffer(), 0, c.termLength());
                    int position = termText.indexOf("}");
                    String language = termText.substring(0, position + 1);
                    String token = termText.substring(position + 1);
                    replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(),
                            c.endOffset());
                    replace.setTermBuffer(language + pre + token);
                    replace.setType(c.type());
                    replace.setPositionIncrement(c.getPositionIncrement());
                } else {
                    String termText = new String(c.termBuffer(), 0, c.termLength());
                    replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(),
                            c.endOffset());
                    replace.setTermBuffer(pre + termText);
                    replace.setType(c.type());
                    replace.setPositionIncrement(c.getPositionIncrement());
                }
            } else {
                StringBuilder prefix = new StringBuilder();
                StringBuilder postfix = new StringBuilder();
                StringBuilder builder = prefix;
                for (int i = c.startOffset() - 1; i >= replace.endOffset(); i--) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        builder.insert(0, test);
                    } else {
                        builder = postfix;
                        postfix.setLength(0);
                    }
                }
                String pre = prefix.toString();
                String post = postfix.toString();

                // Does it bridge?
                if ((pre.length() > 0) && (replace.endOffset() + pre.length()) == c.startOffset()) {
                    String termText = new String(c.termBuffer(), 0, c.termLength());
                    if (requiresMLTokenDuplication) {
                        int position = termText.indexOf("}");
                        @SuppressWarnings("unused")
                        String language = termText.substring(0, position + 1);
                        String token = termText.substring(position + 1);
                        int oldPositionIncrement = replace.getPositionIncrement();
                        String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength());
                        replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset());
                        replace.setTermBuffer(replaceTermText + pre + token);
                        replace.setType(replace.type());
                        replace.setPositionIncrement(oldPositionIncrement);
                    } else {
                        int oldPositionIncrement = replace.getPositionIncrement();
                        String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength());
                        replace = new org.apache.lucene.analysis.Token(replace.startOffset(), c.endOffset());
                        replace.setTermBuffer(replaceTermText + pre + termText);
                        replace.setType(replace.type());
                        replace.setPositionIncrement(oldPositionIncrement);
                    }
                } else {
                    String termText = new String(c.termBuffer(), 0, c.termLength());
                    if (requiresMLTokenDuplication) {
                        int position = termText.indexOf("}");
                        String language = termText.substring(0, position + 1);
                        String token = termText.substring(position + 1);
                        String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength());
                        org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token(
                                replace.startOffset(), replace.endOffset() + post.length());
                        last.setTermBuffer(replaceTermText + post);
                        last.setType(replace.type());
                        last.setPositionIncrement(replace.getPositionIncrement());
                        fixedTokenSequence.add(last);
                        replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(),
                                c.endOffset());
                        replace.setTermBuffer(language + pre + token);
                        replace.setType(c.type());
                        replace.setPositionIncrement(c.getPositionIncrement());
                    } else {
                        String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength());
                        org.apache.lucene.analysis.Token last = new org.apache.lucene.analysis.Token(
                                replace.startOffset(), replace.endOffset() + post.length());
                        last.setTermBuffer(replaceTermText + post);
                        last.setType(replace.type());
                        last.setPositionIncrement(replace.getPositionIncrement());
                        fixedTokenSequence.add(last);
                        replace = new org.apache.lucene.analysis.Token(c.startOffset() - pre.length(),
                                c.endOffset());
                        replace.setTermBuffer(pre + termText);
                        replace.setType(c.type());
                        replace.setPositionIncrement(c.getPositionIncrement());
                    }
                }
            }
        }
        // finish last
        if (replace != null) {
            StringBuilder postfix = new StringBuilder();
            if ((replace.endOffset() >= 0) && (replace.endOffset() < testText.length())) {
                for (int i = replace.endOffset(); i < testText.length(); i++) {
                    char test = testText.charAt(i);
                    if (((test == '*') || (test == '?')) && wildcardPoistions.contains(i)) {
                        postfix.append(test);
                    } else {
                        break;
                    }
                }
            }
            String post = postfix.toString();
            int oldPositionIncrement = replace.getPositionIncrement();
            String replaceTermText = new String(replace.termBuffer(), 0, replace.termLength());
            replace = new org.apache.lucene.analysis.Token(replace.startOffset(),
                    replace.endOffset() + post.length());
            replace.setTermBuffer(replaceTermText + post);
            replace.setType(replace.type());
            replace.setPositionIncrement(oldPositionIncrement);
            fixedTokenSequence.add(replace);
        }
    }

    // rebuild fixed list

    ArrayList<org.apache.lucene.analysis.Token> fixed = new ArrayList<org.apache.lucene.analysis.Token>();
    for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) {
        for (org.apache.lucene.analysis.Token token : tokenSequence) {
            fixed.add(token);
        }
    }

    // reorder by start position and increment

    Collections.sort(fixed, new Comparator<org.apache.lucene.analysis.Token>() {

        public int compare(Token o1, Token o2) {
            int dif = o1.startOffset() - o2.startOffset();
            if (dif != 0) {
                return dif;
            } else {
                return o1.getPositionIncrement() - o2.getPositionIncrement();
            }
        }
    });

    // make sure we remove any tokens we have duplicated

    @SuppressWarnings("rawtypes")
    OrderedHashSet unique = new OrderedHashSet();
    unique.addAll(fixed);
    fixed = new ArrayList<org.apache.lucene.analysis.Token>(unique);

    list = fixed;

    // add any missing locales back to the tokens

    if (localePrefix.length() > 0) {
        for (int j = 0; j < list.size(); j++) {
            org.apache.lucene.analysis.Token currentToken = list.get(j);
            String termText = new String(currentToken.termBuffer(), 0, currentToken.termLength());
            currentToken.setTermBuffer(localePrefix + termText);
        }
    }

    if (list.size() == 0)
        return null;
    else if (list.size() == 1) {
        nextToken = list.get(0);
        String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength());
        if (termText.contains("*") || termText.contains("?")) {
            return newWildcardQuery(
                    new Term(field, getLowercaseExpandedTerms() ? termText.toLowerCase() : termText));
        } else {
            return newTermQuery(new Term(field, termText));
        }
    } else {
        if (severalTokensAtSamePosition) {
            if (positionCount == 1) {
                // no phrase query:
                BooleanQuery q = newBooleanQuery(true);
                for (int i = 0; i < list.size(); i++) {
                    Query currentQuery;
                    nextToken = list.get(i);
                    String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength());
                    if (termText.contains("*") || termText.contains("?")) {
                        currentQuery = newWildcardQuery(new Term(field,
                                getLowercaseExpandedTerms() ? termText.toLowerCase() : termText));
                    } else {
                        currentQuery = newTermQuery(new Term(field, termText));
                    }
                    q.add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            }
            // Consider if we can use a multi-phrase query (e.g for synonym use rather then WordDelimiterFilterFactory)
            else if (canUseMultiPhraseQuery(fixedTokenSequences)) {
                // phrase query:
                MultiPhraseQuery mpq = newMultiPhraseQuery();
                mpq.setSlop(internalSlop);
                ArrayList<Term> multiTerms = new ArrayList<Term>();
                int position = 0;
                for (int i = 0; i < list.size(); i++) {
                    nextToken = list.get(i);
                    String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength());

                    Term term = new Term(field, termText);
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        addWildcardTerms(multiTerms, term);
                    } else {
                        multiTerms.add(term);
                    }

                    if (nextToken.getPositionIncrement() > 0 && multiTerms.size() > 0) {
                        if (getEnablePositionIncrements()) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        checkTermCount(field, queryText, mpq);
                        multiTerms.clear();
                    }
                    position += nextToken.getPositionIncrement();

                }
                if (getEnablePositionIncrements()) {
                    if (multiTerms.size() > 0) {
                        mpq.add(multiTerms.toArray(new Term[0]), position);
                    }
                    //                        else
                    //                        {
                    //                            mpq.add(new Term[] { new Term(field, "\u0000") }, position);
                    //                        }
                } else {
                    if (multiTerms.size() > 0) {
                        mpq.add(multiTerms.toArray(new Term[0]));
                    }
                    //                        else
                    //                        {
                    //                            mpq.add(new Term[] { new Term(field, "\u0000") });
                    //                        }
                }
                checkTermCount(field, queryText, mpq);
                return mpq;

            }
            // Word delimiter factory and other odd things generate complex token patterns
            // Smart skip token sequences with small tokens that generate too many wildcards
            // Fall back to the larger pattern
            // e.g Site1* will not do (S ite 1*) or (Site 1*)  if 1* matches too much (S ite1*)  and (Site1*) will still be OK 
            // If we skip all (for just 1* in the input) this is still an issue.
            else {
                boolean skippedTokens = false;
                BooleanQuery q = newBooleanQuery(true);
                TOKEN_SEQUENCE: for (LinkedList<org.apache.lucene.analysis.Token> tokenSequence : fixedTokenSequences) {
                    // phrase query:
                    MultiPhraseQuery mpq = newMultiPhraseQuery();
                    mpq.setSlop(internalSlop);
                    int position = 0;
                    for (int i = 0; i < tokenSequence.size(); i++) {
                        nextToken = (org.apache.lucene.analysis.Token) tokenSequence.get(i);
                        String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength());

                        Term term = new Term(field, termText);

                        if (getEnablePositionIncrements()) {
                            if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                                mpq.add(getMatchingTerms(field, term), position);
                            } else {
                                mpq.add(new Term[] { term }, position);
                            }
                            if (exceedsTermCount(mpq)) {
                                // We could duplicate the token sequence without the failing wildcard expansion and try again ??
                                skippedTokens = true;
                                continue TOKEN_SEQUENCE;
                            }
                            if (nextToken.getPositionIncrement() > 0) {
                                position += nextToken.getPositionIncrement();
                            } else {
                                position++;
                            }

                        } else {
                            if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                                mpq.add(getMatchingTerms(field, term));
                            } else {
                                mpq.add(term);
                            }
                            if (exceedsTermCount(mpq)) {
                                skippedTokens = true;
                                continue TOKEN_SEQUENCE;
                            }
                        }
                    }
                    q.add(mpq, BooleanClause.Occur.SHOULD);
                }
                if (skippedTokens && (q.clauses().size() == 0)) {
                    throw new LuceneQueryParserException(
                            "Query skipped all token sequences as wildcards generated too many clauses: "
                                    + field + " " + queryText);
                }
                return q;
            }
        } else {
            MultiPhraseQuery q = new MultiPhraseQuery();
            q.setSlop(internalSlop);
            int position = 0;
            for (int i = 0; i < list.size(); i++) {
                nextToken = list.get(i);
                String termText = new String(nextToken.termBuffer(), 0, nextToken.termLength());
                Term term = new Term(field, termText);
                if (getEnablePositionIncrements()) {
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        q.add(getMatchingTerms(field, term), position);
                    } else {
                        q.add(new Term[] { term }, position);
                    }
                    checkTermCount(field, queryText, q);
                    if (nextToken.getPositionIncrement() > 0) {
                        position += nextToken.getPositionIncrement();
                    } else {
                        position++;
                    }
                } else {
                    if ((termText != null) && (termText.contains("*") || termText.contains("?"))) {
                        q.add(getMatchingTerms(field, term));
                    } else {
                        q.add(term);
                    }
                    checkTermCount(field, queryText, q);
                }
            }
            return q;
        }
    }
}

From source file: org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java

License: Open Source License

/**
 * Builds a query matching an inclusive date-time range between {@code startIn} and
 * {@code endIn} on a field whose index terms encode individual date components with
 * the prefixes "YE" (year), "MO", "DA", "HO", "MI", "SE" and "MS" (formatted via
 * build2SF / build3SF). Each bound carries its own resolution — a {@link Calendar}
 * field constant — naming the finest component meaningful for that bound.
 *
 * Strategy: while start and end agree on a component (and both bounds are specified
 * at least that finely), emit an exact term for it (MUST) and recurse one level
 * finer. At the first component where they differ, build a SHOULD sub-query of:
 * start-side tail clauses (buildStart), an exclusive middle range over this
 * component, and end-side tail clauses (buildEnd).
 *
 * @param field           index field holding the encoded date components
 * @param startIn         lower bound; copied defensively, never mutated
 * @param startResolution finest Calendar field meaningful in {@code startIn}
 * @param endIn           upper bound; copied defensively, never mutated
 * @param endResolution   finest Calendar field meaningful in {@code endIn}
 * @param includeLower    whether the lower bound itself should match
 * @param includeUpper    whether the upper bound itself should match
 * @return a query for the range, or {@code createNoMatchQuery()} when the range is empty
 * @throws ParseException declared for subclass use; not thrown directly here
 */
protected Query buildDateTimeRange(String field, Calendar startIn, int startResolution, Calendar endIn,
        int endResolution, boolean includeLower, boolean includeUpper) throws ParseException {
    // Calendar field constants grow with fineness (YEAR=1 ... MILLISECOND=14), so the
    // smaller constant is the coarser of the two resolutions.
    int minResolution = (startResolution <= endResolution) ? startResolution : endResolution;

    // fix start and end dates and treat all as inclusive ranges

    Calendar start = Calendar.getInstance();
    start.setTime(startIn.getTime()); // defensive copy so the caller's calendar is untouched
    if (!includeLower) {
        // Exclusive lower bound: bump by one unit of its own resolution to make it inclusive.
        start.add(startResolution, 1);
    }

    Calendar end = Calendar.getInstance();
    end.setTime(endIn.getTime());
    if (!includeUpper) {
        // Exclusive upper bound: pull back by one unit of its own resolution.
        end.add(endResolution, -1);
    }

    // Calendar comparison does not work for MAX .... joy
    // Compare component-by-component from coarse to fine; as soon as start > end the
    // (now inclusive) range is empty and nothing can match.
    if (start.get(Calendar.YEAR) > end.get(Calendar.YEAR)) {
        return createNoMatchQuery();
    } else if (start.get(Calendar.YEAR) == end.get(Calendar.YEAR)) {
        if (start.get(Calendar.MONTH) > end.get(Calendar.MONTH)) {
            return createNoMatchQuery();
        } else if (start.get(Calendar.MONTH) == end.get(Calendar.MONTH)) {
            if (start.get(Calendar.DAY_OF_MONTH) > end.get(Calendar.DAY_OF_MONTH)) {
                return createNoMatchQuery();
            } else if (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH)) {
                if (start.get(Calendar.HOUR_OF_DAY) > end.get(Calendar.HOUR_OF_DAY)) {
                    return createNoMatchQuery();
                } else if (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY)) {
                    if (start.get(Calendar.MINUTE) > end.get(Calendar.MINUTE)) {
                        return createNoMatchQuery();
                    } else if (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE)) {
                        if (start.get(Calendar.SECOND) > end.get(Calendar.SECOND)) {
                            return createNoMatchQuery();
                        } else if (start.get(Calendar.SECOND) == end.get(Calendar.SECOND)) {
                            if (start.get(Calendar.MILLISECOND) > end.get(Calendar.MILLISECOND)) {
                                return createNoMatchQuery();
                            } else if (start.get(Calendar.MILLISECOND) == end.get(Calendar.MILLISECOND)) {
                                // continue
                            }
                        }
                    }
                }
            }
        }
    }

    BooleanQuery query = new BooleanQuery();
    Query part;
    // Coarse-to-fine descent: each level either pins an equal component with an exact
    // term, or — at the first differing component — builds the three-part SHOULD
    // sub-query described in the javadoc.
    if ((minResolution > Calendar.YEAR) && (start.get(Calendar.YEAR) == end.get(Calendar.YEAR))) {
        part = new TermQuery(new Term(field, "YE" + start.get(Calendar.YEAR)));
        query.add(part, Occur.MUST);
        if ((minResolution > Calendar.MONTH) && (start.get(Calendar.MONTH) == end.get(Calendar.MONTH))) {
            part = new TermQuery(new Term(field, build2SF("MO", start.get(Calendar.MONTH))));
            query.add(part, Occur.MUST);
            if ((minResolution > Calendar.DAY_OF_MONTH)
                    && (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH))) {
                part = new TermQuery(new Term(field, build2SF("DA", start.get(Calendar.DAY_OF_MONTH))));
                query.add(part, Occur.MUST);
                if ((minResolution > Calendar.HOUR_OF_DAY)
                        && (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY))) {
                    part = new TermQuery(new Term(field, build2SF("HO", start.get(Calendar.HOUR_OF_DAY))));
                    query.add(part, Occur.MUST);
                    if ((minResolution > Calendar.MINUTE)
                            && (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE))) {
                        part = new TermQuery(new Term(field, build2SF("MI", start.get(Calendar.MINUTE))));
                        query.add(part, Occur.MUST);
                        if ((minResolution > Calendar.SECOND)
                                && (start.get(Calendar.SECOND) == end.get(Calendar.SECOND))) {
                            part = new TermQuery(new Term(field, build2SF("SE", start.get(Calendar.SECOND))));
                            query.add(part, Occur.MUST);
                            // Only MILLISECOND (14) can satisfy >=, i.e. both bounds are
                            // specified to full millisecond precision.
                            if (minResolution >= Calendar.MILLISECOND) {
                                if (start.get(Calendar.MILLISECOND) == end.get(Calendar.MILLISECOND)) {

                                    part = new TermQuery(
                                            new Term(field, build3SF("MS", start.get(Calendar.MILLISECOND))));
                                    query.add(part, Occur.MUST);

                                } else {
                                    part = new ConstantScoreRangeQuery(field,
                                            build3SF("MS", start.get(Calendar.MILLISECOND)),
                                            build3SF("MS", end.get(Calendar.MILLISECOND)), true, true);
                                    query.add(part, Occur.MUST);
                                }
                            } else {
                                return createNoMatchQuery();
                            }
                        } else {
                            // s + ms — seconds differ (or a bound stops at SECOND):
                            // start tail, middle seconds range, end tail.

                            BooleanQuery subQuery = new BooleanQuery();
                            Query subPart;

                            for (int i : new int[] { Calendar.MILLISECOND }) {
                                subPart = buildStart(field, start, Calendar.SECOND, i, startResolution);
                                if (subPart != null) {
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }

                            if (Calendar.SECOND < minResolution) {
                                // Strictly-between seconds, excluding both endpoint seconds
                                // (those are handled by the tail clauses above/below).
                                if ((end.get(Calendar.SECOND) - start.get(Calendar.SECOND)) > 1) {
                                    subPart = new ConstantScoreRangeQuery(field,
                                            build2SF("SE", start.get(Calendar.SECOND)),
                                            build2SF("SE", end.get(Calendar.SECOND)), false, false);
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }
                            if (Calendar.SECOND == minResolution) {
                                // SECOND is the coarser bound's finest component: the
                                // original include flags apply directly at this level.
                                if (start.get(Calendar.SECOND) == end.get(Calendar.SECOND)) {
                                    if (includeLower && includeUpper) {
                                        // Exact term added to the outer conjunction, not the
                                        // SHOULD sub-query (same pattern at every level below).
                                        part = new TermQuery(
                                                new Term(field, build2SF("SE", start.get(Calendar.SECOND))));
                                        query.add(part, Occur.MUST);
                                    }

                                    else {
                                        return createNoMatchQuery();
                                    }
                                } else {
                                    subPart = new ConstantScoreRangeQuery(field,
                                            build2SF("SE", start.get(Calendar.SECOND)),
                                            build2SF("SE", end.get(Calendar.SECOND)), includeLower,
                                            includeUpper);
                                    subQuery.add(subPart, Occur.SHOULD);
                                }
                            }

                            for (int i : new int[] { Calendar.MILLISECOND }) {

                                subPart = buildEnd(field, end, Calendar.SECOND, i, endResolution);
                                if (subPart != null) {
                                    subQuery.add(subPart, Occur.SHOULD);
                                }

                            }

                            if (subQuery.clauses().size() > 0) {
                                query.add(subQuery, Occur.MUST);
                            }

                        }
                    } else {
                        // min + s + ms — minutes differ: same three-part structure,
                        // with tails built for both SECOND and MILLISECOND fineness.

                        BooleanQuery subQuery = new BooleanQuery();
                        Query subPart;

                        for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND }) {

                            subPart = buildStart(field, start, Calendar.MINUTE, i, startResolution);
                            if (subPart != null) {
                                subQuery.add(subPart, Occur.SHOULD);
                            }

                        }

                        if (Calendar.MINUTE < minResolution) {
                            if ((end.get(Calendar.MINUTE) - start.get(Calendar.MINUTE)) > 1) {
                                subPart = new ConstantScoreRangeQuery(field,
                                        build2SF("MI", start.get(Calendar.MINUTE)),
                                        build2SF("MI", end.get(Calendar.MINUTE)), false, false);
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }
                        if (Calendar.MINUTE == minResolution) {
                            if (start.get(Calendar.MINUTE) == end.get(Calendar.MINUTE)) {
                                if (includeLower && includeUpper) {
                                    part = new TermQuery(
                                            new Term(field, build2SF("MI", start.get(Calendar.MINUTE))));
                                    query.add(part, Occur.MUST);
                                }

                                else {
                                    return createNoMatchQuery();
                                }
                            } else {
                                subPart = new ConstantScoreRangeQuery(field,
                                        build2SF("MI", start.get(Calendar.MINUTE)),
                                        build2SF("MI", end.get(Calendar.MINUTE)), includeLower, includeUpper);
                                subQuery.add(subPart, Occur.SHOULD);
                            }
                        }

                        for (int i : new int[] { Calendar.SECOND, Calendar.MILLISECOND }) {

                            subPart = buildEnd(field, end, Calendar.MINUTE, i, endResolution);
                            if (subPart != null) {
                                subQuery.add(subPart, Occur.SHOULD);
                            }

                        }

                        if (subQuery.clauses().size() > 0) {
                            query.add(subQuery, Occur.MUST);
                        }
                    }
                } else {
                    // hr + min + s + ms — hours differ.

                    BooleanQuery subQuery = new BooleanQuery();
                    Query subPart;

                    for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE }) {

                        subPart = buildStart(field, start, Calendar.HOUR_OF_DAY, i, startResolution);
                        if (subPart != null) {
                            subQuery.add(subPart, Occur.SHOULD);
                        }

                    }

                    if (Calendar.HOUR_OF_DAY < minResolution) {
                        if ((end.get(Calendar.HOUR_OF_DAY) - start.get(Calendar.HOUR_OF_DAY)) > 1) {
                            subPart = new ConstantScoreRangeQuery(field,
                                    build2SF("HO", start.get(Calendar.HOUR_OF_DAY)),
                                    build2SF("HO", end.get(Calendar.HOUR_OF_DAY)), false, false);
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    if (Calendar.HOUR_OF_DAY == minResolution) {
                        if (start.get(Calendar.HOUR_OF_DAY) == end.get(Calendar.HOUR_OF_DAY)) {
                            if (includeLower && includeUpper) {
                                part = new TermQuery(
                                        new Term(field, build2SF("HO", start.get(Calendar.HOUR_OF_DAY))));
                                query.add(part, Occur.MUST);
                            }

                            else {
                                return createNoMatchQuery();
                            }
                        } else {
                            subPart = new ConstantScoreRangeQuery(field,
                                    build2SF("HO", start.get(Calendar.HOUR_OF_DAY)),
                                    build2SF("HO", end.get(Calendar.HOUR_OF_DAY)), includeLower, includeUpper);
                            subQuery.add(subPart, Occur.SHOULD);
                        }
                    }
                    for (int i : new int[] { Calendar.MINUTE, Calendar.SECOND, Calendar.MILLISECOND }) {

                        subPart = buildEnd(field, end, Calendar.HOUR_OF_DAY, i, endResolution);
                        if (subPart != null) {
                            subQuery.add(subPart, Occur.SHOULD);
                        }

                    }

                    if (subQuery.clauses().size() > 0) {
                        query.add(subQuery, Occur.MUST);
                    }
                }
            } else {
                // day + hr + min + s + ms — days differ.

                BooleanQuery subQuery = new BooleanQuery();
                Query subPart;

                for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE,
                        Calendar.HOUR_OF_DAY }) {

                    subPart = buildStart(field, start, Calendar.DAY_OF_MONTH, i, startResolution);
                    if (subPart != null) {
                        subQuery.add(subPart, Occur.SHOULD);
                    }

                }

                if (Calendar.DAY_OF_MONTH < minResolution) {
                    if ((end.get(Calendar.DAY_OF_MONTH) - start.get(Calendar.DAY_OF_MONTH)) > 1) {
                        subPart = new ConstantScoreRangeQuery(field,
                                build2SF("DA", start.get(Calendar.DAY_OF_MONTH)),
                                build2SF("DA", end.get(Calendar.DAY_OF_MONTH)), false, false);
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }
                if (Calendar.DAY_OF_MONTH == minResolution) {
                    if (start.get(Calendar.DAY_OF_MONTH) == end.get(Calendar.DAY_OF_MONTH)) {
                        if (includeLower && includeUpper) {
                            part = new TermQuery(
                                    new Term(field, build2SF("DA", start.get(Calendar.DAY_OF_MONTH))));
                            query.add(part, Occur.MUST);
                        }

                        else {
                            return createNoMatchQuery();
                        }
                    } else {
                        subPart = new ConstantScoreRangeQuery(field,
                                build2SF("DA", start.get(Calendar.DAY_OF_MONTH)),
                                build2SF("DA", end.get(Calendar.DAY_OF_MONTH)), includeLower, includeUpper);
                        subQuery.add(subPart, Occur.SHOULD);
                    }
                }

                for (int i : new int[] { Calendar.HOUR_OF_DAY, Calendar.MINUTE, Calendar.SECOND,
                        Calendar.MILLISECOND }) {

                    subPart = buildEnd(field, end, Calendar.DAY_OF_MONTH, i, endResolution);
                    if (subPart != null) {
                        subQuery.add(subPart, Occur.SHOULD);
                    }

                }

                if (subQuery.clauses().size() > 0) {
                    query.add(subQuery, Occur.MUST);
                }

            }
        } else {
            // month + day + hr + min + s + ms — months differ.

            BooleanQuery subQuery = new BooleanQuery();
            Query subPart;

            for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE,
                    Calendar.HOUR_OF_DAY, Calendar.DAY_OF_MONTH }) {

                subPart = buildStart(field, start, Calendar.MONTH, i, startResolution);
                if (subPart != null) {
                    subQuery.add(subPart, Occur.SHOULD);
                }

            }

            if (Calendar.MONTH < minResolution) {
                if ((end.get(Calendar.MONTH) - start.get(Calendar.MONTH)) > 1) {
                    subPart = new ConstantScoreRangeQuery(field, build2SF("MO", start.get(Calendar.MONTH)),
                            build2SF("MO", end.get(Calendar.MONTH)), false, false);
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }
            if (Calendar.MONTH == minResolution) {
                if (start.get(Calendar.MONTH) == end.get(Calendar.MONTH)) {
                    if (includeLower && includeUpper) {
                        part = new TermQuery(new Term(field, build2SF("MO", start.get(Calendar.MONTH))));
                        query.add(part, Occur.MUST);
                    }

                    else {
                        return createNoMatchQuery();
                    }
                } else {
                    subPart = new ConstantScoreRangeQuery(field, build2SF("MO", start.get(Calendar.MONTH)),
                            build2SF("MO", end.get(Calendar.MONTH)), includeLower, includeUpper);
                    subQuery.add(subPart, Occur.SHOULD);
                }
            }

            for (int i : new int[] { Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE,
                    Calendar.SECOND, Calendar.MILLISECOND }) {

                subPart = buildEnd(field, end, Calendar.MONTH, i, endResolution);
                if (subPart != null) {
                    subQuery.add(subPart, Occur.SHOULD);
                }

            }

            if (subQuery.clauses().size() > 0) {
                query.add(subQuery, Occur.MUST);
            }
        }
    } else {
        // year + month + day + hr + min + s + ms — years differ (years use a plain
        // "YE" + int term rather than the build2SF zero-field helper).

        BooleanQuery subQuery = new BooleanQuery();
        Query subPart;

        for (int i : new int[] { Calendar.MILLISECOND, Calendar.SECOND, Calendar.MINUTE, Calendar.HOUR_OF_DAY,
                Calendar.DAY_OF_MONTH, Calendar.MONTH }) {

            subPart = buildStart(field, start, Calendar.YEAR, i, startResolution);
            if (subPart != null) {
                subQuery.add(subPart, Occur.SHOULD);
            }

        }

        if (Calendar.YEAR < minResolution) {
            if ((end.get(Calendar.YEAR) - start.get(Calendar.YEAR)) > 1) {
                subPart = new ConstantScoreRangeQuery(field, "YE" + start.get(Calendar.YEAR),
                        "YE" + end.get(Calendar.YEAR), false, false);
                subQuery.add(subPart, Occur.SHOULD);
            }
        }
        if (Calendar.YEAR == minResolution) {
            if (start.get(Calendar.YEAR) == end.get(Calendar.YEAR)) {
                if (includeLower && includeUpper) {
                    part = new TermQuery(new Term(field, "YE" + start.get(Calendar.YEAR)));
                    query.add(part, Occur.MUST);
                }

                else {
                    return createNoMatchQuery();
                }
            } else {
                subPart = new ConstantScoreRangeQuery(field, "YE" + start.get(Calendar.YEAR),
                        "YE" + end.get(Calendar.YEAR), includeLower, includeUpper);
                subQuery.add(subPart, Occur.SHOULD);
            }
        }

        for (int i : new int[] { Calendar.MONTH, Calendar.DAY_OF_MONTH, Calendar.HOUR_OF_DAY, Calendar.MINUTE,
                Calendar.SECOND, Calendar.MILLISECOND }) {

            subPart = buildEnd(field, end, Calendar.YEAR, i, endResolution);
            if (subPart != null) {
                subQuery.add(subPart, Occur.SHOULD);
            }

        }

        if (subQuery.clauses().size() > 0) {
            query.add(subQuery, Occur.MUST);
        }
    }

    return query;
}

From source file:org.alfresco.repo.search.impl.lucene.AbstractLuceneQueryParser.java

License:Open Source License

/**
 * Returns {@code null} if all clause words were filtered away by the analyzer.
 *
 * @param booleanQuery the boolean query to inspect
 * @return the query itself when it holds at least one clause, otherwise {@code null}
 */
protected BooleanQuery getNonEmptyBooleanQuery(BooleanQuery booleanQuery) {
    // An analyzer may drop every token (e.g. all stop-words), leaving a clause-less query.
    return booleanQuery.clauses().isEmpty() ? null : booleanQuery;
}

From source file:org.alfresco.solr.query.PostFilterQuery.java

License:Open Source License

/**
 * Recursively walks the given query tree and collects every {@link PostFilter}
 * found into the supplied accumulator.
 *
 * @param q           root of the query tree to inspect
 * @param postFilters accumulator receiving each PostFilter encountered
 */
private void getPostFilters(Query q, List<PostFilter> postFilters) {
    if (q instanceof BooleanQuery) {
        // Descend into every clause of the boolean tree.
        for (BooleanClause booleanClause : ((BooleanQuery) q).clauses()) {
            getPostFilters(booleanClause.getQuery(), postFilters);
        }
    } else if (q instanceof ContextAwareQuery) {
        // Unwrap and inspect the embedded Lucene query.
        getPostFilters(((ContextAwareQuery) q).getLuceneQuery(), postFilters);
    } else if (q instanceof PostFilter) {
        postFilters.add((PostFilter) q);
    } else if (q instanceof BoostQuery) {
        // A boost wrapper may hide a filter underneath.
        getPostFilters(((BoostQuery) q).getQuery(), postFilters);
    }
}