edu.rpi.tw.linkipedia.search.query.WeightedQuery.java Source code

Java tutorial

Introduction

Here is the source code for edu.rpi.tw.linkipedia.search.query.WeightedQuery.java

Source

/**
 * Linkipedia, Copyright (c) 2015 Tetherless World Constellation 
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package edu.rpi.tw.linkipedia.search.query;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
import org.apache.lucene.sandbox.queries.regex.RegexQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.payloads.MaxPayloadFunction;
import org.apache.lucene.search.payloads.PayloadTermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

import edu.rpi.tw.linkipedia.search.indexing.DefaultAnalyzer;
import edu.rpi.tw.linkipedia.search.utils.Utils;

/**
 * For type query, pay attention to numbers at the end
 * @author zhengj3
 *
 */
public class WeightedQuery {

    /** When {@code true}, {@code parse} and {@code isRelated} print diagnostics to standard output. */
    static boolean debug = false;

    /** Query built by the most recent {@code parse} or {@code getFuzzyQuery} call; reported by {@code toString}. */
    CustomScoreQuery finalQuery;

    /**
     * Payload function shared by every {@code PayloadTermQuery} created in this class.
     * It is initialised at declaration so that static entry points such as
     * {@code strictLabelRelate} never observe {@code null} when no
     * {@code WeightedQuery} instance has been constructed yet.
     */
    static MaxPayloadFunction payloadFunctionMax = new MaxPayloadFunction();

    // Only referenced by the disabled distance-based weighting code in assignContextWeight.
    double contextWeightLamda = 1;
    double weightThreshold = -1;

    // Boosts applied by parse() to the individual sub-queries; see setWeights().
    // The trailing numbers look like previously tried values -- TODO confirm.
    float labelBoost = 1;//10
    float contentBoost = 1;
    float relationBoost = 1;
    float typeBoost = 10;//5
    float defaultBoost = 1;//5
    // Not read by any query builder visible in this class.
    float analyzedLabelBoost = 1;

    /** Dictionary-based label expansion used by {@code getDictExpandQuery}. */
    DictQueryExpansion dict_exp;

    //   Hashtable<String, String> myDict = new Hashtable<String, String>();

    /**
     * Creates a query builder with the default boosts and a fresh
     * {@code DictQueryExpansion}.
     */
    public WeightedQuery() {
        //      initDict("Cognate.txt");
        dict_exp = new DictQueryExpansion();
    }

    //   
    //   public void initDict(String filename){
    //      try{
    //         String sCurrentLine;
    //          
    //         BufferedReader br = new BufferedReader(new FileReader(filename));
    // 
    //         while ((sCurrentLine = br.readLine()) != null) {
    //            String [] parts = sCurrentLine.toLowerCase().split("\\|");
    //            for(int i = 1; i < parts.length; i++){
    //               myDict.put(parts[i].trim(), parts[0].trim());
    //            }
    //         }
    //         
    //         
    //      }catch(Exception e){
    //         e.printStackTrace();
    //      }
    //   }
    /**
     * Builds a disjunction of simple label queries, one per dictionary
     * expansion of {@code label}.
     *
     * @param label           label to look up in the expansion dictionary
     * @param weightedContext currently unused
     * @return a {@code BooleanQuery} with one SHOULD clause per non-null
     *         expansion, each boosted by {@code labelBoost}; the query has no
     *         clauses when the dictionary returns {@code null}
     * @throws ParseException declared by {@code getSimpleLabelQuery}
     */
    private Query getDictExpandQuery(String label, ArrayList<String> weightedContext) throws ParseException {
        BooleanQuery expansionQuery = new BooleanQuery();
        ArrayList<String> expansions = dict_exp.getExpandQueryString(label);
        if (expansions != null) {
            for (String expansion : expansions) {
                if (expansion == null) {
                    continue;
                }
                Query expansionLabelQuery = getSimpleLabelQuery(expansion);
                if (expansionLabelQuery == null) {
                    continue;
                }
                expansionLabelQuery.setBoost(labelBoost);
                expansionQuery.add(expansionLabelQuery, BooleanClause.Occur.SHOULD);
            }
        }
        return expansionQuery;
    }

    /**
     * Replaces the boosts that {@code parse} applies to its sub-queries.
     * {@code analyzedLabelBoost} is not affected.
     *
     * @param labelWeight    new value for {@code labelBoost}
     * @param contentWeight  new value for {@code contentBoost}
     * @param relationWeight new value for {@code relationBoost}
     * @param typeWeight     new value for {@code typeBoost}
     * @param defaultWeight  new value for {@code defaultBoost}
     */
    public void setWeights(float labelWeight, float contentWeight, float relationWeight, float typeWeight,
            float defaultWeight) {
        this.defaultBoost = defaultWeight;
        this.typeBoost = typeWeight;
        this.relationBoost = relationWeight;
        this.contentBoost = contentWeight;
        this.labelBoost = labelWeight;
    }

    /**
     * Builds a label query made of two optional clauses: a payload term query
     * on the {@code label} field for the whole cleaned label, and a sloppy
     * phrase query (slop 10) on {@code analyzedLabel} for its space-separated
     * parts.
     *
     * <p>The label may carry a weight suffix, {@code "text|weight"}; the weight
     * becomes the boost of the returned query. A missing weight (for example
     * {@code "text|"} or a lone {@code "|"}) now falls back to 1 instead of
     * raising {@code ArrayIndexOutOfBoundsException}.
     *
     * @param label label text, optionally followed by {@code |weight}
     * @return the combined label query, never {@code null}
     * @throws NumberFormatException if a weight is present but not a float
     * @throws ParseException        never thrown by the current body; kept for callers
     */
    private Query getSimpleLabelQuery(String label) throws ParseException {
        float weight = 1;

        if (label.contains("|")) {
            // String.split drops trailing empty strings, so "text|" yields one
            // element and "|" yields none; index defensively.
            String[] termWeight = label.split("\\|");
            label = termWeight.length > 0 ? termWeight[0] : "";
            if (termWeight.length > 1) {
                weight = Float.parseFloat(termWeight[1]);
            }
        }

        label = Utils.removeChars(label);

        BooleanQuery labelQuery = new BooleanQuery();

        // Whole-label match on the "label" field.
        Term term = new Term("label", label);
        PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax);
        labelQuery.add(ptq, Occur.SHOULD);

        // Word-by-word phrase match on the "analyzedLabel" field.
        PhraseQuery pq = new PhraseQuery();
        for (String part : label.split(" ")) {
            pq.add(new Term("analyzedLabel", part));
        }
        pq.setSlop(10);
        labelQuery.add(pq, Occur.SHOULD);

        labelQuery.setBoost(weight);
        return labelQuery;
    }

    /**
     * Builds a label query for one or more alternative labels separated by
     * {@code *}. For every alternative three optional clauses are added: a
     * payload term query on {@code label}, a sloppy phrase query (slop 10) on
     * {@code analyzedLabel}, and the dictionary expansion returned by
     * {@code getDictExpandQuery}.
     *
     * <p>The input may end in a weight suffix, {@code "text|weight"}; the weight
     * becomes the boost of the returned query. A missing weight (for example
     * {@code "text|"} or a lone {@code "|"}) now falls back to 1 instead of
     * raising {@code ArrayIndexOutOfBoundsException}.
     *
     * <p>Several alternative formulations were previously kept here as
     * commented-out code (classic and complex-phrase query parsers, a
     * {@code RegexQuery} marked "too slow", {@code MultiPhraseQuery} and
     * {@code SpanNearQuery}); none of them was active.
     *
     * @param mylabel label text, optionally {@code *}-separated and optionally
     *                followed by {@code |weight}
     * @return the combined query, or {@code null} when {@code mylabel} is empty
     * @throws NumberFormatException if a weight is present but not a float
     * @throws ParseException        declared by {@code getDictExpandQuery}
     */
    private Query getLabelSpanQuery(String mylabel) throws ParseException {
        if (mylabel.equals("")) {
            return null;
        }

        float weight = 1;
        if (mylabel.contains("|")) {
            // String.split drops trailing empty strings, so "text|" yields one
            // element and "|" yields none; index defensively.
            String[] termWeight = mylabel.split("\\|");
            mylabel = termWeight.length > 0 ? termWeight[0] : "";
            if (termWeight.length > 1) {
                weight = Float.parseFloat(termWeight[1]);
            }
        }

        BooleanQuery labelQuery = new BooleanQuery();
        for (String label : mylabel.split("\\*")) {
            label = Utils.removeChars(label);

            // Whole-label match on the "label" field.
            Term term = new Term("label", label);
            PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax);
            labelQuery.add(ptq, Occur.SHOULD);

            // Word-by-word phrase match on the "analyzedLabel" field.
            PhraseQuery pq = new PhraseQuery();
            for (String part : label.split(" ")) {
                pq.add(new Term("analyzedLabel", part));
            }
            pq.setSlop(10);
            labelQuery.add(pq, Occur.SHOULD);

            // Dictionary expansions of this alternative.
            Query exp_label = getDictExpandQuery(label, null);
            labelQuery.add(exp_label, Occur.SHOULD);
        }

        labelQuery.setBoost(weight);
        return labelQuery;
    }

    /**
     * Builds the label query used by {@code parse}: an exact {@code TermQuery}
     * on the {@code label} field plus the cleaned label parsed against the
     * {@code analyzedLabel} field with AND as the default operator. Both are
     * optional clauses of the returned query.
     *
     * <p>The label may carry a weight suffix, {@code "text|weight"}; the weight
     * becomes the boost of the returned query. Two malformed inputs are now
     * tolerated instead of raising an exception: a missing weight
     * ({@code "text|"}, {@code "|"}) falls back to 1, and a label that is blank
     * after cleaning yields {@code null}, exactly like an empty label, rather
     * than being handed to the query parser.
     *
     * <p>Older alternatives that used to live here as commented-out code
     * (payload term query, {@code defaultLabel} field, regex / multi-phrase /
     * span queries) were inactive and have been dropped from the body.
     *
     * @param label label text, optionally followed by {@code |weight}
     * @return the label query, or {@code null} when there is no label text to search for
     * @throws NumberFormatException if a weight is present but not a float
     * @throws ParseException        if the query parser rejects the cleaned label
     */
    private Query getLabelQuery(String label) throws ParseException {
        if (label.equals("")) {
            return null;
        }

        float weight = 1;
        if (label.contains("|")) {
            // String.split drops trailing empty strings, so "text|" yields one
            // element and "|" yields none; index defensively.
            String[] termWeight = label.split("\\|");
            label = termWeight.length > 0 ? termWeight[0] : "";
            if (termWeight.length > 1) {
                weight = Float.parseFloat(termWeight[1]);
            }
        }

        label = Utils.removeChars(label);
        if (label.trim().isEmpty()) {
            // Nothing left to search for; the classic QueryParser would reject an empty string.
            return null;
        }

        BooleanQuery labelQuery = new BooleanQuery();

        // Exact match on the "label" field.
        Term term = new Term("label", label);
        TermQuery termQuery = new TermQuery(term);
        labelQuery.add(termQuery, Occur.SHOULD);

        // All words must occur in the "analyzedLabel" field.
        Analyzer analyzer = DefaultAnalyzer.getAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_47, "analyzedLabel", analyzer);
        parser.setDefaultOperator(Operator.AND);
        Query parsedQuery = parser.parse(label);
        labelQuery.add(parsedQuery, Occur.SHOULD);

        labelQuery.setBoost(weight);
        return labelQuery;
    }

    /**
     * Parses the text produced by {@code generateContent} into a query against
     * the {@code contents} field.
     *
     * <p>Contexts that consist only of punctuation or stripped type hints yield
     * blank text; that case now returns {@code null} instead of passing an
     * empty string to the query parser.
     *
     * @param label    label being searched; an empty label disables the content query
     * @param contexts context strings, optionally carrying a {@code |weight} suffix
     * @return the content query, or {@code null} when there is no label, no
     *         context, or no searchable context text
     * @throws ParseException if the query parser rejects the generated text
     */
    private Query getContentQuery(String label, ArrayList<String> contexts) throws ParseException {
        if (contexts.isEmpty() || label.equals("")) {
            return null;
        }

        String contents = generateContent(label, contexts);
        if (contents.trim().isEmpty()) {
            return null;
        }

        Analyzer analyzer = DefaultAnalyzer.getAnalyzer();
        QueryParser contentParser = new QueryParser(Version.LUCENE_47, "contents", analyzer);
        return contentParser.parse(contents);
    }

    /*
    private ArrayList<String> filterContext(String [] contexts){
       ArrayList<String> relatedContext = new ArrayList<String>();
           
           
           
       return relatedContext;
    }
    */
    /**
     * Extracts the type hint written in parentheses inside {@code label}, for
     * example {@code "paris (city;place)"}, and turns it into a
     * {@code DisjunctionMaxQuery} (tie-breaker 0) with one {@code TermQuery} on
     * the {@code type} field per {@code ;}-separated, trimmed entry.
     *
     * <p>The closing parenthesis is searched for after the first opening one,
     * so a stray {@code ')'} earlier in the label no longer causes a
     * {@code StringIndexOutOfBoundsException}.
     *
     * @param label label that may contain a parenthesised type list
     * @return the type query, or {@code null} when the label has no
     *         {@code "(...)"} section
     */
    private Query getTypeQuery(String label) {
        int open = label.indexOf('(');
        if (open < 0) {
            return null;
        }
        int typeStart = open + 1;
        int typeEnd = label.indexOf(')', typeStart);
        if (typeEnd < 0) {
            return null;
        }

        DisjunctionMaxQuery typesQuery = new DisjunctionMaxQuery(0);
        for (String type : label.substring(typeStart, typeEnd).split(";")) {
            typesQuery.add(new TermQuery(new Term("type", type.trim())));
        }
        return typesQuery;
    }

    /**
     * Returns the context strings with any parenthesised section removed.
     * The regular expression is greedy: everything from the first {@code '('}
     * to the last {@code ')'} of an entry is dropped.
     *
     * <p>No weights are assigned at present. A disabled variant that used to
     * be kept here in a comment weighted each context by
     * {@code contextWeightLamda * contexts.length / distance^2} relative to the
     * position of the label and filtered by {@code weightThreshold}.
     *
     * @param label         currently unused
     * @param contexts      raw context strings
     * @param context_limit currently unused
     * @return a new list with one cleaned entry per input context, in input order
     */
    public ArrayList<String> assignContextWeight(String label, String[] contexts, int context_limit) {
        ArrayList<String> cleaned = new ArrayList<String>();
        for (String rawContext : contexts) {
            cleaned.add(rawContext.replaceAll("\\(.*\\)", ""));
        }
        return cleaned;
    }

    /**
     * Builds a query over the {@code related_object} field with one
     * {@code PayloadTermQuery} per context. Contexts equal to {@code label} are
     * skipped. When {@code label} is empty every context is required (MUST);
     * otherwise contexts are optional (SHOULD).
     *
     * <p>A context may carry a weight suffix, {@code "text|weight"}, which
     * becomes the boost of its clause. A missing weight (for example
     * {@code "text|"} or a lone {@code "|"}) now falls back to 1 instead of
     * raising {@code ArrayIndexOutOfBoundsException}.
     *
     * @param label           label being searched; may be empty
     * @param weightedContext context strings, optionally followed by {@code |weight}
     * @return the relation query, or {@code null} when no clause was produced
     * @throws NumberFormatException if a weight is present but not a float
     */
    public Query getRelationQuery(String label, ArrayList<String> weightedContext) {
        if (weightedContext.size() < 1) {
            return null;
        }

        // Without a label the contexts are the only constraint, so they become mandatory.
        BooleanClause.Occur occur = label.equals("") ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

        BooleanQuery relationQuery = new BooleanQuery();
        for (String context : weightedContext) {
            if (context.equals(label)) {
                continue;
            }

            float weight = 1;
            if (context.contains("|")) {
                // String.split drops trailing empty strings, so "text|" yields
                // one element and "|" yields none; index defensively.
                String[] termWeight = context.split("\\|");
                context = termWeight.length > 0 ? termWeight[0] : "";
                if (termWeight.length > 1) {
                    weight = Float.parseFloat(termWeight[1]);
                }
            }

            Term contextTerm = new Term("related_object", context);
            PayloadTermQuery contextTermQuery = new PayloadTermQuery(contextTerm, payloadFunctionMax);
            contextTermQuery.setBoost(weight);
            relationQuery.add(contextTermQuery, occur);
        }

        if (relationQuery.clauses().isEmpty()) {
            return null;
        }
        return relationQuery;
    }

    /**
     * Builds the main weighted query for a label and its surrounding contexts.
     *
     * <p>The label and every context are lower-cased and trimmed; note that the
     * {@code contexts} array is updated in place. A parenthesised type hint in
     * the label is turned into a type query and then removed from the label.
     * The resulting sub-queries are combined as follows:
     * <ul>
     *   <li>type query - optional, boosted by {@code typeBoost}</li>
     *   <li>label query - required, boosted by {@code labelBoost}</li>
     *   <li>content query - optional, boosted by {@code contentBoost}</li>
     *   <li>relation query - optional, boosted by {@code relationBoost}</li>
     * </ul>
     * The combination is wrapped in a {@code CustomScoreQuery} together with a
     * function query over the float field {@code boost}, itself boosted by
     * {@code defaultBoost}. The result is also stored in {@code finalQuery}.
     *
     * <p>A dictionary expansion that was previously computed here but never
     * used has been removed.
     *
     * @param label    label to search for, optionally containing {@code "(type;...)"}
     * @param contexts surrounding context strings; modified in place
     * @return the final scored query
     * @throws ParseException if a sub-query cannot be parsed
     */
    public Query parse(String label, String[] contexts) throws ParseException {

        label = label.toLowerCase().trim();
        for (int i = 0; i < contexts.length; i++) {
            contexts[i] = contexts[i].toLowerCase().trim();
        }

        // The type hint has to be read before the parenthesised part is stripped.
        Query typeQuery = getTypeQuery(label);
        label = label.replaceAll("\\(.*\\)", "");
        if (debug)
            System.out.println("new label: " + label);

        ArrayList<String> weightedContext = assignContextWeight(label, contexts, contexts.length);

        Query labelQuery = getLabelQuery(label);
        Query contentQuery = getContentQuery(label, weightedContext);
        Query relationQuery = getRelationQuery(label, weightedContext);

        BooleanQuery luceneQuery = new BooleanQuery();

        if (typeQuery != null) {
            typeQuery.setBoost(typeBoost);
            luceneQuery.add(typeQuery, BooleanClause.Occur.SHOULD);
        }

        if (labelQuery != null) {
            // NOTE(review): this replaces any per-label weight that getLabelQuery
            // set from a "label|weight" suffix -- confirm that is intended.
            labelQuery.setBoost(labelBoost);
            luceneQuery.add(labelQuery, BooleanClause.Occur.MUST);
        }

        if (contentQuery != null) {
            contentQuery.setBoost(contentBoost);
            luceneQuery.add(contentQuery, BooleanClause.Occur.SHOULD);
        }

        if (relationQuery != null) {
            relationQuery.setBoost(relationBoost);
            luceneQuery.add(relationQuery, BooleanClause.Occur.SHOULD);
        }

        FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost"));
        boostQuery.setBoost(defaultBoost);

        finalQuery = new CustomScoreQuery(luceneQuery, boostQuery);

        return finalQuery;
    }

    /**
     * Fuzzy counterpart of {@code parse}: the label is matched with a
     * {@code FuzzyQuery} on the {@code label} field instead of the regular
     * label query, and none of the configurable boosts are applied.
     *
     * <p>The label and every context are lower-cased and trimmed; the
     * {@code contexts} array is updated in place. Clauses are combined as:
     * type query (required when present), fuzzy label query (required),
     * content query and relation query (optional when present). The result is
     * wrapped in a {@code CustomScoreQuery} with a function query over the
     * float field {@code boost} and stored in {@code finalQuery}.
     *
     * @param label    label to search for, optionally containing {@code "(type;...)"}
     * @param contexts surrounding context strings; modified in place
     * @return the final scored query
     * @throws ParseException if the content query cannot be parsed
     */
    public Query getFuzzyQuery(String label, String[] contexts) throws ParseException {
        String loweredLabel = label.toLowerCase().trim();
        for (int idx = 0; idx < contexts.length; idx++) {
            contexts[idx] = contexts[idx].toLowerCase().trim();
        }

        // Read the type hint first, then drop the parenthesised part from the label.
        Query typeQuery = getTypeQuery(loweredLabel);
        String bareLabel = loweredLabel.replaceAll("\\(.*\\)", "");

        ArrayList<String> weightedContext = assignContextWeight(bareLabel, contexts, contexts.length);
        Query contentQuery = getContentQuery(bareLabel, weightedContext);
        Query relationQuery = getRelationQuery(bareLabel, weightedContext);

        BooleanQuery combined = new BooleanQuery();
        if (typeQuery != null) {
            combined.add(typeQuery, BooleanClause.Occur.MUST);
        }
        combined.add(new FuzzyQuery(new Term("label", bareLabel)), BooleanClause.Occur.MUST);
        if (contentQuery != null) {
            combined.add(contentQuery, BooleanClause.Occur.SHOULD);
        }
        if (relationQuery != null) {
            combined.add(relationQuery, BooleanClause.Occur.SHOULD);
        }

        finalQuery = new CustomScoreQuery(combined, new FunctionQuery(new FloatFieldSource("boost")));
        return finalQuery;
    }

    /**
     * Checks whether the index holds at least one document that links
     * {@code label} and {@code context} in either direction: a document whose
     * label matches one of them and whose {@code related_object} field
     * contains the other.
     *
     * <p>Parenthesised sections and {@code |weight} suffixes are stripped from
     * both arguments first. Any exception is printed and reported as
     * "not related".
     *
     * @param searcher searcher over the index to query
     * @param label    first label
     * @param context  second label
     * @return {@code true} if the search for a single top hit returned one,
     *         {@code false} otherwise or on error
     */
    public static boolean isRelated(IndexSearcher searcher, String label, String context) {
        try {
            String cleanLabel = label.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");
            String cleanContext = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");

            WeightedQuery builder = new WeightedQuery();

            // label document -> related_object:context
            BooleanQuery forward = new BooleanQuery();
            forward.add(builder.getSimpleLabelQuery(cleanLabel), BooleanClause.Occur.MUST);
            forward.add(new TermQuery(new Term("related_object", cleanContext)), BooleanClause.Occur.MUST);

            // context document -> related_object:label
            BooleanQuery backward = new BooleanQuery();
            backward.add(builder.getSimpleLabelQuery(cleanContext), BooleanClause.Occur.MUST);
            backward.add(new TermQuery(new Term("related_object", cleanLabel)), BooleanClause.Occur.MUST);

            BooleanQuery eitherDirection = new BooleanQuery();
            eitherDirection.add(forward, BooleanClause.Occur.SHOULD);
            eitherDirection.add(backward, BooleanClause.Occur.SHOULD);

            ScoreDoc[] hits = searcher.search(eitherDirection, 1).scoreDocs;
            if (debug) {
                System.out.println(eitherDirection.toString() + " " + hits.length);
            }
            return hits.length == 1;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Checks whether the index holds a document whose {@code label} field
     * contains exactly {@code label} and whose {@code related_object} field
     * contains {@code context}.
     *
     * <p>Only {@code context} is cleaned (parenthesised sections and a
     * {@code |weight} suffix are removed); {@code label} is used as given. The
     * label clause is a {@code PayloadTermQuery} using the shared static
     * {@code payloadFunctionMax}. Any exception is printed and reported as
     * "not related".
     *
     * @param searcher searcher over the index to query
     * @param label    exact label term
     * @param context  related label
     * @return {@code true} if the search for a single top hit returned one,
     *         {@code false} otherwise or on error
     */
    public static boolean strictLabelRelate(IndexSearcher searcher, String label, String context) {
        try {
            BooleanQuery both = new BooleanQuery();

            String cleanContext = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");

            PayloadTermQuery labelClause = new PayloadTermQuery(new Term("label", label), payloadFunctionMax);
            TermQuery relationClause = new TermQuery(new Term("related_object", cleanContext));

            both.add(labelClause, BooleanClause.Occur.MUST);
            both.add(relationClause, BooleanClause.Occur.MUST);

            ScoreDoc[] hits = searcher.search(both, 1).scoreDocs;
            return hits.length == 1;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Builds a query for documents whose {@code related_object} field contains
     * both {@code label} and {@code context}.
     *
     * <p>Only {@code context} is cleaned (parenthesised sections and a
     * {@code |weight} suffix are removed); {@code label} is used as given.
     * The former catch-all handler that silently discarded every exception has
     * been replaced by an explicit check: a {@code null} argument yields
     * {@code null}, keeping the method's best-effort contract without hiding
     * unrelated failures.
     *
     * @param label   first related-object term
     * @param context second related-object term
     * @return the conjunction query, or {@code null} if either argument is {@code null}
     */
    public static Query getCoOccurQuery(String label, String context) {
        if (label == null || context == null) {
            return null;
        }

        context = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");

        BooleanQuery luceneQuery = new BooleanQuery();
        luceneQuery.add(new TermQuery(new Term("related_object", label)), BooleanClause.Occur.MUST);
        luceneQuery.add(new TermQuery(new Term("related_object", context)), BooleanClause.Occur.MUST);
        return luceneQuery;
    }

    /**
     * Returns the textual form of the most recently built query.
     *
     * @return {@code finalQuery.toString()}, or an empty string when no query
     *         has been built yet
     */
    @Override
    public String toString() {
        return finalQuery == null ? "" : finalQuery.toString();
    }

    /**
     * Concatenates the context strings into one space-separated text for the
     * content query. For each context any {@code |weight} suffix is dropped
     * and every non-word character is replaced by a space; each entry is
     * preceded by a single space.
     *
     * <p>The suffix is cut at the first {@code '|'} rather than via
     * {@code split("\\|")[0]}, so a context consisting only of {@code '|'}
     * characters contributes an empty entry instead of raising
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param label    currently unused
     * @param contexts context strings, optionally followed by {@code |weight}
     * @return the concatenated text; empty when {@code contexts} is empty
     */
    private String generateContent(String label, ArrayList<String> contexts) {
        StringBuilder contents = new StringBuilder();
        for (String str : contexts) {
            int bar = str.indexOf('|');
            if (bar >= 0) {
                str = str.substring(0, bar);
            }
            contents.append(' ').append(str.replaceAll("\\W", " "));
        }
        return contents.toString();
    }

}