List of usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()
BooleanQuery
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
public static boolean strictLabelRelate(IndexSearcher searcher, String label, String context) { try {//w w w . java 2 s . c om BooleanQuery luceneQuery = new BooleanQuery(); //WeightedQuery wquery = new WeightedQuery(); context = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", ""); Term term = new Term("label", label); PayloadTermQuery labelQuery = new PayloadTermQuery(term, payloadFunctionMax); //ptq.setBoost(5); //labelQuery.add(ptq); //Query labelQuery = wquery.getLabelQuery(label); Term contextTerm = new Term("related_object", context); TermQuery relateQuery = new TermQuery(contextTerm); luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); luceneQuery.add(relateQuery, BooleanClause.Occur.MUST); TopDocs topDocs = searcher.search(luceneQuery, 1); ScoreDoc[] hits = topDocs.scoreDocs; //System.out.println(luceneQuery.toString()+" "+hits.length); return hits.length == 1; } catch (Exception e) { e.printStackTrace(); } return false; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
public static Query getCoOccurQuery(String label, String context) { try {//from www . j a v a 2s . c o m BooleanQuery luceneQuery = new BooleanQuery(); //WeightedQuery wquery = new WeightedQuery(); context = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", ""); Term term = new Term("related_object", label); TermQuery labelQuery = new TermQuery(term); Term contextTerm = new Term("related_object", context); TermQuery relateQuery = new TermQuery(contextTerm); luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); luceneQuery.add(relateQuery, BooleanClause.Occur.MUST); return luceneQuery; } catch (Exception e) { } return null; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQueryFilterBackup.java
License:Open Source License
private Query getLabelQuery(String label) throws ParseException { float weight = 1; if (!label.equals("")) { //DisjunctionMaxQuery labelQuery = new DisjunctionMaxQuery(0); BooleanQuery labelQuery = new BooleanQuery(); if (label.contains("|")) { String[] termWeight = label.split("\\|"); label = termWeight[0];/*w w w . j a v a 2 s . com*/ weight = Float.parseFloat(termWeight[1]); } label = Utils.removeChars(label); Term term = new Term("label", label); //TermQuery termQuery = new TermQuery(term); PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax); //ptq.setBoost(5); labelQuery.add(ptq, Occur.SHOULD); //labelQuery.add(termQuery); // Term defaultTerm = new Term("defaultLabel",label); // //TermQuery defaultTermQuery = new TermQuery(defaultTerm); // PayloadTermQuery defaultTermptq = new PayloadTermQuery(defaultTerm, payloadFunctionMax); // //defaultTermptq.setBoost(10); // labelQuery.add(defaultTermptq,Occur.SHOULD);//, Occur.SHOULD //labelQuery.add(defaultTermQuery); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); QueryParser parser = new QueryParser(Version.LUCENE_47, "analyzedLabel", analyzer); parser.setDefaultOperator(Operator.AND); Query parsedQuery = parser.parse(label); //parsedQuery.setBoost((float) 0.1); labelQuery.add(parsedQuery, Occur.SHOULD); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); // QueryParser parser = new QueryParser(Version.LUCENE_47,"label", analyzer); // String label_re = "*"+label.replaceAll(" ", "*"); // Query parsedQuery = parser.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); // ComplexPhraseQueryParser cpqp = new ComplexPhraseQueryParser(Version.LUCENE_47, "label", analyzer); // String label_re = label.replaceAll(" ", "*"); // Query parsedQuery = cpqp.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); //too slow // String rq_label = label.replaceAll(" ", ".*"); // rq_label = ".*"+rq_label+".*"; // RegexQuery rq = new RegexQuery(new Term("label",rq_label)); // 
labelQuery.add(rq,Occur.SHOULD); // String [] label_parts = label.split(" "); // Term[] tTerms = new Term[label_parts.length]; // for(int i = 0; i < label_parts.length; i++){ // tTerms[i] = new Term("label", label_parts[i]); // } // MultiPhraseQuery multiPhrasequery = new MultiPhraseQuery(); // multiPhrasequery.add( tTerms ); // labelQuery.add(multiPhrasequery,Occur.SHOULD); // String [] label_parts = label.split(" "); // SpanNearQuery [] snq = new SpanNearQuery[label_parts.length]; // //ArrayList<SpanNearQuery> span_terms = new ArrayList<SpanNearQuery>(); // for(int i = 0; i < label_parts.length; i++){ // SpanTermQuery [] stq = {new SpanTermQuery(new Term("label", label_parts[i]))}; // snq[i] = new SpanNearQuery(stq,10,false); // } // SpanNearQuery final_snq = new SpanNearQuery(snq,10,false); // labelQuery.add(final_snq,Occur.SHOULD); // // QueryParser defaultLabelParser = new QueryParser(Version.LUCENE_47,"defaultLabel", analyzer); // Query defaultLabelQuery = defaultLabelParser.parse(label); // //defaultLabelQuery.setBoost(10); // labelQuery.add(defaultLabelQuery, BooleanClause.Occur.SHOULD); labelQuery.setBoost(weight); return labelQuery; } return null; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQueryFilterBackup.java
License:Open Source License
public Query parse(String label, String[] contexts) throws ParseException { label = label.toLowerCase().trim();// www . ja v a 2s .c o m for (int i = 0; i < contexts.length; i++) { contexts[i] = contexts[i].toLowerCase().trim(); } CachingWrapperFilter typeFilter = getTypeFilter(label); label = label.replaceAll("\\(.*\\)", ""); //System.out.println("new label: "+label); BooleanQuery luceneQuery = new BooleanQuery(); ArrayList<String> weightedContext = assignContextWeight(label, contexts, contexts.length); //System.out.println(label+" "+weightedContext.toString()); Query labelQuery = getLabelQuery(label); Query contentQuery = getContentQuery(label, weightedContext); Query relationQuery = getRelationQuery(label, weightedContext); // if(relationQuery!=null) // relationQuery.setBoost(100); if (labelQuery != null) { luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); } FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); if (relationQuery == null && labelQuery != null) { labelQuery.setBoost(10); boostQuery.setBoost(5); } if (contentQuery != null && relationQuery != null) { luceneQuery.add(contentQuery, BooleanClause.Occur.SHOULD); } if (relationQuery != null) { //relationQuery.setBoost(2); luceneQuery.add(relationQuery, BooleanClause.Occur.SHOULD); } Query query = new CustomScoreQuery(luceneQuery, boostQuery); if (typeFilter != null) { finalQuery = new FilteredQuery(query, typeFilter); return finalQuery; } return query; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQueryFilterBackup.java
License:Open Source License
public Query getFuzzyQuery(String label, String[] contexts) throws ParseException { label = label.toLowerCase().trim();/* w ww . j a v a2 s. c o m*/ for (int i = 0; i < contexts.length; i++) { contexts[i] = contexts[i].toLowerCase().trim(); } CachingWrapperFilter typeFilter = getTypeFilter(label); label = label.replaceAll("\\(.*\\)", ""); ArrayList<String> weightedContext = assignContextWeight(label, contexts, contexts.length); Term labelTerm = new Term("label", label); FuzzyQuery fuzzyLabelQuery = new FuzzyQuery(labelTerm); BooleanQuery luceneQuery = new BooleanQuery(); if (fuzzyLabelQuery != null) { luceneQuery.add(fuzzyLabelQuery, BooleanClause.Occur.MUST); } Query contentQuery = getContentQuery(label, weightedContext); Query relationQuery = getRelationQuery(label, weightedContext); FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); if (relationQuery == null && fuzzyLabelQuery != null) { fuzzyLabelQuery.setBoost(5); boostQuery.setBoost(5); } if (contentQuery != null && relationQuery != null) { luceneQuery.add(contentQuery, BooleanClause.Occur.SHOULD); } if (relationQuery != null) { //relationQuery.setBoost(2); luceneQuery.add(relationQuery, BooleanClause.Occur.SHOULD); } Query query = new CustomScoreQuery(luceneQuery, boostQuery); if (typeFilter != null) { finalQuery = new FilteredQuery(query, typeFilter); return finalQuery; } return query; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQueryFilterBackup.java
License:Open Source License
public static boolean isRelated(IndexSearcher searcher, String label, String context) { try {//from ww w . ja v a 2 s .co m BooleanQuery luceneQuery = new BooleanQuery(); WeightedQueryFilterBackup wquery = new WeightedQueryFilterBackup(); Query labelQuery = wquery.getLabelQuery(label); context = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", ""); Term contextTerm = new Term("related_object", context); TermQuery relateQuery = new TermQuery(contextTerm); luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); luceneQuery.add(relateQuery, BooleanClause.Occur.MUST); TopDocs topDocs = searcher.search(luceneQuery, 1); ScoreDoc[] hits = topDocs.scoreDocs; //System.out.println(luceneQuery.toString()+" "+hits.length); return hits.length == 1; } catch (Exception e) { e.printStackTrace(); } return false; }
From source file:edu.rpi.tw.linkipedia.search.recognizer.EntityRecognizer.java
License:Open Source License
private boolean lookup(String query) { //try dictionary String type = dicts.search(query); if (type != null) { System.out.println(query + " search by dicts"); return true; } else {//from w w w. j a v a2 s .c o m System.out.println(query + " search by dicts FAILED"); } BooleanQuery luceneQuery = new BooleanQuery(); try { query = query.toLowerCase(); query = query.trim(); //System.out.println("query: "+query); Term term = new Term("lookUpLabel", query); TermQuery termQuery = new TermQuery(term);//parser.parse(query); Term labelTerm = new Term("label", query); PayloadTermQuery ptq = new PayloadTermQuery(labelTerm, payloadFunction); luceneQuery.add(termQuery, Occur.SHOULD); luceneQuery.add(ptq, Occur.SHOULD); //FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); //Query finalQuery = new CustomScoreQuery(luceneQuery, boostQuery); TopDocs topDocs = searcher.search(luceneQuery, 1); ScoreDoc[] hits = topDocs.scoreDocs; // if(hits.length == 0){ // System.out.println("look up: "+luceneQuery.toString()+" failed"); // } //System.out.println("look up: "+luceneQuery.toString()+" "+hits.length); return hits.length == 1; } catch (Exception e) { e.printStackTrace(); } return false; /* Document doc = null; try { doc = searcher.doc(hits[0].doc); weight = hits[0].score; System.out.println(query+": "+doc.get("url")+" "+weight); } catch (Exception e) { e.printStackTrace(); } return weight; */ }
From source file:edu.rpi.tw.linkipedia.search.searching.SurfaceFormSearcher.java
License:Open Source License
public ArrayList<Annotation> mysearch(String query) { TopScoreDocCollector collector = TopScoreDocCollector.create(10, false); results = new ArrayList<Annotation>(); try {/* w w w.ja v a 2 s . co m*/ BooleanQuery luceneQuery = new BooleanQuery(); Term term = new Term("label", query); //TermQuery termQuery = new TermQuery(term); PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunction); ptq.setBoost(10); Analyzer analyzer = DefaultAnalyzer.getAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_47, "analyzedLabel", analyzer); Query parsedQuery = parser.parse(query); //luceneQuery.add(termQuery, BooleanClause.Occur.SHOULD); luceneQuery.add(ptq, BooleanClause.Occur.SHOULD); luceneQuery.add(parsedQuery, BooleanClause.Occur.SHOULD); FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); //boostQuery.setBoost(100); Query finalQuery = new CustomScoreQuery(luceneQuery, boostQuery); System.out.println(finalQuery); searcher.search(finalQuery, collector); } catch (Exception e) { e.printStackTrace(); } ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); int start = 0; int end = Math.min(numTotalHits, 10); for (int i = start; i < end; i++) { String thisResult = ""; Document doc = null; try { doc = searcher.doc(hits[i].doc); } catch (Exception e) { e.printStackTrace(); } String url = doc.get("url"); thisResult += url;//+" "+hits[i].score; String[] triples = doc.getValues("triple"); // for(String label:labels){ // thisResult += label+"\n"; // } Annotation annotation = new Annotation(query, url, hits[i].score); annotation.setTriples(new ArrayList<String>(Arrays.asList(triples))); results.add(annotation); } return results; }
From source file:edu.rpi.tw.linkipedia.search.similarity.ValidationRank.java
License:Open Source License
private boolean isDirectRelated(String url1, String url2) { try {/*w ww .j ava 2s . c o m*/ if (dir_cache.containsKey(url2 + " " + url1)) { System.out.println( "direct relate cache found: " + url2 + " " + url1 + " " + dir_cache.get(url2 + " " + url1)); return dir_cache.get(url2 + " " + url1) == 1; } // if(cache.containsKey(url1+" "+url2)){ // System.out.println("cache found 2: "+url2+" "+url1); // return cache.get(url1+" "+url2); // } BooleanQuery luceneQuery = new BooleanQuery(); TermQuery termQuery = new TermQuery(new Term("defaultLabel", url1)); //Query labelQuery = wquery.getLabelQuery(url1); Term contextTerm = new Term("related_object", url2); TermQuery relateQuery = new TermQuery(contextTerm); luceneQuery.add(termQuery, BooleanClause.Occur.MUST); luceneQuery.add(relateQuery, BooleanClause.Occur.MUST); TopDocs docs = searcher.search(luceneQuery, 1); dir_cache.put(url1 + " " + url2, (float) docs.scoreDocs.length); dir_cache.put(url2 + " " + url1, (float) docs.scoreDocs.length); System.out.println("direct: " + url1 + " | " + url2 + " : " + docs.scoreDocs.length); return docs.scoreDocs.length == 1; } catch (Exception e) { e.printStackTrace(); } return false; }
From source file:edu.rpi.tw.linkipedia.search.similarity.ValidationRank.java
License:Open Source License
private Query getCoOccurQuery(String url1, String url2) { try {//from w ww.j a v a 2s . c om BooleanQuery luceneQuery = new BooleanQuery(); Term term = new Term("related_object", url1); TermQuery labelQuery = new TermQuery(term); Term contextTerm = new Term("related_object", url2); TermQuery relateQuery = new TermQuery(contextTerm); luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); luceneQuery.add(relateQuery, BooleanClause.Occur.MUST); return luceneQuery; } catch (Exception e) { } return null; }