List of usage examples for the org.apache.lucene.search.BooleanQuery constructor BooleanQuery()
BooleanQuery
From source file:edu.harvard.iq.dvn.core.web.StudyListingPage.java
License:Apache License
public String search_actionNew() { logger.fine("Entered search_actionNew on StudyListingPage.java"); DvnQuery dvnQuery = new DvnQuery(); searchField = (searchField == null) ? "any" : searchField; // default searchField, in case no dropdown List searchTerms = new ArrayList(); SearchTerm st = new SearchTerm(); st.setFieldName(searchField);/*from w w w . jav a2 s. c om*/ st.setValue(searchValue); // "Search Studies" by default searchTerms.add(st); dvnQuery.setSearchTerms(searchTerms); ResultsWithFacets resultsWithFacets = new ResultsWithFacets(); List studyIDList = new ArrayList(); Map variableMap = new HashMap(); Map fileMap = new HashMap(); Map versionMap = new HashMap(); List displayVersionList = new ArrayList(); // currently search filter is determined from a set of boolean checkboxes int searchFilter = 0; if (renderSearchResultsFilter && searchResultsFilter) { searchFilter = 2; } if (renderSearchCollectionFilter && searchCollectionFilter) { searchFilter = 1; } if (searchField.equals("variable")) { List variables = null; if (searchFilter == 1) { // just this collection List collections = new ArrayList(); collections.add(vdcCollectionService.find(studyListing.getCollectionId())); variables = indexService.searchVariables(getVDCRequestBean().getCurrentVDC(), collections, st); } else if (searchFilter == 2) { // subsearch variables = indexService.searchVariables(studyListing.getStudyIds(), st); } else { variables = indexService.searchVariables(getVDCRequestBean().getCurrentVDC(), st); } varService.determineStudiesFromVariables(variables, studyIDList, variableMap); } else { logger.fine("searchFilter = " + searchFilter); if (searchFilter == 1) { // just this collection // List collections = new ArrayList(); // collections.add(vdcCollectionService.find(studyListing.getCollectionId())); // studyIDList = indexService.search(getVDCRequestBean().getCurrentVDC(), collections, searchTerms); // old non-faceted method above /** * @todo: refactor? 
code is similar to getCollectionQueries in * Indexer.java? */ Query finalQuery = null; QueryParser parser = new QueryParser(Version.LUCENE_30, "abstract", new DVNAnalyzer()); parser.setDefaultOperator(QueryParser.AND_OPERATOR); StringBuilder sbOuter = new StringBuilder(); logger.fine("finding collection for id: " + studyListing.getCollectionId()); VDCCollection col = vdcCollectionService.find(studyListing.getCollectionId()); String type = col.getType(); String queryString = col.getQuery(); boolean isDynamic = col.isDynamic(); boolean isLocalScope = col.isLocalScope(); boolean isSubnetworkScope = col.isSubnetworkScope(); boolean isRootCollection = col.isRootCollection(); logger.fine("Single collection query... For " + col.getName() + " (isRootCollection=" + isRootCollection + "|type=" + type + "|isDynamic=" + isDynamic + "|isLocalScope=" + isLocalScope + ") query: <<<" + queryString + ">>>"); if (queryString != null && !queryString.isEmpty()) { try { logger.fine("For " + col.getName() + " (isRootCollection=" + isRootCollection + "|type=" + type + "|isDynamic=" + isDynamic + "|isLocalScope=" + isLocalScope + ") adding query: <<<" + queryString + ">>>"); Query dynamicQuery = parser.parse(queryString); if (isLocalScope) { BooleanQuery dynamicLocal = new BooleanQuery(); //Query dvOwnerIdQuery = indexService.constructDvOwnerIdQuery(getVDCRequestBean().getCurrentVDC()); Query dvOwnerIdQuery = indexService.constructDvOwnerIdQuery(col.getOwner()); dynamicLocal.add(dynamicQuery, BooleanClause.Occur.MUST); dynamicLocal.add(dvOwnerIdQuery, BooleanClause.Occur.MUST); finalQuery = dynamicLocal; } else if (isSubnetworkScope) { BooleanQuery dynamicLocal = new BooleanQuery(); Long subNetId = getVDCRequestBean().getCurrentVdcNetwork().getId(); //Query dvnetIdQuery = indexService.constructNetworkIdQuery(getVDCRequestBean().getCurrentVdcNetwork().getId()); Query dvnetIdQuery = indexService .constructNetworkOwnerIdQuery(col.getOwner().getVdcNetwork().getId()); 
dynamicLocal.add(dynamicQuery, BooleanClause.Occur.MUST); dynamicLocal.add(dvnetIdQuery, BooleanClause.Occur.MUST); finalQuery = dynamicLocal; } else { finalQuery = dynamicQuery; } } catch (org.apache.lucene.queryParser.ParseException ex) { Logger.getLogger(StudyListingPage.class.getName()).log(Level.SEVERE, null, ex); } } else { logger.fine("For " + col.getName() + " (isRootCollection=" + isRootCollection + "|type=" + type + "|isDynamic=" + isDynamic + "|isLocalScope=" + isLocalScope + ") skipping add of query: <<<" + queryString + ">>>"); List<Study> studies = col.getStudies(); StringBuilder sbInner = new StringBuilder(); for (Study study : studies) { logger.fine("- has StudyId: " + study.getId()); String idColonId = "id:" + study.getId().toString() + " "; sbInner.append(idColonId); } if (isRootCollection) { List<Long> rootCollectionStudies = vdcService.getOwnedStudyIds(col.getOwner().getId()); for (Long id : rootCollectionStudies) { logger.fine("- has StudyId: " + id); String idColonId = "id:" + id.toString() + " "; sbInner.append(idColonId); } } logger.fine("sbInner: " + sbInner.toString()); sbOuter.append(sbInner); } logger.fine("sbOuter: " + sbOuter); if (!sbOuter.toString().isEmpty()) { try { parser.setDefaultOperator(QueryParser.OR_OPERATOR); Query staticColQuery = parser.parse(sbOuter.toString()); parser.setDefaultOperator(QueryParser.AND_OPERATOR); logger.fine("staticCollectionQuery: " + staticColQuery); finalQuery = staticColQuery; } catch (org.apache.lucene.queryParser.ParseException ex) { Logger.getLogger(AdvSearchPage.class.getName()).log(Level.SEVERE, null, ex); } } dvnQuery.setSingleCollectionQuery(finalQuery); dvnQuery.setVdc(getVDCRequestBean().getCurrentVDC()); dvnQuery.constructQuery(); resultsWithFacets = indexService.searchNew(dvnQuery); studyIDList = resultsWithFacets.getMatchIds(); } else if (searchFilter == 2) { // subsearch logger.fine( "with these results searches disabled per https://redmine.hmdc.harvard.edu/issues/2969 "); // 
studyIDList = indexService.search(studyListing.getStudyIds(), searchTerms); // old method // dvnQuery.setLimitToStudyIds(studyListing.getStudyIds()); // dvnQuery.setSearchTerms(searchTerms); // dvnQuery.constructQuery(); // resultsWithFacets = indexService.searchNew(dvnQuery); // studyIDList = resultsWithFacets.getMatchIds(); } else { logger.fine("single collection not selected"); logger.fine("current subnetwork: " + getVDCRequestBean().getCurrentVdcNetwork().getId()); dvnQuery.setVdc(getVDCRequestBean().getCurrentVDC()); if (dvnQuery.getVdc() != null) { /** * At the dataverse level, search should not be affected by * the value of getVDCRequestBean().getCurrentVdcNetwork() * which tells us which subnetwork we are currently in. * * That is to say, we don't need to bother to check that * value or make any decisions based on it. */ dvnQuery.setDvOwnerIdQuery(indexService.constructDvOwnerIdQuery(dvnQuery.getVdc())); dvnQuery.setCollectionQueries(indexService.getCollectionQueries(dvnQuery.getVdc())); dvnQuery.setSearchTerms(searchTerms); dvnQuery.constructQuery(); resultsWithFacets = indexService.searchNew(dvnQuery); studyIDList = resultsWithFacets.getMatchIds(); } else { Long rootSubnetworkId = getVDCRequestBean().getVdcNetwork().getId(); Long currentSubnetworkId = getVDCRequestBean().getCurrentVdcNetwork().getId(); if (!currentSubnetworkId.equals(rootSubnetworkId)) { Query subNetworkQuery = indexService.constructNetworkIdQuery(currentSubnetworkId); dvnQuery.setSubNetworkQuery(subNetworkQuery); } dvnQuery.setSearchTerms(searchTerms); dvnQuery.constructQuery(); resultsWithFacets = indexService.searchNew(dvnQuery); studyIDList = resultsWithFacets.getMatchIds(); } } if (searchField.equals("any")) { List<Long> versionIds = indexService.searchVersionUnf(getVDCRequestBean().getCurrentVDC(), searchValue); Iterator iter = versionIds.iterator(); Long studyId = null; while (iter.hasNext()) { // List<StudyVersion> svList = new ArrayList<StudyVersion>(); Long vId = (Long) 
iter.next(); StudyVersion sv = null; try { sv = studyService.getStudyVersionById(vId); studyId = sv.getStudy().getId(); List<StudyVersion> svList = (List<StudyVersion>) versionMap.get(studyId); if (svList == null) { svList = new ArrayList<StudyVersion>(); } svList.add(sv); if (!studyIDList.contains(studyId)) { displayVersionList.add(studyId); studyIDList.add(studyId); } versionMap.put(studyId, svList); } catch (IllegalArgumentException e) { e.printStackTrace(); } } } } // now we handle the display of the page // first get the bound collection tree collectionTree = studyListing.getCollectionTree(); // now create the new StudyListing studyListing = new StudyListing(StudyListing.SEARCH); studyListing.setVdcId(getVDCRequestBean().getCurrentVDCId()); studyListing.setStudyIds(studyIDList); studyListing.setResultsWithFacets(resultsWithFacets); studyListing.setSearchTerms(searchTerms); studyListing.setVariableMap(variableMap); studyListing.setVersionMap(versionMap); studyListing.setCollectionTree(collectionTree); studyListing.setDisplayStudyVersionsList(displayVersionList); renderFacets = true; String studyListingIndex = StudyListing.addToStudyListingMap(studyListing, getSessionMap()); return "/StudyListingPage.xhtml?faces-redirect=true&studyListingIndex=" + studyListingIndex + getContextSuffix(); }
From source file:edu.ku.brc.specify.dbsupport.cleanuptools.GeoCleanupFuzzySearch.java
License:Open Source License
public static void main(String[] args) throws IOException { //For Debug// ww w . j a v a2 s.c o m String connectStr = "jdbc:mysql://localhost/testfish"; String username = "root"; String password = "root"; DBConnection dbConn; // Debug dbConn = DBConnection.getInstance(); dbConn.setConnectionStr(connectStr); dbConn.setDatabaseName("stats"); dbConn.setUsernamePassword(username, password); dbConn.setDriver("com.mysql.jdbc.Driver"); boolean doBuildIndex = false; //String indexLocation = "/Users/rods/Downloads/lucene/geonames-index"; String indexLocation = "/Users/rods/Documents/Specify/geonames-index"; GeoCleanupFuzzySearch indexer = null; try { indexer = new GeoCleanupFuzzySearch(null); if (doBuildIndex) { indexer.startIndexingProcessSync(1, null); } } catch (Exception ex) { System.out.println("Cannot create index..." + ex.getMessage()); System.exit(-1); } // =================================================== // after adding, we always have to call the // closeIndex, otherwise the index is not created // =================================================== // indexer.closeIndex(); // ========================================================= // Now search // ========================================================= IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); boolean doFuzzy = false; boolean doTerm = false; boolean doParse = true; if (doFuzzy) { System.out.println("-------------------------- Fuzzy -----------------------"); String[] searchStrs = { "Comoro Islands", "Solomon", "united states iowa", "germany brandenburg", "bulgaria sofia", "costa rica alajuela", "costa rica cartago", "costa rica alajuela", "canada newfoundland", "mexico campeche", "australia ashmore and cartier islands", "fiji lau", "fiji lomaiviti", "guam agana", "germany Lower Saxony", "germany Saxony", "germany Sachsen Anhalt", "germany Sachsen-Anhalt", "germany Land Sachsen-Anhalt", "united states 
iowa,Fayette", "united states iowa Fayette County", "Argentina Buenos Aires", "buenos aires argentina ", }; for (String searchText : searchStrs) { try { Query query = new FuzzyQuery(new Term("name", searchText)); TopDocs docs = searcher.search(query, 10); ScoreDoc[] hits = docs.scoreDocs; System.out.println(searchText + " -> Hits " + hits.length + " hits [" + query + "]"); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } } catch (Exception e) { System.out.println("Error searching " + searchText + " : " + e.getMessage()); } } } if (doTerm) { System.out.println("-------------------------- Terms -----------------------"); String[] searchStrs = { "Comoro Islands", "Solomon", "united states,iowa", "germany,brandenburg", "bulgaria,sofia", "costa rica,alajuela", "costa rica,cartago", "costa rica,alajuela", "canada,newfoundland", "mexico,campeche", "australia,ashmore and cartier islands", "fiji,lau", "fiji,lomaiviti", "guam,agana", "germany,Lower Saxony", "germany,Saxony", "germany,Sachsen Anhalt", "germany,Sachsen-Anhalt", "germany,Land Sachsen-Anhalt", "united states,iowa,Fayette", "united states,iowa,Fayette County", "argentina,buenos aires", "Argentina,Buenos Aires", }; for (String searchText : searchStrs) { try { String[] tokens = StringUtils.split(searchText, ','); BooleanQuery query = new BooleanQuery(); TermQuery t1 = new TermQuery(new Term("country", tokens[0])); t1.setBoost(0.2f); query.add(t1, Occur.SHOULD); if (tokens.length > 1) { TermQuery t2 = new TermQuery(new Term("state", tokens[1])); t2.setBoost(0.4f); query.add(t2, Occur.SHOULD); } if (tokens.length > 2) { TermQuery t3 = new TermQuery(new Term("county", tokens[2])); t3.setBoost(0.8f); query.add(t3, Occur.MUST); } TopDocs docs = searcher.search(query, 20); ScoreDoc[] hits = docs.scoreDocs; System.out.println(searchText + " -> Hits " + hits.length + " hits [" + query + "]"); 
for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } } catch (Exception e) { System.out.println("Error searching " + searchText + " : " + e.getMessage()); } } } if (doParse) { System.out.println("-------------------------- Parsing -----------------------"); String[] searchStrs = { "Comoro Islands", "Bahamas Elbow Bank", // "Solomon", // "united states iowa", // "germany brandenburg", // "bulgaria sofia", // "costa rica alajuela", // "costa rica cartago", // "costa rica alajuela", // "canada newfoundland", // "mexico campeche", // "australia ashmore and cartier islands", // "fiji lau", // "fiji lomaiviti", // "guam agana", // "germany Lower Saxony", // "germany Saxony", // "germany Sachsen Anhalt", // "germany Sachsen-Anhalt", // "germany Land Sachsen-Anhalt", // "united states iowa,Fayette", // "united states iowa Fayette County", // "Argentina Buenos Aires", // "buenos aires argentina " }; for (String searchText : searchStrs) { try { TopScoreDocCollector collector = TopScoreDocCollector.create(5, true); Query q = new QueryParser(Version.LUCENE_47, "name", analyzer).parse(searchText); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; if (hits != null) { System.out.println(searchText + " -> Hits " + hits.length + " hits."); // System.out.println("For: ["+seatchText+"] Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); if (d != null) { System.out.println((i + 1) + ". " + d.get("name") + " score=" + hits[i].score); } else { System.err.println("Doc was null searching " + searchText); } } } else { System.err.println("Hits was null searching " + searchText); } } catch (Exception e) { e.printStackTrace(); System.err.println("Error searching " + searchText + " : " + e.getMessage()); } } } }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
private Query getDictExpandQuery(String label, ArrayList<String> weightedContext) throws ParseException { BooleanQuery luceneQuery = new BooleanQuery(); ArrayList<String> dictLabels = dict_exp.getExpandQueryString(label); if (dictLabels == null) return luceneQuery; for (String dictLabel : dictLabels) { if (dictLabel != null) { Query dictlabelQuery = getSimpleLabelQuery(dictLabel); //Query dictcontentQuery = getContentQuery(dictLabel,weightedContext); // Query dictrelationQuery = getRelationQuery(dictLabel, weightedContext); if (dictlabelQuery != null) { // if(relationQuery == null && labelBoost == 1){ // labelQuery.setBoost(10); // } dictlabelQuery.setBoost(labelBoost); luceneQuery.add(dictlabelQuery, BooleanClause.Occur.SHOULD); }/*from w w w .j a va 2 s. co m*/ // if(dictcontentQuery != null){ // dictcontentQuery.setBoost(contentBoost); // luceneQuery.add(dictcontentQuery, BooleanClause.Occur.SHOULD); // } // if(dictrelationQuery != null){ // //relationQuery.setBoost(2); // dictrelationQuery.setBoost(relationBoost); // luceneQuery.add(dictrelationQuery, BooleanClause.Occur.SHOULD); // } } } return luceneQuery; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
private Query getSimpleLabelQuery(String label) throws ParseException { //DisjunctionMaxQuery labelQuery = new DisjunctionMaxQuery(0); float weight = 1; BooleanQuery labelQuery = new BooleanQuery(); if (label.contains("|")) { String[] termWeight = label.split("\\|"); label = termWeight[0];//from ww w . j av a2s .c o m weight = Float.parseFloat(termWeight[1]); } label = Utils.removeChars(label); Term term = new Term("label", label); //TermQuery termQuery = new TermQuery(term); PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax); //ptq.setBoost(5); labelQuery.add(ptq, Occur.SHOULD); String[] label_parts = label.split(" "); PhraseQuery pq = new PhraseQuery(); for (int i = 0; i < label_parts.length; i++) { pq.add(new Term("analyzedLabel", label_parts[i])); } // int gap = (new StandardAnalyzer(Version.LUCENE_47)).getPositionIncrementGap("analyzedLabel"); pq.setSlop(10); labelQuery.add(pq, Occur.SHOULD); // QueryParser defaultLabelParser = new QueryParser(Version.LUCENE_47,"defaultLabel", analyzer); // Query defaultLabelQuery = defaultLabelParser.parse(label); // //defaultLabelQuery.setBoost(10); // labelQuery.add(defaultLabelQuery, BooleanClause.Occur.SHOULD); labelQuery.setBoost(weight); return labelQuery; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
private Query getLabelSpanQuery(String mylabel) throws ParseException { float weight = 1; if (!mylabel.equals("")) { //DisjunctionMaxQuery labelQuery = new DisjunctionMaxQuery(0); BooleanQuery labelQuery = new BooleanQuery(); if (mylabel.contains("|")) { String[] termWeight = mylabel.split("\\|"); mylabel = termWeight[0];//from w w w.j av a 2s. c o m weight = Float.parseFloat(termWeight[1]); } for (String label : mylabel.split("\\*")) { label = Utils.removeChars(label); Term term = new Term("label", label); //TermQuery termQuery = new TermQuery(term); PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax); //ptq.setBoost(5); labelQuery.add(ptq, Occur.SHOULD); //labelQuery.add(termQuery,Occur.SHOULD); // String dictLabel = getDictExpandQuery(label); // if(dictLabel != null){ // Term dictTerm = new Term("label",dictLabel); // //TermQuery termQuery = new TermQuery(term); // PayloadTermQuery dictptq = new PayloadTermQuery(dictTerm, payloadFunctionMax); // //ptq.setBoost(5); // labelQuery.add(dictptq,Occur.SHOULD); // } // label = Utils.toPhrase(label); // Term pterm = new Term("label",label); // //TermQuery termQuery = new TermQuery(term); // PayloadTermQuery pptq = new PayloadTermQuery(pterm, payloadFunctionMax); // //ptq.setBoost(5); // labelQuery.add(pptq,Occur.SHOULD); // Term defaultTerm = new Term("defaultLabel",label); // //TermQuery defaultTermQuery = new TermQuery(defaultTerm); // PayloadTermQuery defaultTermptq = new PayloadTermQuery(defaultTerm, payloadFunctionMax); // //defaultTermptq.setBoost(10); // labelQuery.add(defaultTermptq,Occur.SHOULD);//, Occur.SHOULD //labelQuery.add(defaultTermQuery); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); // QueryParser parser = new QueryParser(Version.LUCENE_47,"analyzedLabel", analyzer); // parser.setDefaultOperator(Operator.AND); // Query parsedQuery = parser.parse(label); // //parsedQuery.setBoost((float) 0.1); // labelQuery.add(parsedQuery,Occur.SHOULD); // Analyzer analyzer = new 
StandardAnalyzer(Version.LUCENE_47); // QueryParser parser = new QueryParser(Version.LUCENE_47,"label", analyzer); // String label_re = "*"+label.replaceAll(" ", "*"); // Query parsedQuery = parser.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); // ComplexPhraseQueryParser cpqp = new ComplexPhraseQueryParser(Version.LUCENE_47, "label", analyzer); // String label_re = label.replaceAll(" ", "*"); // Query parsedQuery = cpqp.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); //too slow // String rq_label = label.replaceAll(" ", ".*"); // rq_label = ".*"+rq_label+".*"; // RegexQuery rq = new RegexQuery(new Term("label",rq_label)); // labelQuery.add(rq,Occur.SHOULD); // String [] label_parts = label.split(" "); // Term[] tTerms = new Term[label_parts.length]; // for(int i = 0; i < label_parts.length; i++){ // tTerms[i] = new Term("label", label_parts[i]); // } // MultiPhraseQuery multiPhrasequery = new MultiPhraseQuery(); // multiPhrasequery.add( tTerms ); // labelQuery.add(multiPhrasequery,Occur.SHOULD); // String [] label_parts = label.split(" "); // //SpanNearQuery [] snq = new SpanNearQuery[label_parts.length]; // SpanQuery [] sq = new SpanQuery[label_parts.length]; // //ArrayList<SpanNearQuery> span_terms = new ArrayList<SpanNearQuery>(); // for(int i = 0; i < label_parts.length; i++){ // SpanTermQuery stq = new SpanTermQuery(new Term("label", label_parts[i])); // sq[i] = stq; // } // SpanNearQuery final_snq = new SpanNearQuery(sq,10,false); // labelQuery.add(final_snq,Occur.SHOULD); // String[] label_parts = label.split(" "); PhraseQuery pq = new PhraseQuery(); for (int i = 0; i < label_parts.length; i++) { pq.add(new Term("analyzedLabel", label_parts[i])); } // int gap = (new StandardAnalyzer(Version.LUCENE_47)).getPositionIncrementGap("analyzedLabel"); pq.setSlop(10); labelQuery.add(pq, Occur.SHOULD); Query exp_label = getDictExpandQuery(label, null); labelQuery.add(exp_label, Occur.SHOULD); } // QueryParser defaultLabelParser = new 
QueryParser(Version.LUCENE_47,"defaultLabel", analyzer); // Query defaultLabelQuery = defaultLabelParser.parse(label); // //defaultLabelQuery.setBoost(10); // labelQuery.add(defaultLabelQuery, BooleanClause.Occur.SHOULD); labelQuery.setBoost(weight); return labelQuery; } return null; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
private Query getLabelQuery(String label) throws ParseException { float weight = 1; if (!label.equals("")) { //DisjunctionMaxQuery labelQuery = new DisjunctionMaxQuery(0); BooleanQuery labelQuery = new BooleanQuery(); if (label.contains("|")) { String[] termWeight = label.split("\\|"); label = termWeight[0];//from w w w. j a v a 2 s . c o m weight = Float.parseFloat(termWeight[1]); } label = Utils.removeChars(label); ///Analyzer labelAnalyzer = new StandardAnalyzer(Version.LUCENE_47); ///QueryParser labelParser = new QueryParser(Version.LUCENE_47,"label", labelAnalyzer); ///labelParser.setDefaultOperator(Operator.AND); ///Query labelParsedQuery = labelParser.parse(label); ///labelQuery.add(labelParsedQuery,Occur.SHOULD); Term term = new Term("label", label); TermQuery termQuery = new TermQuery(term); //PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunctionMax); //ptq.setBoost(5); //labelQuery.add(ptq,Occur.SHOULD); labelQuery.add(termQuery, Occur.SHOULD); // Term defaultTerm = new Term("defaultLabel",label); // //TermQuery defaultTermQuery = new TermQuery(defaultTerm); // PayloadTermQuery defaultTermptq = new PayloadTermQuery(defaultTerm, payloadFunctionMax); // //defaultTermptq.setBoost(10); // labelQuery.add(defaultTermptq,Occur.SHOULD);//, Occur.SHOULD //labelQuery.add(defaultTermQuery); Analyzer analyzer = DefaultAnalyzer.getAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_47, "analyzedLabel", analyzer); parser.setDefaultOperator(Operator.AND); Query parsedQuery = parser.parse(label); //parsedQuery.setBoost((float) 0.1); labelQuery.add(parsedQuery, Occur.SHOULD); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); // QueryParser parser = new QueryParser(Version.LUCENE_47,"label", analyzer); // String label_re = "*"+label.replaceAll(" ", "*"); // Query parsedQuery = parser.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); // ComplexPhraseQueryParser cpqp = new ComplexPhraseQueryParser(Version.LUCENE_47, "label", 
analyzer); // String label_re = label.replaceAll(" ", "*"); // Query parsedQuery = cpqp.parse(label_re); // labelQuery.add(parsedQuery,Occur.SHOULD); //too slow // String rq_label = label.replaceAll(" ", ".*"); // rq_label = ".*"+rq_label+".*"; // RegexQuery rq = new RegexQuery(new Term("label",rq_label)); // labelQuery.add(rq,Occur.SHOULD); // String [] label_parts = label.split(" "); // Term[] tTerms = new Term[label_parts.length]; // for(int i = 0; i < label_parts.length; i++){ // tTerms[i] = new Term("label", label_parts[i]); // } // MultiPhraseQuery multiPhrasequery = new MultiPhraseQuery(); // multiPhrasequery.add( tTerms ); // labelQuery.add(multiPhrasequery,Occur.SHOULD); // String [] label_parts = label.split(" "); // SpanNearQuery [] snq = new SpanNearQuery[label_parts.length]; // //ArrayList<SpanNearQuery> span_terms = new ArrayList<SpanNearQuery>(); // for(int i = 0; i < label_parts.length; i++){ // SpanTermQuery [] stq = {new SpanTermQuery(new Term("label", label_parts[i]))}; // snq[i] = new SpanNearQuery(stq,10,false); // } // SpanNearQuery final_snq = new SpanNearQuery(snq,10,false); // labelQuery.add(final_snq,Occur.SHOULD); // // QueryParser defaultLabelParser = new QueryParser(Version.LUCENE_47,"defaultLabel", analyzer); // Query defaultLabelQuery = defaultLabelParser.parse(label); // //defaultLabelQuery.setBoost(10); // labelQuery.add(defaultLabelQuery, BooleanClause.Occur.SHOULD); labelQuery.setBoost(weight); return labelQuery; } return null; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
public Query getRelationQuery(String label, ArrayList<String> weightedContext) { //System.out.println(label+" context size: "+weightedContext.size()); if (weightedContext.size() < 1) return null; BooleanQuery relationQuery = new BooleanQuery(); for (int i = 0; i < weightedContext.size(); i++) { //System.out.println("value: "+weightedContext.get(i)); String context = weightedContext.get(i); //String currentContext = contexts[i].replaceAll("\\(.*\\)", ""); if (context.equals(label)) continue; float weight = 1; if (context.contains("|")) { String[] termWeight = context.split("\\|"); context = termWeight[0];/*from ww w. ja v a 2s.c o m*/ weight = Float.parseFloat(termWeight[1]); } //if(weight > weightThreshold){ Term contextTerm = new Term("related_object", context); PayloadTermQuery contextTermQuery = new PayloadTermQuery(contextTerm, payloadFunctionMax); contextTermQuery.setBoost(weight); if (label.equals("")) { relationQuery.add(contextTermQuery, BooleanClause.Occur.MUST); } else { relationQuery.add(contextTermQuery, BooleanClause.Occur.SHOULD); } //} } if (relationQuery.clauses().size() == 0) return null; return relationQuery; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
public Query parse(String label, String[] contexts) throws ParseException { label = label.toLowerCase().trim();//w w w . jav a 2 s . c o m for (int i = 0; i < contexts.length; i++) { contexts[i] = contexts[i].toLowerCase().trim(); } Query typeQuery = getTypeQuery(label); label = label.replaceAll("\\(.*\\)", ""); if (debug) System.out.println("new label: " + label); BooleanQuery luceneQuery = new BooleanQuery(); ArrayList<String> dictLabels = dict_exp.getExpandQueryString(label); // ArrayList<String> temp = new ArrayList<String>( Arrays.asList(contexts)); // if(dictLabels != null){ // for(String dictLabel:dictLabels){ // temp.add(dictLabel); // } // } // contexts = temp.toArray(new String[temp.size()]); ArrayList<String> weightedContext = assignContextWeight(label, contexts, contexts.length); //System.out.println(label+" "+weightedContext.toString()); Query labelQuery = getLabelQuery(label); //Query labelQuery = getLabelSpanQuery(label); Query contentQuery = getContentQuery(label, weightedContext); Query relationQuery = getRelationQuery(label, weightedContext); // if(relationQuery!=null) // relationQuery.setBoost(100); if (typeQuery != null) { typeQuery.setBoost(typeBoost); luceneQuery.add(typeQuery, BooleanClause.Occur.SHOULD); } if (labelQuery != null) { // if(relationQuery == null && labelBoost == 1){ // labelQuery.setBoost(10); // } labelQuery.setBoost(labelBoost); luceneQuery.add(labelQuery, BooleanClause.Occur.MUST); } if (contentQuery != null) { contentQuery.setBoost(contentBoost); luceneQuery.add(contentQuery, BooleanClause.Occur.SHOULD); } if (relationQuery != null) { //relationQuery.setBoost(2); relationQuery.setBoost(relationBoost); luceneQuery.add(relationQuery, BooleanClause.Occur.SHOULD); } // System.out.println("parse: labelBoost: "+labelBoost); // System.out.println("parse: contentBoost: "+contentBoost); // System.out.println("parse: relationBoost: "+relationBoost); // System.out.println("parse: typeBoost: "+typeBoost); // System.out.println("parse: 
defaultBoost: "+defaultBoost); // Query dictQuery = getDictExpandQuery(label, weightedContext); // if(dictQuery != null){ // luceneQuery.add(dictQuery, BooleanClause.Occur.SHOULD); // } FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); boostQuery.setBoost(defaultBoost); finalQuery = new CustomScoreQuery(luceneQuery, boostQuery); return finalQuery; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
public Query getFuzzyQuery(String label, String[] contexts) throws ParseException { label = label.toLowerCase().trim();// w ww. ja v a2 s.c o m for (int i = 0; i < contexts.length; i++) { contexts[i] = contexts[i].toLowerCase().trim(); } Query typeQuery = getTypeQuery(label); label = label.replaceAll("\\(.*\\)", ""); //System.out.println("new label: "+label); BooleanQuery luceneQuery = new BooleanQuery(); ArrayList<String> weightedContext = assignContextWeight(label, contexts, contexts.length); //System.out.println(label+" "+weightedContext.toString()); Term labelTerm = new Term("label", label); FuzzyQuery fuzzyLabelQuery = new FuzzyQuery(labelTerm); //Query labelQuery = getLabelQuery(label); Query contentQuery = getContentQuery(label, weightedContext); Query relationQuery = getRelationQuery(label, weightedContext); // if(relationQuery!=null) // relationQuery.setBoost(100); if (typeQuery != null) { //typeQuery.setBoost(15); luceneQuery.add(typeQuery, BooleanClause.Occur.MUST); } if (fuzzyLabelQuery != null) { // if(relationQuery == null) // labelQuery.setBoost(10); luceneQuery.add(fuzzyLabelQuery, BooleanClause.Occur.MUST); } if (contentQuery != null) { luceneQuery.add(contentQuery, BooleanClause.Occur.SHOULD); } if (relationQuery != null) { //relationQuery.setBoost(2); luceneQuery.add(relationQuery, BooleanClause.Occur.SHOULD); } FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost")); finalQuery = new CustomScoreQuery(luceneQuery, boostQuery); return finalQuery; }
From source file:edu.rpi.tw.linkipedia.search.query.WeightedQuery.java
License:Open Source License
/**
 * Checks whether the index contains a document relating {@code label} and
 * {@code context} in either direction.
 *
 * <p>Parenthesised parts and any {@code |...} suffix are stripped from both
 * arguments. A document matches when it matches the label query for one
 * value and has the other value in its {@code related_object} field.
 *
 * @param searcher the searcher to query
 * @param label    first value
 * @param context  second value
 * @return {@code true} if at least one matching document is found;
 *         {@code false} otherwise, including when any exception occurs
 *         (the stack trace is printed)
 */
public static boolean isRelated(IndexSearcher searcher, String label, String context) {
    try {
        label = label.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");
        context = context.replaceAll("\\(.*\\)", "").replaceAll("\\|.*", "");

        BooleanQuery eitherDirection = new BooleanQuery();
        WeightedQuery builder = new WeightedQuery();

        // label -> context
        Query forwardLabel = builder.getSimpleLabelQuery(label);
        TermQuery forwardTarget = new TermQuery(new Term("related_object", context));
        BooleanQuery forward = new BooleanQuery();
        forward.add(forwardLabel, BooleanClause.Occur.MUST);
        forward.add(forwardTarget, BooleanClause.Occur.MUST);
        eitherDirection.add(forward, BooleanClause.Occur.SHOULD);

        // context -> label
        BooleanQuery backward = new BooleanQuery();
        Query backwardLabel = builder.getSimpleLabelQuery(context);
        TermQuery backwardTarget = new TermQuery(new Term("related_object", label));
        backward.add(backwardLabel, BooleanClause.Occur.MUST);
        backward.add(backwardTarget, BooleanClause.Occur.MUST);
        eitherDirection.add(backward, BooleanClause.Occur.SHOULD);

        // One hit is enough to answer the question.
        ScoreDoc[] hits = searcher.search(eitherDirection, 1).scoreDocs;
        if (debug) {
            System.out.println(eitherDirection.toString() + " " + hits.length);
        }
        return hits.length == 1;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}