List of usage examples for org.apache.solr.search DocSet intersection
public DocSet intersection(DocSet other);
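The examples below all follow the same pattern: materialize each query's matching documents as a DocSet via SolrIndexSearcher.getDocSet(Query), then call intersection(other) to obtain the documents present in both sets; neither input set is modified. Here is a minimal sketch of that pattern (the field name "contents", the two term queries, and the class name are illustrative, not taken from the examples below):

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

public class DocSetIntersectionSketch {
    public static int countDocsMatchingBoth(SolrIndexSearcher searcher) throws IOException {
        Query first = new TermQuery(new Term("contents", "apache"));
        Query second = new TermQuery(new Term("contents", "solr"));

        // Materialize each query's matching documents as a DocSet.
        DocSet firstDocs = searcher.getDocSet(first);
        DocSet secondDocs = searcher.getDocSet(second);

        // intersection() returns a new DocSet holding only the documents
        // that appear in both inputs; the inputs are left unchanged.
        DocSet both = firstDocs.intersection(secondDocs);
        return both.size();
    }
}

When only the count is needed, DocSet also offers intersectionSize(DocSet), which avoids building the intermediate set.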
From source file:com.searchbox.SuggesterTreeHolder.java
License:Apache License
SuggestionResultSet getSuggestions(SolrIndexSearcher searcher, String[] fields, String query,
        int maxPhraseSearch) {
    query = deAccent(query);
    // TODO: should use a tokenizer instead of this regex split
    String[] queryTokens = query.replaceAll("[^A-Za-z0-9 ]", " ").replace("  ", " ").trim().split(" ");
    // Get completions for the last word in the query.
    SuggestionResultSet rs = headNode.computeQt(queryTokens[queryTokens.length - 1].toLowerCase(),
            maxPhraseSearch);
    // Didn't find it, bail early.
    if (rs == null) {
        return new SuggestionResultSet("", 0);
    }
    rs.myval = "";
    LOGGER.debug("Doing 2nd part of equation");
    try {
        if (queryTokens.length > 1) {
            // Solr 4.4 method change:
            QueryParser parser = new QueryParser(Version.LUCENE_44, "contents",
                    searcher.getCore().getLatestSchema().getAnalyzer());
            // QueryParser parser = new QueryParser(Version.LUCENE_43,
            //         "contents", searcher.getCore().getSchema().getAnalyzer());
            SuggestionResultSet newrs = new SuggestionResultSet("", maxPhraseSearch);
            StringBuilder sb = new StringBuilder();
            // Build a search across all of the target fields.
            for (int zz = 0; zz < queryTokens.length - 1; zz++) {
                newrs.myval = newrs.myval + queryTokens[zz] + " ";
                StringBuilder inner = new StringBuilder();
                for (String field : fields) {
                    String escaped_field = parser.escape(field);
                    // Looking for the query token.
                    String escaped_token = parser.escape(queryTokens[zz]);
                    inner.append(escaped_field + ":" + escaped_token + " ");
                }
                if (inner.length() > 0) {
                    sb.append("+(" + inner + ")");
                }
            }
            // LOGGER.info("SB query:\t" + sb.toString());
            Query q = null;
            try {
                // Convert it to a Lucene query.
                q = parser.parse(sb.toString());
                // LOGGER.info("BQ1 query:\t" + q.toString());
            } catch (Exception e) {
                e.printStackTrace();
                LOGGER.error("Error parsing query:\t" + sb.toString());
            }
            DocSet qd = searcher.getDocSet(q);
            // LOGGER.info("Number of docs in set\t" + qd.size());
            for (SuggestionResult sr : rs.suggestions) {
                // For each of the possible suggestions, see how prevalent
                // they are in the document set so that we can estimate their
                // likelihood of being correct.
                sb = new StringBuilder();
                // TODO: should use a tokenizer
                String[] suggestionTokens = sr.suggestion.split(" ");
                for (int zz = 0; zz < suggestionTokens.length; zz++) {
                    StringBuilder inner = new StringBuilder();
                    for (String field : fields) {
                        inner.append(field + ":" + suggestionTokens[zz] + " ");
                    }
                    if (inner.length() > 0) {
                        sb.append("+(" + inner + ")");
                    }
                }
                // Prevent a zero from bumping the score down to nothing.
                double Q_c = .0000001;
                try {
                    // LOGGER.info("BQ2 query String:\t" + sb.toString());
                    q = parser.parse(sb.toString());
                    // LOGGER.info("BQ2 query:\t" + q.toString());
                } catch (Exception e) {
                    // LOGGER.error("parser fail?");
                }
                DocSet pd = searcher.getDocSet(q);
                // LOGGER.info("Number of docs in phrase set\t" + pd.size());
                if (pd.size() != 0) {
                    // As per equation (13) from the paper.
                    Q_c += qd.intersection(pd).size() / (pd.size() * 1.0);
                }
                // LOGGER.info("Q_c = (" + Q_c + ") * (" + sr.probability + ")");
                newrs.add(sr.suggestion, sr.probability * Q_c);
            }
            rs = newrs;
        }
    } catch (IOException ex) {
        LOGGER.error(ex.getMessage());
    }
    return rs;
}
From source file:org.alfresco.solr.query.SolrAuthorityScorer.java
License:Open Source License
public static SolrAuthorityScorer createAuthorityScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authority) throws IOException {
    Properties p = searcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    Query key = new SolrAuthorityQuery(authority);

    DocSet answer = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key);
    if (answer != null) {
        // Answer was in the cache, so return it.
        return new SolrAuthorityScorer(weight, answer, context, searcher);
    }

    // Answer was not in the cache, so build the results, cache and return.
    final HashSet<String> globalReaders = GlobalReaders.getReaders();

    if (globalReaders.contains(authority) || !doPermissionChecks) {
        // The authority can read all docs.
        DocSet allDocs = searcher.getDocSet(new MatchAllDocsQuery());
        return new SolrAuthorityScorer(weight, allDocs, context, searcher);
    }

    // Docs for which the authority has explicit read access.
    DocSet readableDocSet = searcher.getDocSet(new SolrReaderQuery(authority));

    // Are all doc owners granted read permissions at a global level?
    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        // Get the set of docs owned by the authority (which they can therefore read).
        DocSet authorityOwnedDocs = searcher.getDocSet(new SolrOwnerQuery(authority));

        // Final set of docs that the authority can read.
        DocSet toCache = readableDocSet.union(authorityOwnedDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrAuthorityScorer(weight, toCache, context, searcher);
    } else {
        // Docs that owners are allowed to read.
        DocSet ownerReadableDocSet = searcher.getDocSet(new SolrReaderQuery(PermissionService.OWNER_AUTHORITY));
        DocSet authorityOwnedDocs = searcher.getDocSet(new SolrOwnerQuery(authority));

        // Docs where the authority is an owner and where owners have read rights.
        DocSet docsAuthorityOwnsAndCanRead = ownerReadableDocSet.intersection(authorityOwnedDocs);

        // Final set of docs that the authority can read.
        DocSet toCache = readableDocSet.union(docsAuthorityOwnsAndCanRead);
        searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrAuthorityScorer(weight, toCache, context, searcher);
    }
}
From source file:org.alfresco.solr.query.SolrAuthoritySetScorer.java
License:Open Source License
public static SolrAuthoritySetScorer createAuthoritySetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities) throws IOException {
    Properties p = searcher.getSchema().getResourceLoader().getCoreProperties();
    boolean doPermissionChecks = Boolean.parseBoolean(p.getProperty("alfresco.doPermissionChecks", "true"));

    Query key = new SolrAuthoritySetQuery(authorities);

    DocSet answer = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key);
    if (answer != null) {
        // Answer was in the cache, so return it.
        return new SolrAuthoritySetScorer(weight, answer, context, searcher);
    }

    // Answer was not in the cache, so build the results, cache and return.
    // The authorities string is delimited by its own first character.
    String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

    boolean hasGlobalRead = false;
    final HashSet<String> globalReaders = GlobalReaders.getReaders();
    for (String auth : auths) {
        if (globalReaders.contains(auth)) {
            hasGlobalRead = true;
            break;
        }
    }

    if (hasGlobalRead || !doPermissionChecks) {
        // The authorities can read all docs.
        WrappedQuery wrapped = new WrappedQuery(new MatchAllDocsQuery());
        wrapped.setCache(false);
        DocSet allDocs = searcher.getDocSet(wrapped);
        return new SolrAuthoritySetScorer(weight, allDocs, context, searcher);
    }

    // Docs for which the authorities have explicit read access.
    WrappedQuery wrapped = new WrappedQuery(new SolrReaderSetQuery(authorities));
    wrapped.setCache(false);
    DocSet readableDocSet = searcher.getDocSet(wrapped);

    // Are all doc owners granted read permissions at a global level?
    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        // Get the set of docs owned by the authorities (which they can therefore read).
        wrapped = new WrappedQuery(new SolrOwnerSetQuery(authorities));
        wrapped.setCache(false);
        DocSet authorityOwnedDocs = searcher.getDocSet(wrapped);

        // Final set of docs that the authorities can read.
        DocSet toCache = readableDocSet.union(authorityOwnedDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrAuthoritySetScorer(weight, toCache, context, searcher);
    } else {
        // Docs that owners are allowed to read.
        wrapped = new WrappedQuery(new SolrReaderSetQuery("|" + PermissionService.OWNER_AUTHORITY));
        wrapped.setCache(false);
        DocSet ownerReadableDocSet = searcher.getDocSet(wrapped);

        wrapped = new WrappedQuery(new SolrOwnerSetQuery(authorities));
        wrapped.setCache(false);
        DocSet authorityOwnedDocs = searcher.getDocSet(wrapped);

        // Docs where an authority is an owner and where owners have read rights.
        DocSet docsAuthorityOwnsAndCanRead = ownerReadableDocSet.intersection(authorityOwnedDocs);

        // Final set of docs that the authorities can read.
        DocSet toCache = readableDocSet.union(docsAuthorityOwnsAndCanRead);
        searcher.cacheInsert(CacheConstants.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrAuthoritySetScorer(weight, toCache, context, searcher);
    }
}
From source file:org.alfresco.solr.query.SolrCachingAuthorityScorer.java
License:Open Source License
public static SolrCachingAuthorityScorer createAuthorityScorer(SolrIndexSearcher searcher, Similarity similarity,
        String authority, SolrIndexReader reader) throws IOException {
    // Get hold of the Solr top-level searcher, execute the query with caching,
    // translate the results to leaf docs, and build the ordered doc list.
    Query key = new SolrCachingAuthorityQuery(authority);

    DocSet answer = (DocSet) searcher.cacheLookup(AlfrescoSolrEventListener.ALFRESCO_AUTHORITY_CACHE, key);
    if (answer != null) {
        return new SolrCachingAuthorityScorer(similarity, answer, reader);
    }

    HashSet<String> globalReaders = (HashSet<String>) searcher.cacheLookup(
            AlfrescoSolrEventListener.ALFRESCO_CACHE, AlfrescoSolrEventListener.KEY_GLOBAL_READERS);
    if (globalReaders.contains(authority)) {
        // The authority can read all docs.
        OpenBitSet allLeafDocs = (OpenBitSet) searcher.cacheLookup(AlfrescoSolrEventListener.ALFRESCO_CACHE,
                AlfrescoSolrEventListener.KEY_ALL_LEAF_DOCS);
        DocSet toCache = new BitDocSet(allLeafDocs);
        searcher.cacheInsert(AlfrescoSolrEventListener.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrCachingAuthorityScorer(similarity, toCache, reader);
    }

    DocSet readableDocSet = searcher.getDocSet(new SolrCachingReaderQuery(authority));

    if (globalReaders.contains(PermissionService.OWNER_AUTHORITY)) {
        DocSet authorityOwnedDocs = searcher.getDocSet(new SolrCachingOwnerQuery(authority));
        DocSet toCache = readableDocSet.union(authorityOwnedDocs);
        searcher.cacheInsert(AlfrescoSolrEventListener.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrCachingAuthorityScorer(similarity, toCache, reader);
    } else {
        // Docs the authority owns, restricted to those where owners have read rights.
        DocSet ownerReadableDocSet = searcher
                .getDocSet(new SolrCachingReaderQuery(PermissionService.OWNER_AUTHORITY));
        DocSet authorityOwnedDocs = searcher.getDocSet(new SolrCachingOwnerQuery(authority));
        DocSet docsAuthorityOwnsAndCanRead = ownerReadableDocSet.intersection(authorityOwnedDocs);
        DocSet toCache = readableDocSet.union(docsAuthorityOwnsAndCanRead);
        searcher.cacheInsert(AlfrescoSolrEventListener.ALFRESCO_AUTHORITY_CACHE, key, toCache);
        return new SolrCachingAuthorityScorer(similarity, toCache, reader);
    }
}
From source file:org.apache.lucene.search.suggest.hovland.ContextAwarePhraseLookup.java
License:Apache License
private SuggestionResultSet getSuggestions(String query) {
    query = deAccent(query);
    /* String[] queryTokens = query.replaceAll("[^A-Za-z0-9 ]", " ")
            .replace("  ", " ").trim().split(" "); */
    String[] queryTokens = SuggesterAnalyzer.analyze(SuggesterAnalyzer.getQueryAnalizer(), query);
    String endToken = queryTokens[queryTokens.length - 1].toLowerCase();
    LOGGER.debug("Building suggestions for: " + endToken);

    // Get completions for the last word in the query.
    SuggestionResultSet rs = headNode.computeQt(endToken, maxPhraseSearch);
    // Didn't find it, bail early.
    if (rs == null) {
        LOGGER.debug("Didn't find anything for: " + endToken);
        return new SuggestionResultSet("", 0);
    }
    LOGGER.debug(rs.toString());
    rs.value = "";
    LOGGER.debug("Doing 2nd part of equation");
    try {
        if (queryTokens.length > 1) {
            QueryParser parser = new QueryParser("contents",
                    searcher.getCore().getLatestSchema().getQueryAnalyzer());
            SuggestionResultSet newRs = new SuggestionResultSet("", maxPhraseSearch);
            StringBuilder sb = new StringBuilder();
            // Build a search across all of the target fields.
            for (int zz = 0; zz < queryTokens.length - 1; zz++) {
                newRs.value = newRs.value + queryTokens[zz] + " ";
                StringBuilder inner = new StringBuilder();
                for (String field : fields) {
                    String escaped_field = parser.escape(field);
                    // Looking for the query token.
                    String escaped_token = parser.escape(queryTokens[zz]);
                    inner.append(escaped_field + ":" + escaped_token + " ");
                }
                if (inner.length() > 0) {
                    sb.append("+(" + inner + ")");
                }
            }
            LOGGER.debug("SB query:\t" + sb.toString());
            Query q = null;
            try {
                // Convert it to a Lucene query.
                q = parser.parse(sb.toString());
                LOGGER.debug("BQ1 query:\t" + q.toString());
            } catch (Exception e) {
                e.printStackTrace();
                LOGGER.error("Error parsing query:\t" + sb.toString());
            }
            DocSet qd = searcher.getDocSet(q);
            LOGGER.debug("Number of docs in set\t" + qd.size());
            for (SuggestionResultSet.SuggestionResult sr : rs.suggestions) {
                // For each of the possible suggestions, see how prevalent
                // they are in the document set so that we can estimate their
                // likelihood of being correct.
                sb = new StringBuilder();
                // TODO: should use a tokenizer
                String[] suggestionTokens = sr.suggestion.split(" ");
                for (int zz = 0; zz < suggestionTokens.length; zz++) {
                    StringBuilder inner = new StringBuilder();
                    for (String field : fields) {
                        inner.append(field + ":" + suggestionTokens[zz] + " ");
                    }
                    if (inner.length() > 0) {
                        sb.append("+(" + inner + ")");
                    }
                }
                // Prevent a zero from bumping the score down to nothing.
                double Q_c = .0000001;
                try {
                    LOGGER.debug("BQ2 query String:\t" + sb.toString());
                    q = parser.parse(sb.toString());
                    LOGGER.debug("BQ2 query:\t" + q.toString());
                } catch (Exception e) {
                    // LOGGER.error("parser fail?");
                }
                DocSet pd = searcher.getDocSet(q);
                LOGGER.debug("Number of docs in phrase set\t" + pd.size());
                if (pd.size() != 0) {
                    // As per equation (13) from the paper.
                    Q_c += qd.intersection(pd).size() / (pd.size() * 1.0);
                }
                LOGGER.debug("Q_c = (" + Q_c + ") * (" + sr.probability + ")");
                LOGGER.debug("Adding: {" + sr.suggestion + ", " + (sr.probability * Q_c));
                newRs.add(sr.suggestion, sr.probability * Q_c);
            }
            rs = newRs;
            LOGGER.debug("Final suggestions. " + rs.toString());
        }
    } catch (IOException ex) {
        LOGGER.error(ex.getMessage());
    }
    return rs;
}
From source file:org.apache.lucene.search.suggest.hovland.ContextAwarePhraseSuggester.java
License:Apache License
public SuggestionResultSet getSuggestions(SolrIndexSearcher searcher, String[] fields, String query,
        int maxPhraseSearch) {
    query = deAccent(query);
    /* String[] queryTokens = query.replaceAll("[^A-Za-z0-9 ]", " ")
            .replace("  ", " ").trim().split(" "); */
    String[] queryTokens = SuggesterAnalyzer.analyze(SuggesterAnalyzer.getQueryAnalizer(), query);
    String endToken = queryTokens[queryTokens.length - 1].toLowerCase();
    LOGGER.debug("Building suggestions for: " + endToken);

    // Get completions for the last word in the query.
    SuggestionResultSet rs = headNode.computeQt(endToken, maxPhraseSearch);
    // Didn't find it, bail early.
    if (rs == null) {
        LOGGER.debug("Didn't find anything for: " + endToken);
        return new SuggestionResultSet("", 0);
    }
    LOGGER.debug(rs.toString());
    rs.value = "";
    LOGGER.debug("Doing 2nd part of equation");
    try {
        if (queryTokens.length > 1) {
            QueryParser parser = new QueryParser("contents",
                    searcher.getCore().getLatestSchema().getQueryAnalyzer());
            SuggestionResultSet newRs = new SuggestionResultSet("", maxPhraseSearch);
            StringBuilder sb = new StringBuilder();
            // Build a search across all of the target fields.
            for (int zz = 0; zz < queryTokens.length - 1; zz++) {
                newRs.value = newRs.value + queryTokens[zz] + " ";
                StringBuilder inner = new StringBuilder();
                for (String field : fields) {
                    String escaped_field = parser.escape(field);
                    // Looking for the query token.
                    String escaped_token = parser.escape(queryTokens[zz]);
                    inner.append(escaped_field + ":" + escaped_token + " ");
                }
                if (inner.length() > 0) {
                    sb.append("+(" + inner + ")");
                }
            }
            LOGGER.debug("SB query:\t" + sb.toString());
            Query q = null;
            try {
                // Convert it to a Lucene query.
                q = parser.parse(sb.toString());
                LOGGER.debug("BQ1 query:\t" + q.toString());
            } catch (Exception e) {
                e.printStackTrace();
                LOGGER.error("Error parsing query:\t" + sb.toString());
            }
            DocSet qd = searcher.getDocSet(q);
            LOGGER.debug("Number of docs in set\t" + qd.size());
            for (SuggestionResultSet.SuggestionResult sr : rs.suggestions) {
                // For each of the possible suggestions, see how prevalent
                // they are in the document set so that we can estimate their
                // likelihood of being correct.
                sb = new StringBuilder();
                // TODO: should use a tokenizer
                String[] suggestionTokens = sr.suggestion.split(" ");
                for (int zz = 0; zz < suggestionTokens.length; zz++) {
                    StringBuilder inner = new StringBuilder();
                    for (String field : fields) {
                        inner.append(field + ":" + suggestionTokens[zz] + " ");
                    }
                    if (inner.length() > 0) {
                        sb.append("+(" + inner + ")");
                    }
                }
                // Prevent a zero from bumping the score down to nothing.
                double Q_c = .0000001;
                try {
                    LOGGER.debug("BQ2 query String:\t" + sb.toString());
                    q = parser.parse(sb.toString());
                    LOGGER.debug("BQ2 query:\t" + q.toString());
                } catch (Exception e) {
                    // LOGGER.error("parser fail?");
                }
                DocSet pd = searcher.getDocSet(q);
                LOGGER.debug("Number of docs in phrase set\t" + pd.size());
                if (pd.size() != 0) {
                    // As per equation (13) from the paper.
                    Q_c += qd.intersection(pd).size() / (pd.size() * 1.0);
                }
                LOGGER.debug("Q_c = (" + Q_c + ") * (" + sr.probability + ")");
                LOGGER.debug("Adding: {" + sr.suggestion + ", " + (sr.probability * Q_c));
                newRs.add(sr.suggestion, sr.probability * Q_c);
            }
            rs = newRs;
            LOGGER.debug("Final suggestions. " + rs.toString());
        }
    } catch (IOException ex) {
        LOGGER.error(ex.getMessage());
    }
    return rs;
}