Usage examples for org.apache.lucene.search.Explanation#getDetails()
public Explanation[] getDetails()
From source file:com.doculibre.constellio.lucene.LuceneSearchResultsProvider.java
License:Open Source License
public synchronized List<String> getMatchingFieldNames(int docId) { Set<String> matchingFieldNames = new HashSet<String>(); try {// w w w. j av a 2s . c o m Explanation explanation = getIndexSearcher().explain(luceneQuery, docId); if (explanation instanceof ComplexExplanation) { Explanation[] details = explanation.getDetails(); if (details != null) { for (int i = 0; i < details.length; i++) { Explanation detail = details[i]; matchingFieldNames.addAll(getMatchingFieldNames(docId, detail)); } } } else { matchingFieldNames.addAll(getMatchingFieldNames(docId, explanation)); } } catch (IOException e) { throw new RuntimeException(e); } return new ArrayList<String>(matchingFieldNames); }
From source file:com.doculibre.constellio.lucene.LuceneSearchResultsProvider.java
License:Open Source License
private List<String> getMatchingFieldNames(int docId, Explanation explanation) { Set<String> matchingFieldNames = new HashSet<String>(); if (explanation instanceof ComplexExplanation) { Explanation[] details = explanation.getDetails(); if (details != null) { for (int i = 0; i < details.length; i++) { Explanation detail = details[i]; // Recursive call matchingFieldNames.addAll(getMatchingFieldNames(docId, detail)); }/*from ww w .j a v a2 s.com*/ } } String description = explanation.getDescription(); int indexOfFirstChar = 0; int indexOfColon = description.indexOf(":"); while (indexOfColon != -1) { String delim = "("; int indexOfDelim = description.indexOf(delim, indexOfFirstChar); if (indexOfDelim != -1) { int indexOfFieldName = indexOfDelim + delim.length(); String matchingFieldName = description.substring(indexOfFieldName, indexOfColon); if (matchingFieldName.startsWith("termFreq(")) { matchingFieldName = matchingFieldName.substring("termFreq(".length()); } matchingFieldNames.add(matchingFieldName); indexOfFirstChar = indexOfColon + 1; indexOfColon = description.indexOf(":", indexOfFirstChar); } else { break; } } return new ArrayList<String>(matchingFieldNames); }
From source file:com.nuvolect.deepdive.lucene.Search.java
/** * Return results for a search along a specific path. If the path is changed or new * create an index.//from w ww .j av a 2 s . com * @param searchQuery * @param searchPath * @return */ public static JSONObject search(String searchQuery, String volumeId, String searchPath) { JSONObject result = new JSONObject(); JSONArray jsonArray = new JSONArray(); Context ctx = App.getContext(); DirectoryReader ireader = null; ScoreDoc[] scoreDocs = null; String error = ""; preSearch(volumeId, searchPath); try { ireader = DirectoryReader.open(m_directory); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } IndexSearcher isearcher = new IndexSearcher(ireader); Query query = null; try { LogUtil.log(LogUtil.LogType.SEARCH, "query: " + searchQuery + ", vid: " + volumeId + ", path: " + searchPath); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser(CConst.FIELD_CONTENT, m_analyzer); query = parser.parse(searchQuery); TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS); isearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (ParseException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } // Iterate through the results creating an object for each file HashMap<String, Integer> hitCounts = new HashMap<>(); HashMap<String, Integer> hitIndexes = new HashMap<>(); /** * First iterate the hit list and count duplicates based on file path. 
*/ for (int ii = 0; scoreDocs != null && ii < scoreDocs.length; ++ii) { Document hitDoc = null; int fileHits = 1; try { hitDoc = isearcher.doc(scoreDocs[ii].doc); Explanation explanation = isearcher.explain(query, scoreDocs[ii].doc); Explanation[] details = explanation.getDetails(); String description = details[0].getDescription(); /** * FIXME, find a better way to count hits in each file */ if (description.contains("=")) { String[] lineParts = description.split("="); String[] elementParts = lineParts[2].split(Pattern.quote(")")); if (elementParts.length > 0) { fileHits = ((int) Double.parseDouble(elementParts[0])); } } } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String filePath = hitDoc.get((CConst.FIELD_PATH)); if (hitCounts.containsKey(filePath)) { hitCounts.put(filePath, hitCounts.get(filePath) + fileHits); } else { hitCounts.put(filePath, fileHits); hitIndexes.put(filePath, ii); } } /** * Iterate over each unique hit and save the results */ for (Map.Entry<String, Integer> uniqueHit : hitIndexes.entrySet()) { Document hitDoc = null; try { hitDoc = isearcher.doc(scoreDocs[uniqueHit.getValue()].doc); } catch (IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); error += e.toString(); } String file_name = hitDoc.get((CConst.FIELD_FILENAME)); String file_path = hitDoc.get((CConst.FIELD_PATH)); try { String folder_url = OmniHash.getStartPathUrl(ctx, volumeId, file_path); JSONObject hitObj = new JSONObject(); hitObj.put("volume_id", volumeId); hitObj.put("file_path", file_path); hitObj.put("file_name", file_name); hitObj.put("folder_url", folder_url); hitObj.put("num_hits", hitCounts.get(file_path)); hitObj.put("error", error); jsonArray.put(hitObj); } catch (Exception e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } } int num_hits = scoreDocs != null ? scoreDocs.length : 0; try { result.put("hits", jsonArray != null ? 
jsonArray : new JSONArray()); result.put("num_hits", num_hits); result.put("error", error); ireader.close(); m_directory.close(); } catch (JSONException | IOException e) { LogUtil.logException(LogUtil.LogType.SEARCH, e); } return result; }
From source file:com.o19s.es.ltr.query.LtrQueryTests.java
License:Apache License
/**
 * Asserts that each per-feature sub-explanation's description starts with the
 * expected "Feature &lt;idx&gt;:" prefix — including "(name)" when the feature
 * has a name.
 */
public void checkFeatureNames(Explanation expl, List<PrebuiltFeature> features) {
    Explanation[] featureExpls = expl.getDetails();
    int idx = 0;
    for (Explanation featureExpl : featureExpls) {
        String name = features.get(idx).name();
        String expectedPrefix = (name == null)
                ? "Feature " + idx + ":"
                : "Feature " + idx + "(" + name + "):";
        String actualPrefix = featureExpl.getDescription().substring(0, expectedPrefix.length());
        assertEquals(expectedPrefix, actualPrefix);
        idx++;
    }
}
From source file:com.senseidb.search.req.SenseiRequestProtoSerializer.java
License:Apache License
private SenseiProtos.Explanation convertExplanation(Explanation explanation) { SenseiProtos.Explanation.Builder builder = SenseiProtos.Explanation.newBuilder(); builder.setDescription(explanation.getDescription()); builder.setValue(explanation.getValue()); if (explanation.getDetails() != null && explanation.getDetails().length > 0) { for (int i = 0; i < explanation.getDetails().length; i++) { builder.addDetails(convertExplanation(explanation.getDetails()[i])); }//from w w w . jav a 2s .c o m } return builder.build(); }
From source file:com.xiaomi.linden.core.search.LindenResultParser.java
License:Apache License
/**
 * Recursively maps a Lucene {@link Explanation} tree onto the Linden
 * explanation model, preserving description, value and nesting.
 */
private LindenExplanation parseLindenExplanation(Explanation expl) {
    LindenExplanation result = new LindenExplanation();
    result.setDescription(expl.getDescription());
    result.setValue(expl.getValue());
    Explanation[] children = expl.getDetails();
    if (children != null) {
        for (Explanation child : children) {
            result.addToDetails(parseLindenExplanation(child));
        }
    }
    return result;
}
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/**
 * returns collection of docIds of the Lucene docs that hit, at least
 * threshold times.
 * warning! only looks up body field, no others
 * Caution: This code is not to be touched, unless something is being optimised
 * Introducing something here can seriously affect the search times.
 *
 * @param q         query string (syntax depends on {@code qt})
 * @param threshold minimum term-frequency for a doc to be included; 1 means
 *                  every hit is included (fast path, no explain() call)
 * @param searcher  must be either {@code isearcher} (emails) or
 *                  {@code isearcher_blob} (attachments) — asserted below
 * @param qt        selects which QueryParser (or raw regex query) to use
 * @param lt        max number of Lucene hits to request
 * @return pair of (matching docIds, total Lucene hit count)
 */
private Pair<Collection<String>, Integer> luceneLookupAsDocIdsWithTotalHits(String q, int threshold,
        IndexSearcher searcher, QueryType qt, int lt)
        throws IOException, ParseException, GeneralSecurityException, ClassNotFoundException {
    Collection<String> result = new ArrayList<String>();
    // String escaped_q = escapeRegex(q); // to mimic built-in regex support
    //TODO: There should also be a general query type that takes any query with field param, i.e. without parser
    Query query;
    if (qt == QueryType.ORIGINAL)
        query = parserOriginal.parse(q);
    else if (qt == QueryType.SUBJECT)
        query = parserSubject.parse(q);
    else if (qt == QueryType.CORRESPONDENTS)
        query = parserCorrespondents.parse(q);
    else if (qt == QueryType.REGEX) {
        query = new BooleanQuery();
        /**
         * Note: this is not a spanning (i.e. doesn't search over more than
         * one token) regexp, for spanning regexp use: body_unanlyzed and
         * title_unanlyzed fields instead
         */
        Query query1 = new RegexpQuery(new Term("body", q), RegExp.ALL);
        Query query2 = new RegexpQuery(new Term("title", q), RegExp.ALL);
        ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
        ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
    } else
    // Disabled PRESET_REGEX branch kept for reference:
    /* if (qt == QueryType.PRESET_REGEX) {
        query = new BooleanQuery();
        if(presetQueries != null) {
            for (String pq : presetQueries) {
                Query query1 = new RegexpQuery(new Term("body", pq), RegExp.ALL);
                Query query2 = new RegexpQuery(new Term("title", pq), RegExp.ALL);
                ((BooleanQuery) query).add(query1, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
                ((BooleanQuery) query).add(query2, org.apache.lucene.search.BooleanClause.Occur.SHOULD);
            }
            log.info("Doing a preset regex search");
        }else{
            log.warn("Preset queries is not initialised");
        }
    } else */
    if (qt == QueryType.META) {
        query = parserMeta.parse(q);
    } else
        query = parser.parse(q);

    // query = convertRegex(query);
    long st = System.currentTimeMillis();
    int totalHits = 0;
    ScoreDoc[] hits = null;
    if (query != null) {
        TopDocs tds = searcher.search(query, null, lt);
        log.info("Took: " + (System.currentTimeMillis() - st) + "ms for query:" + query);
        hits = tds.scoreDocs;
        totalHits = tds.totalHits;
    } else {
        log.error("Query is null!!");
    }

    // this logging causes a 50% overhead on the query -- maybe enable it only for debugging
    // log.info (hits.length + " hits for query " + Util.ellipsize(q, 30) + " => " + Util.ellipsize(escaped_q, 30) + " = " + Util.ellipsize(query.toString(), 30) + " :");

    // Iterate through the results:
    // TODO: not very pretty code here to determine dir_name which selects the cache to use
    Util.softAssert(searcher == isearcher || searcher == isearcher_blob);
    String dir_name = searcher == isearcher ? INDEX_NAME_EMAILS : INDEX_NAME_ATTACHMENTS;
    Map<Integer, String> map = dirNameToDocIdMap.get(dir_name);
    if (map == null) {
        map = new LinkedHashMap<Integer, String>();
        dirNameToDocIdMap.put(dir_name, map);
        log.info("Adding new entry for dir name to docIdMap");
    } else {
        log.info("Existing entry for dir name to docIdMap");
    }
    int n_added = 0;
    log.info("Found: " + hits.length + " hits for query: " + q);
    for (int i = 0; i < hits.length; i++) {
        int ldocId = hits[i].doc; // this is the lucene doc id, we need to map it to our doc id.
        String docId = null; // this will be our doc id
        // try to use the new fieldcache id's
        // if this works, we can get rid of the dirNameToDocIdMap
        try {
            docId = (searcher == isearcher) ? contentDocIds.get(ldocId) : blobDocIds.get(ldocId);
        } catch (Exception e) {
            Util.print_exception(e, log);
            continue;
        }

        if (threshold <= 1) {
            // common case: threshold is 1.
            result.add(docId);
            n_added++;
        } else {
            // more expensive, do it only if threshold is > 1
            Explanation expl = searcher.explain(query, hits[i].doc);
            Explanation[] details = expl.getDetails();
            // NB: a catch here is that details.length doesn't reflect the actual # of hits for the query.
            // sometimes, for a single hit, there are 2 entries, a ComplexExplanation and an Explanation.
            // not sure why, but is somewhat corroborated by the code:
            // http://massapi.com/class/ex/Explanation.html
            // showing a single hit creating both a C.E and an E.
            // a more robust approach might be to look for the summary to end with product of: , sum of: etc.
            // e.g. http://www.gossamer-threads.com/lists/lucene/java-dev/49706
            // but for now, we'll count only the number of ComplexExplanation and check if its above threshold
            // log.info("doc id " + hits[i].toString() + " #details = " + details.length);

            // HORRIBLE HACK! - because we don't know a better way to find the threshold
            // Sum every "tf(termFreq(...))=N" fragment in the explanation text; add
            // the doc as soon as the running total reaches the threshold.
            outer: for (Explanation detail : details) {
                // log.info(detail.getClass().getName());
                if (detail instanceof ComplexExplanation) {
                    ComplexExplanation ce = (ComplexExplanation) detail;
                    String s = ce.toString();
                    int total_tf = 0;
                    while (true) {
                        int idx = s.indexOf("tf(termFreq(");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx);
                        idx = s.indexOf("=");
                        if (idx < 0)
                            break outer;
                        s = s.substring(idx + 1);
                        int idx1 = s.indexOf(")");
                        // NOTE(review): this tests idx, but idx1 was just computed —
                        // likely intended to be (idx1 < 0); verify before changing.
                        if (idx < 0)
                            break outer;
                        String num_str = s.substring(0, idx1);
                        int num = 0;
                        try {
                            num = Integer.parseInt(num_str);
                        } catch (Exception e) {
                            log.warn("ERROR parsing complex expl: " + num_str);
                        }
                        total_tf += num;
                        if (total_tf >= threshold) {
                            result.add(docId);
                            n_added++;
                            break outer;
                        }
                    }
                }
            }
        }
    }
    log.info(n_added + " docs added to docIdMap cache");
    return new Pair<Collection<String>, Integer>(result, totalHits);
}
From source file:org.apache.solr.util.SolrPluginUtils.java
License:Apache License
public static NamedList<Object> explanationToNamedList(Explanation e) { NamedList<Object> out = new SimpleOrderedMap<Object>(); out.add("match", e.isMatch()); out.add("value", e.getValue()); out.add("description", e.getDescription()); Explanation[] details = e.getDetails(); // short circut out if (null == details || 0 == details.length) return out; List<NamedList<Object>> kids = new ArrayList<NamedList<Object>>(details.length); for (Explanation d : details) { kids.add(explanationToNamedList(d)); }/* w w w . jav a 2s .c o m*/ out.add("details", kids); return out; }
From source file:org.codelibs.elasticsearch.common.lucene.Lucene.java
License:Apache License
/**
 * Recursively serializes an {@link Explanation} to the stream: match flag,
 * description, child count, children, and — only for matches — the score value.
 */
public static void writeExplanation(StreamOutput out, Explanation explanation) throws IOException {
    out.writeBoolean(explanation.isMatch());
    out.writeString(explanation.getDescription());
    Explanation[] children = explanation.getDetails();
    out.writeVInt(children.length);
    for (Explanation child : children) {
        writeExplanation(out, child);
    }
    if (explanation.isMatch()) {
        out.writeFloat(explanation.getValue());
    }
}
From source file:org.elasticsearch.common.lucene.client.Lucene.java
License:Apache License
/**
 * Recursively serializes an {@link Explanation} to the stream: value,
 * description, then a presence flag followed by the child count and children
 * when sub-explanations exist.
 */
public static void writeExplanation(StreamOutput out, Explanation explanation) throws IOException {
    out.writeFloat(explanation.getValue());
    out.writeUTF(explanation.getDescription());
    Explanation[] children = explanation.getDetails();
    if (children == null) {
        // No sub-explanations: write absence marker only.
        out.writeBoolean(false);
        return;
    }
    out.writeBoolean(true);
    out.writeVInt(children.length);
    for (Explanation child : children) {
        writeExplanation(out, child);
    }
}