List of usage examples for org.apache.lucene.search IndexSearcher explain
protected Explanation explain(Weight weight, int doc) throws IOException
Returns an Explanation describing how the document `doc` scored against the given `weight`.
. From source file:org.getopt.luke.Luke.java
License:Apache License
/** * Pop up a modal dialog explaining the selected result. * @param sTable Thinlet table widget containing selected search result. */// w ww. j a v a 2 s.c o m public void explainResult(Object sTable) { Object row = getSelectedItem(sTable); if (row == null) return; final Integer docid = (Integer) getProperty(row, "docid"); if (docid == null) return; if (ir == null) { showStatus(MSG_NOINDEX); return; } final Query q = (Query) getProperty(sTable, "query"); if (q == null) return; Thread t = new Thread() { public void run() { try { IndexSearcher is = new IndexSearcher(ir); Similarity sim = createSimilarity(find("srchOptTabs")); is.setSimilarity(sim); Explanation expl = is.explain(q, docid.intValue()); Object dialog = addComponent(null, "/xml/explain.xml", null, null); Object eTree = find(dialog, "eTree"); addNode(eTree, expl); //setBoolean(eTree, "expand", true); add(dialog); } catch (Exception e) { e.printStackTrace(); errorMsg(e.getMessage()); } } }; if (slowAccess) { t.start(); } else { t.run(); } }
From source file:org.modmine.web.ModMineSearch.java
License:GNU General Public License
/** * perform a keyword search over all document metadata fields with lucene * @param searchString//from ww w . j a v a 2 s .com * string to search for * @return map of document IDs with their respective scores */ public static Map<Integer, Float> runLuceneSearch(String searchString) { LinkedHashMap<Integer, Float> matches = new LinkedHashMap<Integer, Float>(); String queryString = parseQueryString(prepareQueryString(searchString)); long time = System.currentTimeMillis(); try { IndexSearcher searcher = new IndexSearcher(ram); Analyzer analyzer = new WhitespaceAnalyzer(); org.apache.lucene.search.Query query; // pass entire list of field names to the multi-field parser // => search through all fields String[] fieldNamesArray = new String[fieldNames.size()]; fieldNames.toArray(fieldNamesArray); QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, fieldNamesArray, analyzer, fieldBoosts); query = queryParser.parse(queryString); // required to expand search terms query = query.rewrite(IndexReader.open(ram)); LOG.debug("Actual query: " + query); TopDocs topDocs = searcher.search(query, 500); time = System.currentTimeMillis() - time; LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time + " milliseconds:"); for (int i = 0; (i < MAX_HITS && i < topDocs.totalHits); i++) { Document doc = searcher.doc(topDocs.scoreDocs[i].doc); String name = doc.get("name"); // show how score was calculated if (i < 2) { LOG.debug("Score for " + name + ": " + searcher.explain(query, topDocs.scoreDocs[i].doc)); } matches.put(Integer.parseInt(name), new Float(topDocs.scoreDocs[i].score)); } } catch (ParseException e) { // just return an empty list LOG.info("Exception caught, returning no results", e); } catch (IOException e) { // just return an empty list LOG.info("Exception caught, returning no results", e); } return matches; }
From source file:org.mskcc.pathdb.lucene.LuceneResults.java
License:Open Source License
/**
 * Builds the result set for one page of Lucene hits.
 *
 * Extracts the cPath id, score, highlighted fragments, family-tree counts and
 * data sources for every hit between the pager's start and end indexes. When
 * {@code debug} is set, the scoring Explanation for each hit is also recorded
 * (in {@code explanationMap}, presumably a field initialized elsewhere — verify).
 *
 * @param pager                pagination window (start/end indexes into hits)
 * @param query                the query that produced the hits
 * @param indexSearcher        searcher used to produce scoring explanations
 * @param hits                 Lucene hit list
 * @param term                 raw search term, used for fragment highlighting; may be null
 * @param globalFilterSettings optional global data-source filter; may be null
 * @param debug                when true, capture per-hit scoring explanations
 * @throws IOException    on index access failure
 * @throws ParseException on highlighter query parse failure
 * @throws DaoException   on database access failure
 */
public LuceneResults(Pager pager, Query query, IndexSearcher indexSearcher, Hits hits, String term,
        GlobalFilterSettings globalFilterSettings, boolean debug)
        throws IOException, ParseException, DaoException {
    numHits = hits.length();
    int size = pager.getEndIndex() - pager.getStartIndex();
    // init private variables
    cpathIds = new long[size];
    fragments = new ArrayList<List<String>>();
    numDescendentsList = new ArrayList<Integer>();
    numParentsList = new ArrayList<Integer>();
    numParentPathwaysList = new ArrayList<Integer>();
    numParentInteractionsList = new ArrayList<Integer>();
    dataSourceMap = new HashMap<Long, Set<String>>();
    scores = new HashMap<Long, Float>();
    globalDataSources = new HashSet<String>();
    if (globalFilterSettings != null) {
        // Resolve each selected snapshot to its external database's master term.
        DaoExternalDbSnapshot daoSnapShot = new DaoExternalDbSnapshot();
        for (Long snapshotId : globalFilterSettings.getSnapshotIdSet()) {
            ExternalDatabaseSnapshotRecord snapShotRecord = daoSnapShot.getDatabaseSnapshot(snapshotId);
            if (snapShotRecord == null)
                continue;
            ExternalDatabaseRecord externalDatabaseRecord = snapShotRecord.getExternalDatabase();
            if (externalDatabaseRecord == null)
                continue;
            globalDataSources.add(externalDatabaseRecord.getMasterTerm());
        }
    }
    DaoExternalDb dao = new DaoExternalDb();
    int index = 0;
    Highlighter highLighter = null;
    if (term != null) {
        //term = reformatTerm(term);
        highLighter = createHighlighter(term);
    }
    for (int i = pager.getStartIndex(); i < pager.getEndIndex(); i++) {
        Document doc = hits.doc(i);
        Field field = doc.getField(LuceneConfig.FIELD_CPATH_ID);
        if (field != null) {
            // Parse the cPath id once (the original parsed the same string three times).
            long cpathId = Long.parseLong(field.stringValue());
            cpathIds[index++] = cpathId;
            scores.put(cpathId, Float.valueOf(hits.score(i)));
            if (debug) {
                explanationMap.put(cpathId, indexSearcher.explain(query, hits.id(i)));
            }
        }
        if (highLighter != null) {
            extractFragment(doc, highLighter, term);
        }
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_DESCENDENTS, numDescendentsList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENTS, numParentsList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENT_PATHWAYS, numParentPathwaysList);
        extractNumFamilyTree(doc, LuceneConfig.FIELD_NUM_PARENT_INTERACTIONS, numParentInteractionsList);
        extractDataSourceMap(doc, dao);
    }
}
From source file:org.ohdsi.usagi.tests.TestLucene.java
License:Apache License
public static void main(String[] args) throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); //Analyzer analyzer = new UsagiAnalyzer(); FieldType textVectorField = new FieldType(); textVectorField.setIndexed(true);//ww w. j a va 2 s . co m textVectorField.setTokenized(true); textVectorField.setStoreTermVectors(true); textVectorField.setStoreTermVectorPositions(false); textVectorField.setStoreTermVectorPayloads(false); textVectorField.setStoreTermVectorOffsets(false); textVectorField.setStored(true); textVectorField.freeze(); File indexFolder = new File(folder); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Field("F", "word1 word2 w3 word4", textVectorField)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("F", "word1 word2 w3", textVectorField)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder))); for (int i = 0; i < reader.numDocs(); i++) { TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null); BytesRef text; while ((text = termsEnum.next()) != null) { System.out.print(text.utf8ToString() + ","); } System.out.println(); } IndexSearcher searcher = new IndexSearcher(reader); // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); // mlt.setMinTermFreq(0); // mlt.setMinDocFreq(0); // mlt.setMaxDocFreq(9999); // mlt.setMinWordLen(0); // mlt.setMaxWordLen(9999); // mlt.setMaxDocFreqPct(100); // mlt.setMaxNumTokensParsed(9999); // mlt.setMaxQueryTerms(9999); // mlt.setStopWords(null); // mlt.setFieldNames(new String[] { "F" }); // mlt.setAnalyzer(new UsagiAnalyzer()); // Query query = mlt.like("F", new 
StringReader("Systolic blood pressure")); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer); Query query = parser.parse("word1"); Explanation explanation = searcher.explain(query, 0); print(explanation); System.out.println(); explanation = searcher.explain(query, 1); print(explanation); System.out.println(); TopDocs topDocs = searcher.search(query, 99); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F")); } }
From source file:org.talend.dataquality.standardization.index.Explainer.java
License:Open Source License
/**
 * Command-line tool: runs a query against an index and prints, for each of the
 * top ten hits, its title followed by Lucene's scoring explanation.
 */
public static void main(String[] args) throws Exception {
    // Expect exactly an index directory and a query expression.
    if (args.length != 2) {
        System.err.println("Usage: Explainer <index dir> <query>");
        System.exit(1);
    }
    final String idxPath = args[0];
    final String expr = args[1];
    Directory store = FSDirectory.open(new File(idxPath));
    // Parse the expression against the "contents" field.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "contents", new SimpleAnalyzer());
    Query parsed = queryParser.parse(expr);
    System.out.println("Query: " + expr);
    IndexSearcher idxSearcher = new IndexSearcher(store);
    TopDocs hitDocs = idxSearcher.search(parsed, 10);
    for (ScoreDoc hit : hitDocs.scoreDocs) {
        // Ask the searcher why this document matched, then print title + explanation.
        Explanation why = idxSearcher.explain(parsed, hit.doc);
        System.out.println("----------");
        Document matched = idxSearcher.doc(hit.doc);
        System.out.println(matched.get("title"));
        System.out.println(why.toString());
    }
    idxSearcher.close();
    store.close();
}
From source file:practica1_2.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 *
 * Note: unlike the stock Lucene demo, this variant also prints the scoring
 * explanation for every hit on the page.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        // User paged past what was collected: offer to re-run and gather all hits.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage);
        for (int i = start; i < end; i++) {
            // Print how this hit's score was computed.
            System.out.println(searcher.explain(query, hits[i].doc));
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
        if (!interactive || end == 0) {
            break;
        }
        // Interactive prompt: (p)revious / (n)ext / (q)uit / jump-to-page-number.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Anything else is treated as a 1-based page number.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:practica1_2.SearchFiles_P2.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 *
 * Note: unlike the stock Lucene demo, this variant also prints the scoring
 * explanation for every hit on the page.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        // User paged past what was collected: offer to re-run and gather all hits.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage);
        for (int i = start; i < end; i++) {
            // Print how this hit's score was computed.
            System.out.println(searcher.explain(query, hits[i].doc));
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }
        if (!interactive || end == 0) {
            break;
        }
        // Interactive prompt: (p)revious / (n)ext / (q)uit / jump-to-page-number.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Anything else is treated as a 1-based page number.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:sisTradicional.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits./*from w w w .ja v a 2s . co m*/ * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(IndexSearcher searcher, Query query, String id, PrintWriter writer) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 9999); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; for (int i = start; i < 50; i++) { Document doc = searcher.doc(hits[i].doc); Path path = Paths.get(doc.get("path")); //System.out.printf("%s\t%s\n",id, path.getFileName()); writer.printf("%s\t%s\n", id, path.getFileName()); String modified = doc.get("modified"); if (path != null) { System.out.println((i + 1) + ". " + path); System.out.println(" modified: " + new Date(Long.parseLong(modified)).toString()); System.out.println(searcher.explain(query, hits[i].doc)); } else { System.out.println((i + 1) + ". " + "No path for this document"); } } }