List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID)
From source file:ead.editor.model.ModelIndex.java
License:Open Source License
/** * Get a (sorted) list of nodes that match a query */// w ww . jav a2 s . co m public List<DependencyNode> searchAll(String queryText, Map<Integer, DependencyNode> nodesById) { ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>(); try { IndexReader reader = IndexReader.open(searchIndex); Query query = getQueryAllParser().parse(queryText); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { String nodeId = searcher.doc(hit.doc).get(editorIdFieldName); nodes.add(nodesById.get(Integer.parseInt(nodeId))); } searcher.close(); } catch (Exception e) { logger.error("Error parsing or looking up query '{}' in index", queryText, e); } return nodes; }
From source file:ead.editor.model.ModelIndex.java
License:Open Source License
/**
 * Gets a (sorted) list of nodes that match a query on a single field.
 *
 * <p>Opens a reader on {@code searchIndex}, parses {@code queryText} against {@code field} using
 * {@code searchAnalyzer}, collects at most {@code MAX_SEARCH_HITS} hits and maps every hit back
 * to a node through the value stored in its {@code editorIdFieldName} field.
 *
 * @param field default field the query parser searches
 * @param queryText the query to parse and run
 * @param nodesById lookup table from node id to node; the looked-up value is added as-is, so an
 *     id that is absent from the map produces a {@code null} entry in the result
 * @return the matching nodes in the order the collector returned the hits; if anything fails the
 *     error is logged and the nodes gathered so far (possibly none) are returned
 */
public List<DependencyNode> search(String field, String queryText, Map<Integer, DependencyNode> nodesById) {
    ArrayList<DependencyNode> nodes = new ArrayList<DependencyNode>();
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(searchIndex);
        Query query = new QueryParser(Version.LUCENE_35, field, searchAnalyzer).parse(queryText);
        searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            String nodeId = searcher.doc(hit.doc).get(editorIdFieldName);
            nodes.add(nodesById.get(Integer.parseInt(nodeId)));
        }
    } catch (Exception e) {
        logger.error("Error parsing or looking up query '{}' in index", queryText, e);
    } finally {
        // Closing the searcher does not close a reader that was handed to its constructor,
        // so both are released here; this also covers the failure paths.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                logger.error("Error closing searcher after query '{}'", queryText, e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                logger.error("Error closing index reader after query '{}'", queryText, e);
            }
        }
    }
    return nodes;
}
From source file:edu.albany.ir.example.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search * engine presents pages of size n to the user. The user can then go to the * next page if interested in the next hits. * // ww w.jav a 2s . c o m * When the query is executed for the first time, then only enough results * are collected to fill 5 result pages. If the user wants to page beyond * this limit, then the query is executed another time and all hits are * collected. * */ public static void doPagingSearch(BufferedReader in, String out_f, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive, String q_id) throws IOException { // Collect enough docs to show 5 pages raw = true; // only show raw TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; BufferedWriter out = new BufferedWriter(new FileWriter(out_f, true)); int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { /* * if (end > hits.length) { System.out.println("Only results 1 - " + * hits.length +" of " + numTotalHits + * " total matching documents collected."); * System.out.println("Collect more (y/n) ?"); String line = * in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { * break; } * * hits = searcher.search(query, numTotalHits).scoreDocs; } */ end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { Document doc = searcher.doc(hits[i].doc); float score = hits[i].score; String path = doc.get("path"); if (path != null) { path = path.substring(path.lastIndexOf("\\") + 1); out.write(q_id + " 0 " + path + " " + i + " " + score + " CSI550\n"); System.out.println(q_id + " 0 " + path + " " + i + " " + score + " CSI550"); } else { String url = doc.get("url"); if (url != null) { System.out.println(i + ". " + url); System.out.println(" - " + doc.get("title")); } else { System.out.println(i + ". 
" + "No path nor URL for this document"); } } if (true) continue; if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } doc = searcher.doc(hits[i].doc); path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". " + "No path for this document"); } } out.close(); if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:edu.cmu.cs.in.search.HoopLuceneSearch.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits./*from w ww .ja v a2 s . c o m*/ * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; debug(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { debug("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); debug("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format debug("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { debug((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { debug(" Title: " + doc.get("title")); } } else { debug((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } debug("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { debug("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazInfoIndexerAllCountries.java
License:Apache License
public static void main(String argv[]) throws Exception { if (argv.length < 1) throw new Exception("Command line argument number wrong"); GazInfoIndexerAllCountries gi = new GazInfoIndexerAllCountries(); // argv[0]="-write"; String mode = argv[0];// w ww . j a v a 2 s.c o m if (mode.equals("-write")) { String argv1 = GlobalParam.getGeoNames() + "/allCountries.txt"; String argv2 = GlobalParam.getGazIndex() + "/InfoIndex"; BufferedReader br = GetReader.getUTF8FileReader(argv1); IndexWriter iw = GetWriter.getIndexWriter(argv2, 1400); iw.deleteAll(); gi.indexGazatteer(br, iw); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. String argv1 = GlobalParam.getGazIndex() + "/GazIndex/"; IndexSearcher is = GetReader.getIndexSearcher(argv1, "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { long id; try { id = Long.parseLong(line); } catch (Exception e) { System.err.println("number wrong."); continue; } Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true); long start = System.currentTimeMillis(); TopDocs docs = is.search(q, 1); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } ScoreDoc sd = docs.scoreDocs[0]; Document d = is.doc(sd.doc); long end = System.currentTimeMillis(); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("ORIGIN")); System.out.println(d.get("LONGTITUDE") + " " + d.get("LATITUDE")); System.out.println("lookup time: " + (end - start)); } } }
From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazStringIndexerAllCountries.java
License:Apache License
public static void main(String argv[]) throws Exception { if (argv.length < 1) throw new Exception("Command line argument number wrong"); GazStringIndexerAllCountries gi = new GazStringIndexerAllCountries(); // argv[0] = "-read"; String mode = argv[0];/*from w w w. java 2 s. c o m*/ if (mode.equals("-write")) { String argv1 = GlobalParam.getGeoNames() + "/allCountries.txt"; String argv2 = GlobalParam.getGazIndex() + "/StringIndex/"; BufferedReader br = GetReader.getUTF8FileReader(argv1); IndexWriter iw = GetWriter.getIndexWriter(argv2, 1600); iw.deleteAll(); gi.indexGazatteer(br, iw); // iw.optimize(); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. String argv1 = GlobalParam.getGazIndex() + "/StringIndex"; IndexSearcher is = GetReader.getIndexSearcher(argv1, "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { long id; try { id = Long.parseLong(line); } catch (Exception e) { System.err.println("number wrong."); continue; } Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true); long start = System.currentTimeMillis(); TopDocs docs = is.search(q, 1); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } ScoreDoc sd = docs.scoreDocs[0]; Document d = is.doc(sd.doc); long end = System.currentTimeMillis(); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("LOWERED_ORIGIN")); } } }
From source file:edu.cmu.geolocator.resource.gazindexing.CollaborativeIndex.GazStringIndexerAltNames.java
License:Apache License
public static void main(String argv[]) throws Exception { if (argv.length < 1) throw new Exception("Command line argument number wrong"); GazStringIndexerAltNames stringIndex = new GazStringIndexerAltNames(); // argv[0] = "-read"; String mode = argv[0];//from ww w.j a v a2 s . com if (mode.equals("-write")) { String argv1 = GlobalParam.getGeoNames() + "/alternateNames.txt"; String argv2 = GlobalParam.getGazIndex() + "/StringIndex/"; BufferedReader br = GetReader.getUTF8FileReader(argv1); //add the list that adds the items into the gazetteer. String argv3 = "res/geonames/addition.txt"; BufferedReader br2 = GetReader.getUTF8FileReader(argv3); IndexWriter iw = GetWriter.getIndexWriter(argv2, 1024 + 512); stringIndex.indexAlterNames(br, iw); stringIndex.indexAlterNames(br2, iw); // iw.optimize(); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. String argv1 = GlobalParam.getGazIndex() + "/StringIndex"; IndexSearcher is = GetReader.getIndexSearcher(argv1, "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { Query q = new TermQuery(new Term("LOWERED-NO-WS", line)); TermFilter filter = new TermFilter(new Term("LOWERED-NO-WS", line)); long start = System.currentTimeMillis(); // TopDocs docs = is.search(q, filter, 100); TopDocs docs = is.search(q, 100); long end = System.currentTimeMillis(); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } for (ScoreDoc sd : docs.scoreDocs) { Document d = is.doc(sd.doc); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("LOWERED_ORIGIN")); System.out.println(d.get("LANG")); } System.out.println(docs.totalHits); System.out.println("lookup time: " + (end - start)); } } }
From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazInfoIndexerAllCountries.java
License:Apache License
public static void main(String argv[]) throws Exception { GazInfoIndexerAllCountries gi = new GazInfoIndexerAllCountries(); argv[0] = "-write"; String mode = argv[0];/*from ww w .j av a 2 s. co m*/ if (mode.equals("-write")) { if (argv.length != 3) throw new Exception("Command line argument number wrong"); argv[1] = "GeoNames/allCountries.txt"; argv[2] = "GazIndex/InfoIndex"; BufferedReader br = GetReader.getUTF8FileReader(argv[1]); IndexWriter iw = GetWriter.getIndexWriter(argv[2], 1400); iw.deleteAll(); gi.indexGazatteer(br, iw); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. argv[1] = "/Users/Indri/Eclipse_workspace/GazIndex/"; IndexSearcher is = GetReader.getIndexSearcher(argv[1], "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { long id; try { id = Long.parseLong(line); } catch (Exception e) { System.err.println("number wrong."); continue; } Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true); long start = System.currentTimeMillis(); TopDocs docs = is.search(q, 1); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } ScoreDoc sd = docs.scoreDocs[0]; Document d = is.doc(sd.doc); long end = System.currentTimeMillis(); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("ORIGIN")); System.out.println(d.get("LONGTITUDE") + " " + d.get("LATITUDE")); System.out.println("lookup time: " + (end - start)); } } }
From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazStringIndexerAllCountries.java
License:Apache License
public static void main(String argv[]) throws Exception { GazStringIndexerAllCountries gi = new GazStringIndexerAllCountries(); argv[0] = "-read"; String mode = argv[0];//ww w . j a va2 s .c o m if (mode.equals("-write")) { if (argv.length != 3) throw new Exception("Command line argument number wrong"); argv[1] = "GeoNames/allCountries.txt"; argv[2] = "GazIndex/StringIndex/"; BufferedReader br = GetReader.getUTF8FileReader(argv[1]); IndexWriter iw = GetWriter.getIndexWriter(argv[2], 1600); iw.deleteAll(); gi.indexGazatteer(br, iw); // iw.optimize(); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. argv[1] = "GazIndex/StringIndex"; IndexSearcher is = GetReader.getIndexSearcher(argv[1], "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { long id; try { id = Long.parseLong(line); } catch (Exception e) { System.err.println("number wrong."); continue; } Query q = NumericRangeQuery.newLongRange("ID", id, id, true, true); long start = System.currentTimeMillis(); TopDocs docs = is.search(q, 1); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } ScoreDoc sd = docs.scoreDocs[0]; Document d = is.doc(sd.doc); long end = System.currentTimeMillis(); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("LOWERED_ORIGIN")); } } }
From source file:edu.cmu.geoparser.resource.gazindexing.CollaborativeIndex.GazStringIndexerAltNames.java
License:Apache License
public static void main(String argv[]) throws Exception { GazStringIndexerAltNames stringIndex = new GazStringIndexerAltNames(); argv[0] = "-read"; String mode = argv[0];/*from w w w . j a v a 2 s .co m*/ if (mode.equals("-write")) { if (argv.length != 3) throw new Exception("Command line argument number wrong"); argv[1] = "GeoNames/alternateNames.txt"; argv[2] = "GazIndex/StringIndex/"; BufferedReader br = GetReader.getUTF8FileReader(argv[1]); IndexWriter iw = GetWriter.getIndexWriter(argv[2], 1024 + 512); stringIndex.indexAlterNames(br, iw); // iw.optimize(); iw.close(); br.close(); } if (mode.equals("-read")) { System.out.println("input id. Output basic information. For debugging."); // query first two fields. argv[1] = "GazIndex/StringIndex"; IndexSearcher is = GetReader.getIndexSearcher(argv[1], "disk"); BufferedReader r = new BufferedReader(new InputStreamReader(System.in, "utf-8")); String line; while ((line = r.readLine()) != null) { Query q = new TermQuery(new Term("LOWERED-NO-WS", line)); TermFilter filter = new TermFilter(new Term("LOWERED-NO-WS", line)); long start = System.currentTimeMillis(); // TopDocs docs = is.search(q, filter, 100); TopDocs docs = is.search(q, 100); long end = System.currentTimeMillis(); if (docs == null) { System.err.println("Not found."); continue; } if (docs.scoreDocs.length == 0) { System.err.println("Not found."); continue; } for (ScoreDoc sd : docs.scoreDocs) { Document d = is.doc(sd.doc); System.out.println(d); System.out.println(d.get("ID")); System.out.println(d.get("LOWERED_ORIGIN")); System.out.println(d.get("LANG")); } System.out.println(docs.totalHits); System.out.println("lookup time: " + (end - start)); } } }