List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID): loads the stored fields of the document with the given doc ID.
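A minimal sketch of the typical search-then-fetch pattern. The index path, query term, and stored-field name here are illustrative assumptions, not taken from the examples below:

import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class DocExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/index" and the "title" field are assumptions for illustration.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")));
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs results = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
        for (ScoreDoc hit : results.scoreDocs) {
            // doc() resolves a doc ID from the search results to its stored fields;
            // it is sugar for reader.document(hit.doc)
            Document doc = searcher.doc(hit.doc);
            System.out.println(doc.get("title"));
        }
        reader.close();
    }
}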
From source file:lucenesearch.TagBodyCount.java
public void calculatePR(String[] bodyTerms, int N) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(new Searcher().getPostIndexPath())));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    HashSet<Integer> found = new HashSet<>();
    HashSet<Integer> total = new HashSet<>();

    // Baseline query: the tag itself, restricted to answers (PostTypeId == 2).
    System.out.println("Calculating word itself: " + searchTag);
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new QueryParser("Body", analyzer).parse(searchTag), BooleanClause.Occur.MUST);
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

    TopDocs results = searcher.search(booleanQuery.build(), N);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, N);
    int count_r = 0; // relevant: hits that are accepted answers
    int count_n = 0; // non-relevant hits
    int skip = 0;    // hits without a stored "SId" field
    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc); // load stored fields for this hit
        if (doc.get("SId") == null) {
            skip++;
            continue;
        }
        int id = Integer.parseInt(doc.get("SId"));
        if (!hasTag(id, mainTag)) {
            continue;
        }
        if (this.acceptedAnswers.contains(id)) {
            found.add(id);
            count_r++;
        } else {
            count_n++;
        }
        total.add(id);
    }
    System.out.println("Total Post Cnt = " + count_r + "/" + this.acceptedAnswers.size());
    System.out.println("Total skipped Post = " + skip);

    double[] P = new double[bodyTerms.length + 1];
    double[] R = new double[bodyTerms.length + 1];
    int cnt = 0;
    P[cnt] = (double) count_r / (count_r + count_n);
    R[cnt] = (double) count_r / acceptedAnswers.size();
    cnt++;

    // One query per expansion term; precision/recall accumulate over the union of hits.
    for (String bodyTerm : bodyTerms) {
        System.out.println("Query for: " + bodyTerm);
        booleanQuery = new BooleanQuery.Builder();
        booleanQuery.add(new QueryParser("Body", analyzer).parse(bodyTerm), BooleanClause.Occur.MUST);
        booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);
        results = searcher.search(booleanQuery.build(), N);
        hits = results.scoreDocs;
        numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");
        end = Math.min(numTotalHits, N);
        count_r = 0;
        count_n = 0;
        skip = 0;
        for (int i = 0; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            if (doc.get("SId") == null) {
                skip++;
                continue;
            }
            int id = Integer.parseInt(doc.get("SId"));
            if (!hasTag(id, searchTag)) {
                skip++;
                continue;
            }
            if (this.acceptedAnswers.contains(id)) {
                found.add(id);
                count_r++;
            } else {
                count_n++;
            }
            total.add(id);
        }
        P[cnt] = (double) found.size() / total.size();
        R[cnt] = (double) found.size() / acceptedAnswers.size();
        cnt++;
        System.out.println("Total Post Cnt = " + count_r + "/" + count_n + "/" + this.acceptedAnswers.size());
        System.out.println("Total skipped Post = " + skip);
        System.out.println("-----------------");
    }

    System.out.println("-----Final Accum Count-----");
    System.out.println("Cnt,Method,Value");
    for (int i = 0; i < cnt; i++) {
        System.out.println(i + "," + "Precision" + "," + P[i] * 100);
        System.out.println(i + "," + "Recall" + "," + R[i] * 100);
    }
}
From source file:lucenesearch.TagBodyCount.java
public void calculateVenn(String[] bodyTerms, int N) throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(new Searcher().getPostIndexPath())));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    // sets.get(0) = accepted answers, sets.get(1) = hits for the tag itself,
    // sets.get(2..) = hits for each expansion term.
    ArrayList<HashSet<Integer>> sets = new ArrayList<>();

    System.out.println("Calculating word itself: " + searchTag);
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new QueryParser("Body", analyzer).parse(searchTag), BooleanClause.Occur.MUST);
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);

    TopDocs results = searcher.search(booleanQuery.build(), N);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(numTotalHits, N);
    int skip = 0;
    sets.add(0, acceptedAnswers);
    sets.add(1, new HashSet<>());
    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc); // load stored fields for this hit
        if (doc.get("SId") == null) {
            skip++;
            continue;
        }
        int id = Integer.parseInt(doc.get("SId"));
        if (this.acceptedAnswers.contains(id)) {
            sets.get(1).add(id);
        }
    }
    System.out.println("Total Post Cnt = " + sets.get(1).size() + "/" + this.acceptedAnswers.size());
    System.out.println("Total skipped Post = " + skip);

    int arrayIndex = 2;
    for (String bodyTerm : bodyTerms) {
        sets.add(arrayIndex, new HashSet<>());
        System.out.println("Query for: " + bodyTerm);
        booleanQuery = new BooleanQuery.Builder();
        booleanQuery.add(new QueryParser("Body", analyzer).parse(bodyTerm), BooleanClause.Occur.MUST);
        booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);
        results = searcher.search(booleanQuery.build(), N);
        hits = results.scoreDocs;
        numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");
        end = Math.min(numTotalHits, N);
        skip = 0;
        for (int i = 0; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            if (doc.get("SId") == null) {
                skip++;
                continue;
            }
            int id = Integer.parseInt(doc.get("SId"));
            if (this.acceptedAnswers.contains(id)) {
                sets.get(arrayIndex).add(id);
            }
        }
        arrayIndex++;
        System.out.println("-----------------");
    }

    System.out.println("-------------------\nFinal Res\n-------------\n");

    // Enumerate every non-empty subset of the sets (bitmask over i) and print the
    // size of its intersection, in the format expected by R's Venn-diagram packages.
    int pow = 1;
    for (int i = 0; i < bodyTerms.length + 1; i++)
        pow *= 2;
    for (int i = 1; i <= pow - 1; i++) {
        ArrayList<Integer> numbers = new ArrayList<>();
        int dig = 2;
        int n = i;
        while (n != 0) {
            if (n % 2 == 1) {
                numbers.add(dig);
            }
            n /= 2;
            dig++;
        }
        HashSet<Integer> temp = new HashSet<>();
        temp.addAll(sets.get(numbers.get(0) - 2));
        for (Integer number : numbers) {
            // number - 2 maps back into sets (index 0 = accepted, 1 = the tag itself)
            temp.retainAll(sets.get(number - 2));
        }
        String s = numbers.size() == 1 ? "area" : "n";
        for (Integer number : numbers) {
            s = s + (number - 1);
        }
        s += "=" + temp.size() + ",";
        System.out.println(s);
    }
    String s = "category = c(\"All\",\"" + this.searchTag + "\",";
    for (String t : bodyTerms) {
        s = s + "\"" + t + "\",";
    }
    s += "),";
    System.out.println(s);
}
From source file:lucenesearch.TagScorer.java
public void caculate() throws IOException {
    HashMap<String, HashMap<Integer, Double>> tagUserScore = new HashMap<>();
    int hitsPerPage = 2000000;
    String index = new Searcher().getPostIndexPath();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    // Match all answers (PostTypeId == 2).
    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(IntPoint.newExactQuery("PostTypeId", 2), BooleanClause.Occur.MUST);
    Query q = booleanQuery.build();

    TopDocs results = searcher.search(q, hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " Total answers found.");

    int end = Math.min(numTotalHits, hitsPerPage);
    int errorUsers = 0;
    for (int i = 0; i < end; i++) {
        System.out.println("processing answer " + i + "/" + end);
        int docID = hits[i].doc;
        Document doc = searcher.doc(docID); // load stored fields for this hit
        Post p = new Post(doc);
        try {
            int uid = Integer.parseInt(doc.get("SOwnerUserId"));
            ArrayList<String> tgs = tags.get(p.getId());
            // Credit the answer's owner with one point per tag on the post.
            for (String tg : tgs) {
                if (!tagUserScore.containsKey(tg)) {
                    tagUserScore.put(tg, new HashMap<>());
                }
                HashMap<Integer, Double> temp = tagUserScore.get(tg);
                if (!temp.containsKey(uid)) {
                    temp.put(uid, 1.0);
                } else {
                    temp.replace(uid, 1 + temp.get(uid));
                }
            }
        } catch (Exception ex) {
            errorUsers++;
        }
    }

    // Rank users per tag by score and evaluate against the golden list.
    System.out.println("tag:map,p@1,p@5,p@10");
    for (Map.Entry<String, HashMap<Integer, Double>> entryM : tagUserScore.entrySet()) {
        String tag = entryM.getKey();
        HashMap<Integer, Double> userScores = entryM.getValue();
        String goldenFile = Utility.getGoldenFileName(tag);
        ValueComparator bvc = new ValueComparator(userScores);
        TreeMap<Integer, Double> sorted_map = new TreeMap<Integer, Double>(bvc);
        sorted_map.putAll(userScores);
        ArrayList<Integer> lst = new ArrayList<>();
        for (Map.Entry<Integer, Double> entry : sorted_map.entrySet()) {
            lst.add(entry.getKey());
        }
        Evaluator ev = new Evaluator();
        Balog b = new Balog();
        double map = ev.map(lst, b.getGoldenList(goldenFile));
        double p1 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 1);
        double p5 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 5);
        double p10 = ev.precisionAtK(lst, b.getGoldenList(goldenFile), 10);
        EvalResult er = new EvalResult(tag, map, p1, p5, p10);
        System.out.println(er.getMap() + "," + er.getP1() + "," + er.getP5() + "," + er.getP10());
    }
}
From source file:lucenesearche.HW3.java
public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");
    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    File luceneFile = new File("Lucene_Results_Stopped.txt"); // change filename for each query run
    int query_id;

    // ===================================================
    // read input from user until he enters q for quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }
            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // closeIndex must always be called after adding, otherwise the index is not created
    indexer.closeIndex();

    // =========================================================
    // Now search: run every query in query_stopped.txt and write
    // the top 100 hits per query in TREC result format
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    File queryFile = new File(
            "C:\\Users\\shantanu\\Downloads\\NetBeansProjects\\LuceneSearchE\\src\\lucenesearche\\query_stopped.txt");
    try {
        Scanner scanner = new Scanner(new FileInputStream(queryFile));
        query_id = 1;
        luceneFile.createNewFile();
        FileWriter writer = new FileWriter(luceneFile);
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            // A collector cannot be reused across searches, so create a fresh one per query.
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(line);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println("Found " + hits.length + " hits.");
            // 4. write results, one line per hit
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId); // load stored fields for this hit
                String a = d.get("filename");
                String parts = a.substring(0, a.indexOf('.')); // strip the file extension
                writer.append(String.format("%-10s %-10s %-30s %-10s %-30s", query_id, "Q0", parts,
                        (i + 1), hits[i].score));
                writer.append('\n');
                writer.flush();
            }
            query_id += 1;
        }
        writer.close();
    } catch (Exception e) {
        System.out.println("Error searching " + s + " : " + e.toString());
    }
}
From source file:lucenesearche.HW3.java
public static void main(String[] args) throws IOException {
    System.out.println(
            "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)");
    String indexLocation = null;
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String s = br.readLine();

    HW3 indexer = null;
    try {
        indexLocation = s;
        indexer = new HW3(s);
    } catch (Exception ex) {
        System.out.println("Cannot create index..." + ex.getMessage());
        System.exit(-1);
    }

    // ===================================================
    // read input from user until he enters q for quit
    // ===================================================
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println(
                    "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)");
            System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }
            // try to add file into the index
            indexer.indexFileOrDirectory(s);
        } catch (Exception e) {
            System.out.println("Error indexing " + s + " : " + e.getMessage());
        }
    }

    // closeIndex must always be called after adding, otherwise the index is not created
    indexer.closeIndex();

    // =========================================================
    // Now search
    // =========================================================
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    s = "";
    while (!s.equalsIgnoreCase("q")) {
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }
            // A collector cannot be reused across searches, so create a fresh one per query.
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(s);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // 4. display results
            File luceneFile = new File("Lucene_Q4_top100.txt"); // change filename for each query
            int query_id = 4; // change this for each new query
            luceneFile.createNewFile();
            FileWriter writer = new FileWriter(luceneFile);
            String header = String.format("%-10s %-10s %-80s %-10s %-40s %-20s%n", "Query ID", "Q0",
                    "Document Name", "Rank", "Cosine Similarity Score", "System Name");
            writer.write(header);
            System.out.println("Found " + hits.length + " hits.");
            System.out.print(header);
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId); // load stored fields for this hit
                String row = String.format("%-10s %-10s %-80s %-10s %-40s %-20s%n", "" + query_id, "Q0",
                        "" + d.get("path"), "" + (i + 1), "" + hits[i].score, "Shantanu-SYS-001");
                writer.write(row);
                writer.flush();
                System.out.print(row);
            }
            writer.close();

            // 5. term stats --> watch out for which "version" of the term
            // must be checked here instead!
            Term termInstance = new Term("contents", s);
            long termFreq = reader.totalTermFreq(termInstance);
            long docCount = reader.docFreq(termInstance);
            System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount);
        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }
    }
}
From source file:magoffin.matt.lucene.LuceneSearchService.java
License:Open Source License
@Override
public List<?> build(String index, final TopDocCollector hits, final int start, final int end) {
    final LucenePlugin plugin = getPluginForString(index);
    final int length = end > start ? end - start : 0;
    final ScoreDoc[] docs = hits.topDocs().scoreDocs;
    final int hitLength = docs.length;
    final List<Object> searchMatches = new ArrayList<Object>(length);
    doIndexSearcherOp(index, new IndexSearcherOp() {
        @Override
        public void doSearcherOp(String type, IndexSearcher searcher) throws IOException {
            for (int i = start; i < end && i < hitLength; i++) {
                int docId = docs[i].doc;
                Document doc = searcher.doc(docId); // load stored fields for this hit
                searchMatches.add(plugin.build(doc));
            }
        }
    });
    return searchMatches;
}
From source file:magoffin.matt.lucene.LuceneSearchService.java
License:Open Source License
@Override
public void search(String index, String query, final LuceneSearchResultHandler handler) {
    Query luceneQuery = parseQuery(index, query);
    doIndexQueryOp(index, luceneQuery, ASYNCHRONOUS, new IndexQueryOp() {
        @SuppressWarnings("unchecked")
        @Override
        public void doSearcherOp(String indexType, IndexSearcher searcher, Query myQuery,
                TopDocCollector hits) throws IOException {
            int numHits = hits == null ? 0 : hits.getTotalHits();
            handler.setTotalMatches(numHits);
            ScoreDoc[] docs = hits == null ? null : hits.topDocs().scoreDocs;
            // getTotalHits() can exceed the number of collected docs, so bound the loop by both
            for (int i = 0; i < numHits && i < docs.length; i++) {
                int docId = docs[i].doc;
                Document doc = searcher.doc(docId); // load stored fields for this hit
                List<Field> fields = doc.getFields();
                Map<String, String[]> match = new LinkedHashMap<String, String[]>();
                for (Field field : fields) {
                    match.put(field.name(), doc.getValues(field.name()));
                }
                if (!handler.processMatch(match)) {
                    break;
                }
            }
        }
    });
}
From source file:magoffin.matt.lucene.LuceneSearchService.java
License:Open Source License
@Override
public LuceneSearchResults search(String type, String query, final int maxResults, final int pageSize,
        final int page) {
    Query luceneQuery = parseQuery(type, query);
    final LuceneSearchResultsImpl results = new LuceneSearchResultsImpl();
    doIndexQueryOp(type, luceneQuery, ASYNCHRONOUS, new IndexQueryOp() {
        @SuppressWarnings({ "unchecked" })
        @Override
        public void doSearcherOp(String indexType, IndexSearcher searcher, Query myQuery,
                TopDocCollector hits) throws IOException {
            int numHits = hits == null ? 0 : hits.getTotalHits();
            results.totalMatches = numHits;
            if (numHits > 0) {
                Set<String> seenFieldNames = new HashSet<String>();
                results.results = new LinkedList<Map<String, String[]>>();
                int start = 0;
                int max = -1;
                if (pageSize > 0) {
                    start = pageSize * (page - 1);
                    max = pageSize;
                }
                int maxr = maxResults < 1 ? numHits : maxResults;
                ScoreDoc[] docs = hits.topDocs().scoreDocs;
                // bound by docs.length: getTotalHits() can exceed the number of collected docs
                for (int i = start; i < docs.length && i < maxr && ((max--) != 0); i++) {
                    int docId = docs[i].doc;
                    Document doc = searcher.doc(docId); // load stored fields for this hit
                    List<Field> fields = doc.getFields();
                    // use a TreeMap to keep keys sorted
                    Map<String, String[]> data = new TreeMap<String, String[]>();
                    for (Field field : fields) {
                        data.put(field.name(), doc.getValues(field.name()));
                    }
                    Set<String> fieldSet = new HashSet<String>();
                    fieldSet.addAll(data.keySet());
                    // see if doc was missing any seen fields...
                    Collection<String> fill = CollectionUtils.subtract(seenFieldNames, fieldSet);
                    if (fill.size() > 0) {
                        for (String fieldName : fill) {
                            data.put(fieldName, null);
                        }
                    }
                    // any fields we have not seen yet must be added to all previous
                    // results so every row ends up with the same keys
                    Collection<String> missing = CollectionUtils.subtract(fieldSet, seenFieldNames);
                    if (missing.size() > 0) {
                        for (Map<String, String[]> map : results.results) {
                            for (Iterator<String> itr = missing.iterator(); itr.hasNext();) {
                                map.put(itr.next(), null);
                            }
                        }
                        seenFieldNames.addAll(missing);
                    }
                    results.results.add(data);
                }
            }
        }
    });
    return results;
}
From source file:main.java.run.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc); // load stored fields for this hit
            String path = doc.get("path");
            if (path != null) {
                double score = hits[i].score;
                System.out.println((i + 1) + ". " + path + ". " + score);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:Main.WebAPI.Search.java
/**
 * @param args args[0] is a query
 *
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */
public static void main(String[] args)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Above, create documents with two fields, one with term vectors (tv) and one without (notv)
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);
    Directory index = FSDirectory.open(new File("data/indexing"));

    String querystr = args.length > 0 ? args[0] : "mike lab";

    // "content" is the default field to use when no field
    // is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(query, hitsPerPage);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

    // guard against fewer than hitsPerPage results
    int toShow = Math.min(hitsPerPage, hits.scoreDocs.length);
    for (int i = 0; i < toShow; i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id); // load stored fields for this hit
        System.out.println(doc.get("url"));
        System.out.println(doc.get("title"));
        String text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
                analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        String preview = "";
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                preview += frag1.toString() + "...<br>";
                k++;
                if (k >= 2) // keep a two-line preview
                    break;
            }
        }
        System.out.println(preview); // print the highlighted two-line preview
        System.out.println("-------------");
    }
}