List of usage examples for org.apache.lucene.search.IndexSearcher#doc(int)
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID)
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName, TopDocs topDocs, int k) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs float maxScore = topDocs.getMaxScore(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField storableField = searcher.doc(scoreDoc.doc).getField(categoryFieldName); if (storableField != null) { BytesRef cl = new BytesRef(storableField.stringValue()); //update count Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else { classCounts.put(cl, 1);//from ww w . j av a2s.c o m } //update boost, the boost is based on the best score Double totalBoost = classBoosts.get(cl); double singleBoost = scoreDoc.score / maxScore; if (totalBoost != null) { classBoosts.put(cl, totalBoost + singleBoost); } else { classBoosts.put(cl, singleBoost); } } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1 temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : temporaryList) { returnList.add( new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc)); } } else { returnList = temporaryList; } return returnList; }
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
/**
 * Runs a keyword query against the "title" and "descripton" fields (spelled as in the index
 * field name used here) and returns one page of highlighted results.
 *
 * <p>The index reader is opened for the duration of the call and is always closed, including
 * when opening or searching fails.
 *
 * @param pageNumber page to fetch; passed to {@code getLastScoreDoc}
 * @param pageSize   maximum number of hits on the page
 * @param keyword    text to search for
 * @return the total hit count plus the sorted hits of the requested page, or {@code null} if
 *         the search failed before a response could be built
 */
public TableResultResponse<IndexObject> page(Integer pageNumber, Integer pageSize, String keyword) {
    TableResultResponse<IndexObject> pageQuery = null;
    List<IndexObject> searchResults = new ArrayList<>();
    // try-with-resources: the old finally block called close() on a null reader when
    // DirectoryReader.open failed, turning the failure into a NullPointerException.
    try (IndexReader indexReader = DirectoryReader.open(this.getDirectory())) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Query query = QueryUtil.query(keyword, this.getAnalyzer(), "title", "descripton");
        // presumably the last hit of the previous page (null for the first page) — verify
        // against getLastScoreDoc
        ScoreDoc lastScoreDoc = this.getLastScoreDoc(pageNumber, pageSize, query, indexSearcher);
        // Continue after that document so only the requested page is collected.
        TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);
        Highlighter highlighter = this.addStringHighlighter(query);
        log.info("??{}", keyword);
        log.info("{}", topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            float score = scoreDoc.score;
            Document document = indexSearcher.doc(scoreDoc.doc);
            IndexObject indexObject = DocumentUtil.document2IndexObject(this.getAnalyzer(), highlighter,
                    document, score);
            searchResults.add(indexObject);
            log.info("" + score);
        }
        Collections.sort(searchResults);
        pageQuery = new TableResultResponse<>(topDocs.totalHits, searchResults);
    } catch (Exception e) {
        // Best effort, as before: report the failure and return whatever was built (possibly null).
        log.error("Lucene page query failed", e);
    }
    return pageQuery;
}
From source file:com.globalsight.ling.lucene.Hits.java
License:Apache License
private void dealHits(IndexSearcher searcher, ScoreDoc[] p_hits, int end, int begin, float p_minScore, String text) throws IOException { m_hits = new ArrayList(end); text = text.toLowerCase();// w ww .j a v a 2s. co m if (p_hits.length < end) { end = p_hits.length; } for (int i = begin, max = end; i < max; i++) { float score = p_hits[i].score; if (score < p_minScore) { break; } Document doc = searcher.doc(p_hits[i].doc); String str = doc.get(IndexDocument.TEXT).toLowerCase(); if (text.indexOf(str) > -1 || str.indexOf(text) > -1) { m_hits.add(new Hit(doc.get(IndexDocument.MAINID), doc.get(IndexDocument.SUBID), doc.get(IndexDocument.TEXT), score)); } } }
From source file:com.globalsight.ling.lucene.Index.java
License:Apache License
/** * Executes a search in the index returning no more than p_maxHits * (suggested: 5-10), and having no score smaller than p_minScore. * * This implementation is based on Lucene and Lucene score values * float widely, making it hard to specify a useful cut-off like * 0.7 or 0.5. Good scores can be < 0.2. All that is guaranteed is * that scores are numerically ordered. Use p_maxHits instead. *//*from w ww . ja va2s. c o m*/ public Hits search(String p_text, int end, int begin, float p_minScore) throws IOException, InterruptedException { synchronized (m_state) { if (m_state != STATE_OPENED) { throw new IOException("index is not available"); } } try { m_lock.readLock().acquire(); try { // Search the current index. //IndexReader reader = DirectoryReader.open(m_fsDir); //IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = LuceneCache.getLuceneCache(m_directory).getIndexSearcher(); Query query = getQuery(p_text); int maxHits = end - begin; TopDocs topDocs = searcher.search(query, maxHits); if (topDocs.totalHits > 0) { noResult = false; } // Store results in our own object. Hits result = new Hits(searcher, topDocs.scoreDocs, end, begin, p_minScore, p_text); // Highlight query terms in long results. if (m_type == TYPE_TEXT) { // Note: rewrite MultiTermQuery, RangeQuery or PrefixQuery. // TODO: optimize object creation if it all works. Highlighter highlighter = new Highlighter(new SimpleFormatter(), new QueryScorer(query)); int max = Math.min(end, topDocs.totalHits); for (int i = begin; i < max; i++) { Document doc = searcher.doc(topDocs.scoreDocs[i].doc); String text = doc.get(IndexDocument.TEXT); TokenStream tokenStream = m_analyzer.tokenStream(IndexDocument.TEXT, new StringReader(text)); tokenStream.reset(); // Get 3 best fragments and separate with "..." 
String hilite = highlighter.getBestFragments(tokenStream, text, 3, "..."); result.getHit(i).setText(hilite); } } //searcher.close(); // reader.close(); return result; } finally { m_lock.readLock().release(); } } catch (InterruptedException ex) { throw new IOException(ex.getMessage()); } }
From source file:com.google.gerrit.server.change.ReviewerSuggestionCache.java
License:Apache License
List<AccountInfo> search(String query, int n) throws IOException { IndexSearcher searcher = get(); if (searcher == null) { return Collections.emptyList(); }//from w w w . j a v a2 s. c o m List<String> segments = Splitter.on(' ').omitEmptyStrings().splitToList(query.toLowerCase()); BooleanQuery q = new BooleanQuery(); for (String field : ALL) { BooleanQuery and = new BooleanQuery(); for (String s : segments) { and.add(new PrefixQuery(new Term(field, s)), Occur.MUST); } q.add(and, Occur.SHOULD); } TopDocs results = searcher.search(q, n); ScoreDoc[] hits = results.scoreDocs; List<AccountInfo> result = new LinkedList<>(); for (ScoreDoc h : hits) { Document doc = searcher.doc(h.doc); IndexableField idField = checkNotNull(doc.getField(ID)); AccountInfo info = new AccountInfo(idField.numericValue().intValue()); info.name = doc.get(NAME); info.email = doc.get(EMAIL); info.username = doc.get(USERNAME); result.add(info); } return result; }
From source file:com.greplin.interval.BaseIntervalQueryTest.java
License:Apache License
/**
 * Asserts that the query matches exactly the documents whose "id" values are given.
 *
 * <p>Up to 100 hits are fetched; the "id" value of each is parsed as an integer and the
 * resulting set is compared with the expected ids, ignoring order.
 *
 * @param searcher        searcher to run the query against
 * @param query           query under test
 * @param expectedResults ids the query is expected to match
 * @throws IOException if the search or loading a stored document fails
 */
protected void assertSearch(IndexSearcher searcher, Query query, Integer... expectedResults)
        throws IOException {
    Set<Integer> expected = ImmutableSet.copyOf(expectedResults);

    ScoreDoc[] hits = searcher.search(query, 100).scoreDocs;
    Set<Integer> actual = Sets.newHashSet();
    for (int i = 0; i < hits.length; i++) {
        String id = searcher.doc(hits[i].doc).get("id");
        actual.add(Integer.valueOf(id));
    }

    String message = query + " should match [" + Joiner.on(", ").join(expectedResults) + "]";
    Assert.assertEquals(message, expected, actual);
}
From source file:com.hin.hl7messaging.LicenseeSearchService.java
/**
 * Converts search hits into a list of {@code ProfileVO} objects.
 *
 * <p>For each hit the stored document is loaded and its "namePrefix", "nameSuffix",
 * "subscriberId", "image", "Role" and "telecom" values are copied onto a new profile. The
 * profile name is set from {@code getFullName()} after prefix and suffix have been set.
 *
 * @param hits     hits to convert
 * @param searcher searcher used to load the stored documents
 * @param searchVO not used by this method
 * @return a {@code List<ProfileVO>} with one entry per hit, in hit order
 * @throws Exception if a stored document cannot be loaded
 */
public Object fillData(ScoreDoc[] hits, IndexSearcher searcher, SearchVO searchVO) throws Exception {
    List<ProfileVO> profiles = new ArrayList<ProfileVO>();
    for (ScoreDoc hit : hits) {
        Document stored = searcher.doc(hit.doc);

        ProfileVO profile = new ProfileVO();
        profile.setPrefixName(stored.get("namePrefix"));
        profile.setSuffixName(stored.get("nameSuffix"));
        // Must follow the prefix/suffix setters, as in the original ordering.
        profile.setName(profile.getFullName());
        profile.setSubscriberId(stored.get("subscriberId"));
        profile.setImageBase64(stored.get("image"));
        profile.setRole(stored.get("Role"));
        profile.setTelecom(stored.get("telecom"));

        profiles.add(profile);
    }
    return profiles;
}
From source file:com.icdd.lucence.SearchFiles.java
License:Apache License
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages Date starttime = new Date(); TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + "total matching documents"); Date endtime = new Date(); System.out.println("one search cost :" + (endtime.getTime() - starttime.getTime()) + "ms"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; }/*from w w w. ja v a2 s . c o m*/ hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title:" + title); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press"); if (start - hitsPerPage > 0) { System.out.print("(p)revious page,"); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page,"); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page."); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:com.icdd.lucence.SearchFiles.java
License:Apache License
public static List<Document> doPagingSearchWeb(List<Document> docs, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw) throws IOException { // Collect enough docs to show 5 pages Date starttime = new Date(); TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + "total matching documents"); Date endtime = new Date(); System.out.println("one search cost :" + (endtime.getTime() - starttime.getTime()) + "ms"); System.out.println("?"); for (ScoreDoc scoreDoc : hits) { docs.add(searcher.doc(scoreDoc.doc)); }//from w ww.j a va2 s. c o m return docs; }
From source file:com.ikon.analysis.SearchDemo.java
License:Open Source License
/** * Search in documents/*from w ww. j a v a2 s . c o m*/ */ private static void search(Directory index, Analyzer analyzer, String str) throws ParseException, CorruptIndexException, IOException { IndexReader reader = IndexReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(NUM_HITS, true); //Query q = new QueryParser(Config.LUCENE_VERSION, DOC_FIELD, analyzer).parse(str); Query q = new WildcardQuery(new Term(DOC_FIELD, str)); System.out.println("Query: " + q); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get(DOC_FIELD)); } searcher.close(); }