List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.LuceneTermRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { IndexSearcher searcher = null; try {// w w w . java2 s .co m Map<String, Double> terms = ratingsDao.getRatings(userid); Query query = buildQuery(terms); int hitsPerPage = count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); Filter filter = new SeenArticlesFilter(viewsDao, userid); searcher = manager.acquire(); manager.maybeRefresh(); searcher.search(query, filter, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int stop = (start + count < hits.length ? start + count : hits.length); List<RecommendedNewsItem> results = new ArrayList<>(stop - start); for (int i = start; i < stop; i++) { int docId = hits[i].doc; Document d = searcher.doc(docId); results.add(toNewsitem(d, docId, hits[i].score, "termRecommender")); //System.out.println(docId); //System.out.println(searcher.explain(query, docId).toString()); } return results; } catch (RatingsDaoException | IOException ex) { logger.error(ex); throw new RecommendationException(ex); } finally { try { manager.release(searcher); } catch (IOException ex) { logger.error(ex); } searcher = null; } }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { count = count / 2;//from ww w. ja v a 2 s.co m List<RecommendedNewsItem> results = super.recommend(userid, start, count); IndexSearcher searcher = null; try { Map<String, Double> terms = ratingsDao.getRatings(userid); Query query = buildQuery(terms); int hitsPerPage = start + count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); Filter f1 = new UniqueResultsFilter(results); Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24); Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND); searcher = manager.acquire(); manager.maybeRefresh(); searcher.search(query, f, collector); ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs; for (ScoreDoc s : hits) { int docId = s.doc; Document d = searcher.doc(docId); RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal"); results.add(item); } //Collections.sort(results); } catch (RatingsDaoException | IOException ex) { logger.error(ex); throw new RecommendationException(ex); } return results; }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java
License:Apache License
/**
 * Recommends the most-viewed articles the user has not yet seen.
 *
 * @param userid id of the user to recommend for
 * @param start  zero-based offset of the first result (paging)
 * @param count  maximum number of results to return
 * @return up to {@code count} items tagged "topN"
 * @throws RecommendationException if the views lookup or index search fails
 */
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count);
        Query query = buildQuery(ids);
        // Size the collector for the whole page window: the loop below starts
        // at `start`, so collecting only `count` hits would under-fill every
        // page after the first.
        int hitsPerPage = start + count;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Filter filter = new SeenArticlesFilter(viewsDao, userid);
        searcher = manager.acquire();
        searcher.search(query, filter, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        int stop = Math.min(start + count, hits.length);
        // Math.max guards against a negative initial capacity when start
        // exceeds the number of collected hits.
        List<RecommendedNewsItem> results = new ArrayList<>(Math.max(0, stop - start));
        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "topN"));
        }
        return results;
    } catch (ViewsDaoException | IOException ex) {
        throw new RecommendationException(ex);
    } finally {
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
            searcher = null;
        }
    }
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java
License:Apache License
@Override public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException { IndexSearcher searcher = null; try {//from www . j a va 2 s .co m String[] trends = trendsDao.getTrends(250); Query query = buildQuery(trends); int hitsPerPage = start + count; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); //Filter filter = new SeenArticlesFilter(viewsDao, userid); Filter f = new RecentFilter("timestamp", 1000 * 60 * 60 * 24); manager.maybeRefresh(); searcher = manager.acquire(); searcher.search(query, f, collector); ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs; List<RecommendedNewsItem> results = new ArrayList<>(hits.length); for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); RecommendedNewsItem item = toNewsitem(d, docId, hit.score, "trending"); results.add(item); } return results; } catch (TrendsDaoException | IOException ex) { logger.error(ex); throw new RecommendationException(ex); } finally { try { if (searcher != null) { manager.release(searcher); } } catch (IOException ex) { logger.error(ex); } searcher = null; } }
From source file:book.Searcher.java
License:Apache License
public static void search(String indexDir, String q) throws IOException, ParseException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir).toPath())); IndexSearcher is = new IndexSearcher(reader); //alt: Directory dir = FSDirectory.open(new File(indexDir)); // 3 //alt: IndexSearcher is = IndexSearcher(dir); // 3 QueryParser parser = new QueryParser("contents", // 4 new StandardAnalyzer()); // 4 Query query = parser.parse(q); // 4 long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); // 5 long end = System.currentTimeMillis(); System.err.println("Found " + hits.totalHits + // 6 " document(s) (in " + (end - start) + // 6 " milliseconds) that matched query '" + // 6 q + "':"); // 6 for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); // 7 System.out.println(doc.get("fullpath")); // 8 }/*from ww w . ja v a 2 s . c o m*/ //was: is.close(); // 9 }
From source file:br.andrew.lucene.testing.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from w ww . j a v a 2s . co m * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(final BufferedReader in, final IndexSearcher searcher, final Query query, final int hitsPerPage, final boolean raw, final boolean interactive) throws IOException { // Collect enough docs to show 5 pages final TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; final int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); final String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } final Document doc = searcher.doc(hits[i].doc); final String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); final String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); final String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { final int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:br.bireme.ngrams.CompareResults.java
private static void checkDocs(final String similarity, final String docId1, final String docId2, final IndexSearcher isearcher, final BufferedWriter bwriter) throws IOException { assert similarity != null; assert docId1 != null; assert docId2 != null; assert isearcher != null; assert bwriter != null; final Query query1 = new TermQuery(new Term("id", docId1)); final Query query2 = new TermQuery(new Term("id", docId2)); final TopDocs top1 = isearcher.search(query1, 1); final TopDocs top2 = isearcher.search(query2, 1); final ScoreDoc[] scores1 = top1.scoreDocs; final ScoreDoc[] scores2 = top2.scoreDocs; if ((scores1.length > 0) && (scores2.length > 0)) { final Document doc1 = isearcher.doc(scores1[0].doc); final Document doc2 = isearcher.doc(scores2[0].doc); writeDocDifferences(similarity, doc1, doc2, bwriter); }//from www . jav a 2 s . co m }
From source file:br.bireme.ngrams.NGrams.java
/**
 * Parses one pipe-separated input record, queries the n-gram index with the
 * record's indexed field, and appends accepted matches to {@code results}.
 *
 * @param parameters    field layout / scoring configuration for the records
 * @param searcher      searcher over the n-gram index
 * @param analyzer      n-gram analyzer used to parse the query text
 * @param ngDistance    n-gram string distance used for similarity scoring
 * @param text          one raw input record, fields separated by '|'
 * @param useSimilarity if true, rank/filter by n-gram similarity; otherwise
 *                      by raw Lucene score with an early exit below 1.0
 * @param id_id         set of already-paired id combinations (dedup state)
 * @param results       output set receiving accepted {@code Result}s
 * @throws IOException    if the search fails or the record is malformed
 * @throws ParseException if the query text cannot be parsed
 */
private static void searchRaw(final Parameters parameters, final IndexSearcher searcher,
        final NGAnalyzer analyzer, final NGramDistance ngDistance, final String text,
        final boolean useSimilarity, final Set<String> id_id, final Set<Result> results)
        throws IOException, ParseException {
    assert parameters != null;
    assert searcher != null;
    assert analyzer != null;
    assert ngDistance != null;
    assert id_id != null;
    assert results != null;

    if (text == null) {
        throw new NullPointerException("text");
    }
    // Undo HTML escaping before splitting; the record must have exactly the
    // configured number of fields.
    final String text2 = StringEscapeUtils.unescapeHtml4(text);
    final String[] param = text2.trim().split(" *\\| *", Integer.MAX_VALUE);
    if (param.length != parameters.nameFields.size()) {
        throw new IOException(text);
    }
    final String fname = parameters.indexed.name;
    final QueryParser parser = new QueryParser(fname, analyzer);
    // Normalize and truncate the indexed field's value to build the query text.
    final String ntext = Tools
            .limitSize(Tools.normalize(param[parameters.indexed.pos], OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
    final int MAX_RESULTS = 20;
    if (!ntext.isEmpty()) {
        final Query query = parser.parse(QueryParser.escape(ntext));
        final TopDocs top = searcher.search(query, MAX_RESULTS);
        // Lowest acceptable similarity from the first (strictest) score rule.
        final float lower = parameters.scores.first().minValue;
        ScoreDoc[] scores = top.scoreDocs;
        int remaining = MAX_RESULTS;
        for (ScoreDoc sdoc : scores) {
            if (remaining-- <= 0) {
                break; // Only for performance
            }
            final Document doc = searcher.doc(sdoc.doc);
            if (useSimilarity) {
                final String dname = doc.get(fname);
                if (dname == null) {
                    throw new IOException("dname");
                }
                final float similarity = ngDistance.getDistance(ntext, doc.get(fname));
                if (similarity < lower) {
                    // Below threshold: shrink the budget so only a few more
                    // candidates are examined before giving up.
                    if (remaining > 3) {
                        remaining = 3;
                        //System.out.println("Atualizando tot=" + tot + " score=" + sdoc.score + " similarity=" + similarity+ " text=" + doc.get(fname));
                    }
                } else {
                    final Result out = createResult(id_id, parameters, param, doc, ngDistance, similarity,
                            sdoc.score);
                    if (out != null) {
                        results.add(out);
                    }
                }
            } else {
                // Raw-score mode: hits are score-ordered, so once the score
                // drops below 1.0 no later hit can qualify.
                if (sdoc.score < 1.0) {
                    System.out.println("Saindo score=" + sdoc.score);
                    break; // Only for performance
                }
                final Result out = createResult(id_id, parameters, param, doc, ngDistance, 0, sdoc.score);
                if (out != null) {
                    results.add(out);
                }
            }
        }
    }
}
From source file:br.com.crawlerspring.model.Searcher.java
/**
 * Finds documents whose title matches {@code parameters} (as a substring,
 * via a regex query) and converts the top hits to model documents.
 *
 * @param parameters substring to look for inside the "title" field
 * @return up to 10 matching documents with title and content populated
 * @throws Exception if the index cannot be opened or searched
 */
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception {
    List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>();
    RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*"));
    int hitsPerPage = 10;
    // try-with-resources closes the reader; the original opened a new reader
    // on every call and never closed it, leaking file handles.
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int cont = 0; cont < hits.length; ++cont) {
            br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document();
            int docId = hits[cont].doc;
            org.apache.lucene.document.Document luceneDocument = searcher.doc(docId);
            document.setTitle(luceneDocument.get("title"));
            document.setContent(luceneDocument.get("content"));
            parametrizedDocuments.add(document);
        }
    }
    return parametrizedDocuments;
}
From source file:br.usp.icmc.gazetteer.SemanticSearchTest.LuceneSearcher.java
License:Open Source License
public void busca(IndexSearcher reader, String q) throws ParseException, IOException { QueryParser parser = new QueryParser(Version.LUCENE_36, "content", a); Query query = parser.parse(q); TopDocs hits = reader.search(query, topK); ScoreDoc[] docs = hits.scoreDocs;// w w w .j av a2s. c o m // Iterate through the results: int i = 0; for (ScoreDoc d : docs) { Document doc = reader.doc(d.doc); documentospesquisados.add(new Documento(numQ, doc.get("filename"), 0)); } }