List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.termExtract.LuceneTopTermExtract.java
License:Apache License
/** * Returns the 10 most important terms in the document with the specified * id./*from w ww .j ava2 s .c o m*/ * * @param id * @param reader * @param numberOfTerms * @return */ public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) { try { IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(1, true); Query q = new TermQuery(new Term("id", id)); searcher.search(q, collector); if (collector.getTotalHits() > 0) { int docNr = collector.topDocs().scoreDocs[0].doc; return getTopTerms(docNr, reader, numberOfTerms); } else { logger.warn("No document found with id=" + id); } } catch (IOException ex) { logger.error(ex); } return new HashMap<>(0); }
From source file:be.ugent.tiwi.sleroux.newsrec.recommendationstester.LuceneTopTermExtract.java
License:Apache License
public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) { try {//from w w w . j a va2 s . c o m IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(1, true); Query q = new TermQuery(new Term("id", id)); searcher.search(q, collector); if (collector.getTotalHits() > 0) { int docNr = collector.topDocs().scoreDocs[0].doc; return getTopTerms(docNr, reader, numberOfTerms); } else { logger.warn("No document found with id=" + id); } } catch (IOException ex) { logger.error(ex); } return new HashMap<>(0); }
From source file:be.ugent.tiwi.sleroux.newsrec.stormNewsFetch.storm.bolts.NewsItemToTermsBolt.java
License:Apache License
private void updateTermMap(DirectoryReader reader, IndexSearcher searcher, Map<String, Double> termMap, long id, String field, double weight) throws IOException { Query query = NumericRangeQuery.newLongRange("id", id, id, true, true); TopDocs topdocs = searcher.search(query, 1); if (topdocs.totalHits > 0) { int docNr = topdocs.scoreDocs[0].doc; Terms vector = reader.getTermVector(docNr, field); if (vector != null) { TermsEnum termsEnum;/*from w w w. j a v a 2s. c o m*/ termsEnum = vector.iterator(TermsEnum.EMPTY); BytesRef text; while ((text = termsEnum.next()) != null) { String term = text.utf8ToString(); int docFreq = reader.docFreq(new Term(field, text)); // ignore really rare terms and really common terms //double minFreq = reader.numDocs() * 0.0001; //double maxFreq = reader.numDocs() / 3; double minFreq = 0; double maxFreq = Double.MAX_VALUE; if (docFreq > minFreq && docFreq < maxFreq) { double tf = 1 + ((double) termsEnum.totalTermFreq()) / reader.getSumTotalTermFreq(field); double idf = Math.log((double) reader.numDocs() / docFreq); if (!Double.isInfinite(idf)) { if (!termMap.containsKey(term)) { termMap.put(term, tf * idf * weight); } else { termMap.put(term, termMap.get(term) + tf * idf * weight); } } } } } else { logger.debug("no term available for doc=" + docNr + " and field=" + field); } } else { logger.warn("No documents found with id=" + id); } }
From source file:book.Searcher.java
License:Apache License
public static void search(String indexDir, String q) throws IOException, ParseException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir).toPath())); IndexSearcher is = new IndexSearcher(reader); //alt: Directory dir = FSDirectory.open(new File(indexDir)); // 3 //alt: IndexSearcher is = IndexSearcher(dir); // 3 QueryParser parser = new QueryParser("contents", // 4 new StandardAnalyzer()); // 4 Query query = parser.parse(q); // 4 long start = System.currentTimeMillis(); TopDocs hits = is.search(query, 10); // 5 long end = System.currentTimeMillis(); System.err.println("Found " + hits.totalHits + // 6 " document(s) (in " + (end - start) + // 6 " milliseconds) that matched query '" + // 6 q + "':"); // 6 for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); // 7 System.out.println(doc.get("fullpath")); // 8 }//from w w w. jav a 2 s . com //was: is.close(); // 9 }
From source file:bostoncase.widgets.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details."; if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) { System.out.println(usage); System.exit(0);//from w ww. j av a2s.com } String index = "/Users/michaelhundt/Documents/Meine/Studium/MASTER/MasterProject/data/lucene_index/"; String field = "content"; String queries = null; int repeat = 0; boolean raw = false; String queryString = null; int hitsPerPage = 10; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index = args[i + 1]; i++; } else if ("-field".equals(args[i])) { field = args[i + 1]; i++; } else if ("-queries".equals(args[i])) { queries = args[i + 1]; i++; } else if ("-query".equals(args[i])) { queryString = args[i + 1]; i++; } else if ("-repeat".equals(args[i])) { repeat = Integer.parseInt(args[i + 1]); i++; } else if ("-raw".equals(args[i])) { raw = true; } else if ("-paging".equals(args[i])) { hitsPerPage = Integer.parseInt(args[i + 1]); if (hitsPerPage <= 0) { System.err.println("There must be at least 1 hit per page."); System.exit(1); } i++; } } IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); // IndexReader reader = StandardDirectoryReader.open(FSDirectory.open(Paths.get(index))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: 
"); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); if (line.equals("geo")) { // query = new GeoPointInBBoxQuery("geo", 42.2279, 42.3969, -70.9235, -71.1908); // query = new GeoPointInBBoxQuery("geo", -71.1908, -70.9235, 42.2279, 42.3969); query = new GeoPointInBBoxQuery("geo", 42.2279, 42.3969, -71.1908, -70.9235); } System.out.println("Searching for: " + query.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null); if (queryString != null) { break; } } reader.close(); }
From source file:br.andrew.lucene.testing.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits./*from ww w. ja v a2s .co m*/ * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(final BufferedReader in, final IndexSearcher searcher, final Query query, final int hitsPerPage, final boolean raw, final boolean interactive) throws IOException { // Collect enough docs to show 5 pages final TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; final int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); final String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } final Document doc = searcher.doc(hits[i].doc); final String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); final String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); final String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { final int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:br.bireme.ngrams.CompareResults.java
/**
 * Fetches the two documents whose "id" fields equal {@code docId1} and {@code docId2}
 * and, when both exist, passes them to {@code writeDocDifferences} together with the
 * similarity label. Missing documents are silently skipped.
 *
 * @param similarity similarity label forwarded to writeDocDifferences
 * @param docId1     id of the first document
 * @param docId2     id of the second document
 * @param isearcher  searcher over the index holding both documents
 * @param bwriter    destination for the difference report
 * @throws IOException on index access or write failure
 */
private static void checkDocs(final String similarity, final String docId1, final String docId2,
        final IndexSearcher isearcher, final BufferedWriter bwriter) throws IOException {
    assert similarity != null;
    assert docId1 != null;
    assert docId2 != null;
    assert isearcher != null;
    assert bwriter != null;
    final TopDocs top1 = isearcher.search(new TermQuery(new Term("id", docId1)), 1);
    final TopDocs top2 = isearcher.search(new TermQuery(new Term("id", docId2)), 1);
    final ScoreDoc[] scores1 = top1.scoreDocs;
    final ScoreDoc[] scores2 = top2.scoreDocs;
    if ((scores1.length > 0) && (scores2.length > 0)) {
        final Document doc1 = isearcher.doc(scores1[0].doc);
        final Document doc2 = isearcher.doc(scores2[0].doc);
        writeDocDifferences(similarity, doc1, doc2, bwriter);
    }
}
From source file:br.bireme.ngrams.NGrams.java
private static void searchRaw(final Parameters parameters, final IndexSearcher searcher, final NGAnalyzer analyzer, final NGramDistance ngDistance, final String text, final boolean useSimilarity, final Set<String> id_id, final Set<Result> results) throws IOException, ParseException { assert parameters != null; assert searcher != null; assert analyzer != null; assert ngDistance != null; assert id_id != null; assert results != null; if (text == null) { throw new NullPointerException("text"); }//w ww. jav a2s. c om final String text2 = StringEscapeUtils.unescapeHtml4(text); final String[] param = text2.trim().split(" *\\| *", Integer.MAX_VALUE); if (param.length != parameters.nameFields.size()) { throw new IOException(text); } final String fname = parameters.indexed.name; final QueryParser parser = new QueryParser(fname, analyzer); final String ntext = Tools .limitSize(Tools.normalize(param[parameters.indexed.pos], OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim(); final int MAX_RESULTS = 20; if (!ntext.isEmpty()) { final Query query = parser.parse(QueryParser.escape(ntext)); final TopDocs top = searcher.search(query, MAX_RESULTS); final float lower = parameters.scores.first().minValue; ScoreDoc[] scores = top.scoreDocs; int remaining = MAX_RESULTS; for (ScoreDoc sdoc : scores) { if (remaining-- <= 0) { break; // Only for performance } final Document doc = searcher.doc(sdoc.doc); if (useSimilarity) { final String dname = doc.get(fname); if (dname == null) { throw new IOException("dname"); } final float similarity = ngDistance.getDistance(ntext, doc.get(fname)); if (similarity < lower) { if (remaining > 3) { remaining = 3; //System.out.println("Atualizando tot=" + tot + " score=" + sdoc.score + " similarity=" + similarity+ " text=" + doc.get(fname)); } } else { final Result out = createResult(id_id, parameters, param, doc, ngDistance, similarity, sdoc.score); if (out != null) { results.add(out); } } } else { if (sdoc.score < 1.0) { System.out.println("Saindo score=" + 
sdoc.score); break; // Only for performance } final Result out = createResult(id_id, parameters, param, doc, ngDistance, 0, sdoc.score); if (out != null) { results.add(out); } } } } }
From source file:br.com.crawlerspring.model.Searcher.java
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception { List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>(); RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*")); int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int cont = 0; cont < hits.length; ++cont) { br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document(); int docId = hits[cont].doc; org.apache.lucene.document.Document luceneDocument = searcher.doc(docId); document.setTitle(luceneDocument.get("title")); document.setContent(luceneDocument.get("content")); parametrizedDocuments.add(document); }//from w w w .j av a2s . c om return parametrizedDocuments; }
From source file:br.usp.icmc.gazetteer.SemanticSearchTest.LuceneSearcher.java
License:Open Source License
public void busca(IndexSearcher reader, String q) throws ParseException, IOException { QueryParser parser = new QueryParser(Version.LUCENE_36, "content", a); Query query = parser.parse(q); TopDocs hits = reader.search(query, topK); ScoreDoc[] docs = hits.scoreDocs;/*from w w w . j a va 2 s . c o m*/ // Iterate through the results: int i = 0; for (ScoreDoc d : docs) { Document doc = reader.doc(d.doc); documentospesquisados.add(new Documento(numQ, doc.get("filename"), 0)); } }