List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
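The examples below rely on older overloads such as search(Query, int) and search(Query, Collector). For the CollectorManager overload shown in the signature above, here is a minimal sketch, assuming a recent Lucene release where IndexSearcher.search(Query, CollectorManager) and TotalHitCountCollector are available; the "index" directory and "contents" field are illustrative only:

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Collection;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.FSDirectory;

public class CollectorManagerSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative index location; adjust to an existing index.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // One collector is created per index slice; reduce() merges the per-slice results.
            CollectorManager<TotalHitCountCollector, Integer> manager =
                    new CollectorManager<TotalHitCountCollector, Integer>() {
                        @Override
                        public TotalHitCountCollector newCollector() {
                            return new TotalHitCountCollector();
                        }

                        @Override
                        public Integer reduce(Collection<TotalHitCountCollector> collectors) throws IOException {
                            int total = 0;
                            for (TotalHitCountCollector c : collectors) {
                                total += c.getTotalHits();
                            }
                            return total;
                        }
                    };

            Integer totalHits = searcher.search(new TermQuery(new Term("contents", "lucene")), manager);
            System.out.println(totalHits + " total matching documents");
        }
    }
}

The CollectorManager is what lets IndexSearcher parallelize a query across index slices: newCollector() supplies one collector per slice and reduce() merges their results into the value returned by search.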
From source file:com.evoapps.lucene.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 */
public ArrayList<Publication> doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
        int hitsPerPage, boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            String content = doc.get("contents");
            if (path != null) {
                // System.out.println((i + 1) + ". " + path + ">>" + content);
                list.add(new Publication(content, path));
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
    return list;
}
From source file:com.example.search.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed, only enough results are collected to cover
 * the requested page; paging further requires another execution of the query.
 *
 * @throws InvalidTokenOffsetsException
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, ArrayList<SearchResult> result, int startPage)
        throws IOException, InvalidTokenOffsetsException {

    startPage = Math.max(0, startPage);

    // Collect enough docs to cover the requested page.
    TopDocs results = searcher.search(query, startPage + hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    if (startPage > hits.length) {
        return;
    }

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = startPage;
    // The "collect more (y/n)" prompt from the original Lucene demo was commented out here.
    int end = Math.min(hits.length, start + hitsPerPage);

    for (int i = start; i < end; i++) {
        if (raw) { // output raw format
            System.out.println(" score=" + hits[i].score);
        }

        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("url");
        if (path != null) {
            String title = doc.get("title");
            String content = doc.get("content");

            SearchResult item = new SearchResult();
            item.title = i + " " + title;
            item.url = path;
            item.score = hits[i].score;

            // A Highlighter-based snippet was commented out in the original source;
            // the content is simply truncated for display instead.
            if (content != null && content.length() > 403) {
                content = content.substring(0, 399) + "...";
            }
            item.content = content;

            result.add(item);
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
        }
    }

    // The interactive previous/next/jump prompt of the original demo was commented out;
    // only the paging flags are kept.
    SearchResult.hasPrePage = (start - hitsPerPage >= 0);
    SearchResult.hasNextPage = (start + hitsPerPage < numTotalHits);
    System.out.println("hasNextPage " + SearchResult.hasNextPage);
}
From source file:com.flaptor.hounder.indexer.IndexManager.java
License:Apache License
/**
 * Performs the deletes and removes duplicates from the index.
 */
private synchronized void applyDeletes() {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(indexDirectory);
        Set<Integer> documentsToDelete = new HashSet<Integer>();
        Enumeration keysEnum = lastOperation.keys();

        // First we collect the lucene ids of documents to be deleted.
        while (keysEnum.hasMoreElements()) {
            searcher = new IndexSearcher(reader);
            String key = (String) keysEnum.nextElement();
            // If the last operation is a delete, lastAddition will be 0 and we'll find no match in the index.
            // This way, all the documents with that DocumentId will be erased.
            String lastAddition = String.valueOf((Long) (lastOperation.get(key)));
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: searching " + docIdName + " = [" + key + "]");
            }

            ScorelessHitCollector collector = new HashSetScorelessHitCollector();
            searcher.search(new TermQuery(new Term(docIdName, key)), collector);
            Set<Integer> docIds = collector.getMatchingDocuments();
            if (logger.isEnabledFor(Level.DEBUG)) {
                logger.debug("Applying deletes: found matches: " + docIds.size());
            }

            for (Integer docId : docIds) {
                Document d = searcher.doc(docId);
                String addId = d.get("AddId");
                if (!lastAddition.equals(addId)) {
                    if (logger.isEnabledFor(Level.DEBUG)) {
                        logger.debug("Applying deletes: deleting AddId:" + addId);
                    }
                    documentsToDelete.add(docId);
                }
            }
        }

        // Now we have all of lucene's ids of documents to be deleted and we can
        // proceed with the actual deletion.
        for (Integer i : documentsToDelete) {
            reader.deleteDocument(i);
        }
    } catch (IOException e) {
        logger.fatal("applyDeletes: IOException caught.", e);
        throw new RuntimeException(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
                String s = "applyDeletes: Couldn't close searcher, nothing I can do about it" + e;
                logger.error(s);
                throw new IllegalStateException(s);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                logger.warn("Couldn't close reader, nothing I can do about it", e);
            }
        }
    }
    lastOperation.clear();
}
From source file:com.flaptor.hounder.searcher.spell.SpellChecker.java
License:Apache License
/**
 * Suggest similar words (restricted or not to a field of a user index).
 * @param word String the word you want a spell check done on
 * @param num_sug int the number of suggested words
 * @param ir the indexReader of the user index (can be null, see field param)
 * @param field String the field of the user index: if field is not null, the suggested
 *  words are restricted to the words present in this field.
 * @param morePopular boolean return only the suggested words that are more frequent than the searched word
 *  (only if restricted mode = (indexReader != null and field != null))
 * @throws IOException
 * @return String[] the sorted list of the suggested words with these 2 criteria:
 *  first criteria: the edit distance; second criteria (only if restricted mode): the popularity
 *  of the suggested words in the field of the user index
 */
public String[] suggestSimilar(String word, int num_sug, IndexReader ir, String field, boolean morePopular)
        throws IOException {
    float minScore = min;
    final TRStringDistance sd = new TRStringDistance(word);
    final int lengthWord = word.length();

    final int goalFreq = (morePopular && ir != null) ? ir.docFreq(new Term(field, word)) : 0;
    if (!morePopular && goalFreq > 0) {
        // return the word if it exists in the index and we don't want a more popular word
        return new String[] { word };
    }

    BooleanQuery query = new BooleanQuery();
    String[] grams;
    String key;

    for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
        key = "gram" + ng; // form key
        grams = formGrams(word, ng); // form word into ngrams (allow dups too)
        if (grams.length == 0) {
            continue;
        }
        if (bStart > 0) { // should we boost prefixes?
            add(query, "start" + ng, grams[0], bStart); // matches start of word
        }
        if (bEnd > 0) { // should we boost suffixes?
            add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
        }
        for (int i = 0; i < grams.length; i++) {
            add(query, key, grams[i]);
        }
    }

    IndexSearcher searcher = new IndexSearcher(this.spellindex);
    // go through more than 'maxr' matches in case the distance filter triggers
    TopDocCollector collector = new TopDocCollector(10 * num_sug);
    searcher.search(query, collector);
    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;

    SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
    SuggestWord sugword = new SuggestWord();
    for (int i = 0; i < scoreDocs.length; i++) {
        Document doc = searcher.doc(scoreDocs[i].doc);
        sugword.string = doc.get(F_WORD); // get orig word

        if (sugword.string.equals(word)) {
            continue; // don't suggest a word for itself, that would be silly
        }

        // edit distance, normalized with the min word length
        sugword.score = doc.getBoost()
                * (1.0f - ((float) sd.getDistance(sugword.string) / Math.min(sugword.string.length(), lengthWord)));
        if (sugword.score < minScore) {
            continue;
        }

        if (ir != null) { // use the user index
            sugword.freq = ir.docFreq(new Term(field, sugword.string)); // freq in the index
            if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1) {
                // don't suggest a word that is not present in the field
                continue;
            }
        }

        sugqueue.insert(sugword);
        if (sugqueue.size() == num_sug) {
            // if the queue is full, maintain the min score
            minScore = ((SuggestWord) sugqueue.top()).score;
        }
        sugword = new SuggestWord();
    }

    // convert to string array
    String[] list = new String[sugqueue.size()];
    for (int i = sugqueue.size() - 1; i >= 0; i--) {
        list[i] = ((SuggestWord) sugqueue.pop()).string;
    }

    searcher.close();
    return list;
}
From source file:com.flaptor.hounder.util.Idx.java
License:Apache License
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);

    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0)
                num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        String attr = (fld.isIndexed() ? ",i" : "") + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            reader.close();
            System.out.println();
        }
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field))
                count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.SearchFiles.java
License:Apache License
/**
 * Simple command-line based search demo.
 */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles"
            + " [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\n"
            + "See http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    if (queries != null) {
        in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8);
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    }

    QueryParser parser = new QueryParser(field, analyzer);
    while (true) {
        if (queries == null && queryString == null) { // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null || line.length() == -1) {
            break;
        }

        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:com.fuerve.villageelder.actions.results.SearchResultItemTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.actions.results.SearchResultItem#SearchResultItem(org.apache.lucene.search.TopDocs, java.util.List)}.
 */
@Test
public final void testSearchResultItem() throws Exception {
    Directory indexDirectoryExpected = new RAMDirectory();
    Directory taxonomyDirectoryExpected = new RAMDirectory();

    buildDummyIndex(indexDirectoryExpected, taxonomyDirectoryExpected);

    IndexReader reader = DirectoryReader.open(indexDirectoryExpected);
    IndexSearcher searcher = new IndexSearcher(reader);
    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxonomyDirectoryExpected);

    QueryParser parser = new SearchQueryParser(Lucene.LUCENE_VERSION, Lucene.DEFAULT_QUERY_FIELD,
            Lucene.getPerFieldAnalyzer());

    TopFieldCollector indexCollector = getDummyCollector();
    FacetsCollector facetsCollector = getDummyFacetsCollector((DirectoryReader) reader, taxo);
    Collector collector = MultiCollector.wrap(indexCollector, facetsCollector);

    searcher.search(parser.parse("Revision:5*"), collector);
    facetsCollector.getFacetResults();

    SearchResultItem target = new SearchResultItem(indexCollector.topDocs(), facetsCollector.getFacetResults());

    assertEquals(2, target.getTopDocs().totalHits);
    assertEquals(1, target.getFacetResults().size());
}
From source file:com.fuerve.villageelder.actions.results.SearchResultTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.actions.results.SearchResult#aggregate(com.fuerve.villageelder.actions.results.SearchResultItem)}.
 */
@Test
public final void testAggregateSearchResultItem() throws Exception {
    Directory indexDirectoryExpected = new RAMDirectory();
    Directory taxonomyDirectoryExpected = new RAMDirectory();

    buildDummyIndex(indexDirectoryExpected, taxonomyDirectoryExpected);

    IndexReader reader = DirectoryReader.open(indexDirectoryExpected);
    IndexSearcher searcher = new IndexSearcher(reader);
    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxonomyDirectoryExpected);

    QueryParser parser = new SearchQueryParser(Lucene.LUCENE_VERSION, Lucene.DEFAULT_QUERY_FIELD,
            Lucene.getPerFieldAnalyzer());

    TopFieldCollector indexCollector = getDummyCollector();
    FacetsCollector facetsCollector = getDummyFacetsCollector((DirectoryReader) reader, taxo);
    Collector collector = MultiCollector.wrap(indexCollector, facetsCollector);

    searcher.search(parser.parse("Revision:5*"), collector);
    facetsCollector.getFacetResults();

    SearchResult target = new SearchResult();
    target.aggregate(new SearchResultItem(indexCollector.topDocs(), facetsCollector.getFacetResults()));

    assertEquals(2, target.getTopDocs().totalHits);
    assertEquals(1, target.getFacetResults().size());
}
From source file:com.fuerve.villageelder.search.SearchQueryParserTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.search.SearchQueryParser#getRangeQuery(java.lang.String, java.lang.String, java.lang.String, boolean, boolean)}.
 * @throws Exception
 */
@Test
public final void testGetRangeQueryRevisionRange() throws Exception {
    QueryParser target = new SearchQueryParser(Lucene.LUCENE_VERSION, "Message", Lucene.getPerFieldAnalyzer());

    Query testQuery = target.parse("RevisionNumber:[50 TO 100]");
    assertEquals(NumericRangeQuery.class, testQuery.getClass());

    IndexSearcher searcher = new IndexSearcher(buildDummyIndex());
    ScoreDoc[] docs = searcher.search(testQuery, 10).scoreDocs;
    assertEquals(1, docs.length);
}
From source file:com.fuerve.villageelder.search.SearchQueryParserTest.java
License:Apache License
/**
 * Test method for {@link com.fuerve.villageelder.search.SearchQueryParser#getRangeQuery(java.lang.String, java.lang.String, java.lang.String, boolean, boolean)}.
 * @throws Exception
 */
@Test
public final void testGetRangeQueryRevisionSingleton() throws Exception {
    QueryParser target = new SearchQueryParser(Lucene.LUCENE_VERSION, "Message", Lucene.getPerFieldAnalyzer());

    Query testQuery = target.parse("Revision:50");
    assertEquals(TermQuery.class, testQuery.getClass());

    IndexSearcher searcher = new IndexSearcher(buildDummyIndex());
    ScoreDoc[] docs = searcher.search(testQuery, 10).scoreDocs;
    assertEquals(1, docs.length);
}