List of usage examples for org.apache.lucene.search IndexSearcher search
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager) throws IOException
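Note that none of the snippets below actually calls this CollectorManager overload; they use the older search(Query, Collector) and search(Query, int) variants. As a minimal sketch of the overload itself, assuming a recent Lucene release where the bundled TopScoreDocCollectorManager is available (earlier releases expose an equivalent manager through TopScoreDocCollector.createSharedManager):

/**
 * Sketch: top-10 hits through search(Query, CollectorManager). The manager
 * creates one Collector per slice of the index, which lets an IndexSearcher
 * built with an executor search the slices concurrently; reduce() then merges
 * the per-slice results into a single TopDocs.
 */
static TopDocs topTen(IndexSearcher searcher) throws IOException {
    TopScoreDocCollectorManager manager = new TopScoreDocCollectorManager(10, Integer.MAX_VALUE);
    return searcher.search(new MatchAllDocsQuery(), manager);
}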
From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java
License:Apache License
/** User runs a query and counts facets. */
private List<FacetResult> search() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    // Count both "Publish Date" and "Author" dimensions
    FacetSearchParams fsp = new FacetSearchParams(
            new CountFacetRequest(new CategoryPath("Publish Date"), 10),
            new CountFacetRequest(new CategoryPath("Author"), 10));

    // Aggregates the facet counts
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), fc);

    // Retrieve results
    List<FacetResult> facetResults = fc.getFacetResults();

    indexReader.close();
    taxoReader.close();

    return facetResults;
}
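The comment in this example mentions the more typical pattern: wrap the hit collection and the facet collection with MultiCollector so a single query pass produces both. A minimal sketch of that pattern under the same pre-5.0 facet API used above, where userQuery is a hypothetical stand-in for a real user query:

/** Sketch: collect top hits and facet counts in one pass. */
private TopDocs searchWithFacets(Query userQuery) throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    FacetSearchParams fsp = new FacetSearchParams(
            new CountFacetRequest(new CategoryPath("Author"), 10));

    // One Collector for the "normal" hits, one for the facet counts,
    // wrapped so both observe the same query pass:
    TopScoreDocCollector topHits = TopScoreDocCollector.create(10, true);
    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(userQuery, MultiCollector.wrap(topHits, fc));

    List<FacetResult> facetResults = fc.getFacetResults(); // facet counts, returned alongside in real code
    TopDocs hits = topHits.topDocs();                      // the "normal" hits

    indexReader.close();
    taxoReader.close();
    return hits;
}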
From source file:com.m3958.apps.pcms.lucene.facet.SimpleFacetsExample.java
License:Apache License
/** User drills down on 'Publish Date/2010'. */
private List<FacetResult> drillDown() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    // Now user drills down on Publish Date/2010:
    FacetSearchParams fsp = new FacetSearchParams(
            new CountFacetRequest(new CategoryPath("Author"), 10));

    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(fsp.indexingParams);
    q.add(new CategoryPath("Publish Date/2010", '/'));

    FacetsCollector fc = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader);
    searcher.search(q, fc);

    // Retrieve results
    List<FacetResult> facetResults = fc.getFacetResults();

    indexReader.close();
    taxoReader.close();

    return facetResults;
}
From source file:com.m3958.apps.pcms.lucene.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:com.main.Searcher.java
public List<Bean> searching(String s1, String s2, String radioBtn)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Get a reference to the index directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    // IndexReader: an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    // Analyzer with the default stop words; it removes them from the query terms
    Analyzer analyzer = new StandardAnalyzer();

    String contents = "contents";
    QueryParser parser = new QueryParser(contents, analyzer);
    Query q1 = parser.parse(s1);
    Query q2 = parser.parse(s2);

    // Conjunction, disjunction and negation
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    if (radioBtn.equals("conjunction")) {
        // Occur.MUST: both queries are required in a doc
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST);
    } else if (radioBtn.equals("disjunction")) {
        // Occur.SHOULD: at least one of the queries should be present in a doc
        bq.add(q1, BooleanClause.Occur.SHOULD);
        bq.add(q2, BooleanClause.Occur.SHOULD);
    } else {
        // Negation: the first query must be present, the second must not
        bq.add(q1, BooleanClause.Occur.MUST);
        bq.add(q2, BooleanClause.Occur.MUST_NOT);
    }

    TopDocs hits = searcher.search(bq.build(), 10);

    // Uses HTML <B></B> tags to highlight the searched terms
    Formatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(bq.build());
    // Used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Breaks the content into same-size fragments (with no concern for sentence
    // boundaries) but does not split up spans
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        bean.setNumFile(hits.scoreDocs.length);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);

        Document doc = searcher.doc(docid);
        bean.setTitle(doc.get("name"));
        bean.setPath(doc.get("path"));
        String cont = doc.get("contents");

        // Create the token stream and get highlighted content fragments
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        String[] frags = highlighter.getBestFragments(stream, cont, 10);
        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {
            dummy.add(frag);
        }
        bean.setContent(dummy);
        beanList.add(bean);
    }
    dir.close();
    return beanList;
}
From source file:com.main.Searcher.java
public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException {
    // Get a reference to the index directory
    Directory dir = FSDirectory.open(Paths.get(Index_Dir));
    // IndexReader: an interface for accessing a point-in-time view of a Lucene index
    IndexReader reader = DirectoryReader.open(dir);
    // Lucene searcher; it searches over a single IndexReader
    IndexSearcher searcher = new IndexSearcher(reader);
    // Analyzer with the default stop words
    Analyzer analyzer = new StandardAnalyzer();

    // Regular search over the "contents" field
    String contents = "contents";
    QueryParser parser = new QueryParser(contents, analyzer);
    Query q1 = parser.parse(s1);
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(q1, BooleanClause.Occur.MUST);

    // Search the Lucene documents
    TopDocs hits = searcher.search(bq.build(), 10);

    // Highlighter code start
    // Uses HTML <B></B> tags to highlight the searched terms
    Formatter formatter = new SimpleHTMLFormatter();
    // Scores content fragments by the number of unique query terms found --
    // basically the matching score in layman's terms
    QueryScorer scorer = new QueryScorer(bq.build());
    // Used to mark up highlighted terms found in the best sections of the content
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // Breaks the content into same-size fragments (with no concern for sentence
    // boundaries) but does not split up spans
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
    highlighter.setTextFragmenter(fragmenter);

    // Iterate over the found results
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        Bean bean = new Bean();
        bean.setNumFile(hits.scoreDocs.length);
        int docid = hits.scoreDocs[i].doc;
        double rank = hits.scoreDocs[i].score;
        bean.setRankSc(rank);

        Document doc = searcher.doc(docid);
        bean.setTitle(doc.get("name"));
        bean.setPath(doc.get("path"));
        String cont = doc.get("contents");

        // Create the token stream and get highlighted content fragments
        TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
        String[] frags = highlighter.getBestFragments(stream, cont, 10);
        ArrayList<String> dummy = new ArrayList<>();
        for (String frag : frags) {
            dummy.add(frag);
        }
        bean.setContent(dummy);
        beanList.add(bean);
    }
    dir.close();
    return beanList;
}
From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java
License:Apache License
public void testDefaultOperator() throws Exception {
    Query query = new MultiFieldQueryParser(Version.LUCENE_46, new String[] { "title", "subject" },
            new SimpleAnalyzer(Version.LUCENE_46)).parse("development");

    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs hits = searcher.search(query, 10);

    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Ant in Action"));                  // #A
    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Extreme Programming Explained")); // #A
    dir.close();
}
From source file:com.mathworks.xzheng.advsearching.MultiFieldQueryParserTest.java
License:Apache License
public void testSpecifiedOperator() throws Exception {
    Query query = MultiFieldQueryParser.parse(Version.LUCENE_46, "lucene",
            new String[] { "title", "subject" },
            new BooleanClause.Occur[] { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST },
            new SimpleAnalyzer(Version.LUCENE_46));

    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs hits = searcher.search(query, 10);

    assertTrue(TestUtil.hitsIncludeTitle(searcher, hits, "Lucene in Action, Second Edition"));
    assertEquals("one and only one", 1, hits.scoreDocs.length);
    dir.close();
}
From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java
License:Apache License
public void testMulti() throws Exception {
    MultiReader multiReader = new MultiReader(searchers[0].getIndexReader(), searchers[1].getIndexReader());
    IndexSearcher searcher = new IndexSearcher(multiReader);

    TermRangeQuery query = new TermRangeQuery("animal", // #3
            new BytesRef("h"),                          // #3
            new BytesRef("t"),                          // #3
            true, true);                                // #3

    TopDocs hits = searcher.search(query, 10);
    assertEquals("tarantula not included", 12, hits.totalHits);
}
From source file:com.mathworks.xzheng.advsearching.TimeLimitingCollectorTest.java
License:Apache License
public void testTimeLimitingCollector() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    Query q = new MatchAllDocsQuery();
    int numAllBooks = TestUtil.hitCount(searcher, q);

    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
    Collector collector = new TimeLimitingCollector(topDocs, // #A
            null, 1000);                                     // #A
    try {
        searcher.search(q, collector);
        assertEquals(numAllBooks, topDocs.getTotalHits()); // #B
    } catch (TimeExceededException tee) {                  // #C
        System.out.println("Too much time taken.");        // #C
    }                                                      // #C
    dir.close();
}
From source file:com.mathworks.xzheng.analysis.codec.MetaphoneAnalyzerTest.java
License:Apache License
public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("contents",                                 // #A
            "cool cat", Field.Store.YES, Field.Index.ANALYZED));  // #A
    writer.addDocument(doc);
    writer.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    Query query = new QueryParser(Version.LUCENE_46, // #B
            "contents", analyzer)                    // #B
            .parse("kool kat");                      // #B

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits); // #C

    int docID = hits.scoreDocs[0].doc;
    doc = searcher.doc(docID);
    assertEquals("cool cat", doc.get("contents")); // #D
}