List of usage examples for org.apache.lucene.search IndexSearcher rewrite
public Query rewrite(Query original) throws IOException
From source file:org.sindice.siren.search.TestSirenWildcardQuery.java
License:Open Source License
/**
 * Verifies that a SirenWildcardQuery whose term has only a single trailing '*'
 * is rewritten to the equivalent SirenPrefixQuery, and that both the boost and
 * the rewrite method are preserved by the rewrite. Checked for all three
 * rewrite modes.
 */
public void testPrefixTerm() throws IOException {
    final Directory indexStore = this.getIndexStore("field", new String[] { "prefix", "prefixx" });
    final IndexSearcher searcher = new IndexSearcher(indexStore, true);

    final SirenMultiTermQuery wildcard = new SirenWildcardQuery(new Term("field", "prefix*"));
    this.assertMatches(searcher, wildcard, 2);

    final SirenMultiTermQuery prefix = new SirenPrefixQuery(new Term("field", "prefix"));

    // Scoring boolean rewrite.
    wildcard.setRewriteMethod(SirenMultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    wildcard.setBoost(0.1F);
    prefix.setRewriteMethod(wildcard.getRewriteMethod());
    prefix.setBoost(wildcard.getBoost());
    assertEquals(searcher.rewrite(prefix), searcher.rewrite(wildcard));

    // Constant-score auto rewrite.
    wildcard.setRewriteMethod(SirenMultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
    wildcard.setBoost(0.3F);
    prefix.setRewriteMethod(wildcard.getRewriteMethod());
    prefix.setBoost(wildcard.getBoost());
    assertEquals(searcher.rewrite(prefix), searcher.rewrite(wildcard));

    // Constant-score boolean rewrite.
    wildcard.setRewriteMethod(SirenMultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
    wildcard.setBoost(0.4F);
    prefix.setRewriteMethod(wildcard.getRewriteMethod());
    prefix.setBoost(wildcard.getBoost());
    assertEquals(searcher.rewrite(prefix), searcher.rewrite(wildcard));

    searcher.close();
    indexStore.close();
}
From source file:org.tallison.lucene.search.concordance.charoffsets.SpansCrawler.java
License:Apache License
public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { query = (SpanQuery) query.rewrite(searcher.getIndexReader()); SpanWeight w = query.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f); if (filter == null) { for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; }/* ww w. j av a2 s . co m*/ boolean cont = visitLeafReader(ctx, spans, visitor); if (!cont) { break; } } } else { filter = searcher.rewrite(filter); Weight searcherWeight = searcher.createWeight(filter, ScoreMode.COMPLETE_NO_SCORES, 1.0f); for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Scorer leafReaderContextScorer = searcherWeight.scorer(ctx); if (leafReaderContextScorer == null) { continue; } //Can we tell from the scorer that there were no hits? //in <= 5.x we could stop here if the filter query had no hits. Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; } DocIdSetIterator filterItr = leafReaderContextScorer.iterator(); if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) { continue; } boolean cont = visitLeafReader(ctx, spans, filterItr, visitor); if (!cont) { break; } } } }
From source file:Search.SearchExecutor.java
License:Educational Community License
/** * Search user generated transcriptions. Results contain embedded images. Results are restricted to those the user usrID has * permissing to view the transcription// www. j a va 2s .c om * @param searchWord * @param language * @param order * @param paged * @param pageNumber * @param usrID * @return * @throws Exception */ public Stack<Transcription> transcriptionSearch(String searchWord, String language, int order, Boolean paged, int pageNumber, String usrID) throws Exception { Boolean wildcard = true; final int pageSize = 20; //Number of results per page, could be made a parm one day final int maxResults = 1000; //No matter what dont return more than this many results from Lucene. This is ok because result filtering occurs before this limitation is applied String returnStringArray = ""; //we dont currently worry about language filtering, but ENAP did, so we could do it if we wanted to if (language != null && language.length() > 1) { //searchWord=searchWord+" AND lang:"+language; } /**@TODO the location should be a param*/ IndexSearcher is = new IndexSearcher("/usr/indexTranscriptions"); QueryParser parser = new QueryParser("text", analyser); Sort newsort; Query query = parser.parse(searchWord); is.rewrite(parser.parse(searchWord)); QueryScorer queryScorer = new QueryScorer(query); ScoreDoc[] hits; //If the person wasnt logged in, give them only public comments. comment owner if (usrID.compareTo("") == 0) { usrID = "0"; } Query secQuery = parser.parse("security:private OR creator:" + usrID); //This will filter search results so only comments owned by the user and public comments will be returned QueryFilter secFilter = new QueryFilter(query); //If a sort was specified, use it, otherwise use the default sorting which is by hit quality if (order > 0) { //order=1 means sort by line number, first line of the text is first. //order=2 means inverse sort by line number, last line of the text is first. 
//Java, the Nanny language, doesn't want to let me use newsort even if I ensure its not a null pointer //So if were going to use a filter, set the filter to type 1, then check to see if it should be something else. try { hits = is.search(query, secFilter, maxResults).scoreDocs; } catch (org.apache.lucene.search.BooleanQuery.TooManyClauses e) { return null; } } else { try { newsort = new Sort("creator", false); hits = is.search(query, secFilter, maxResults, newsort).scoreDocs; } catch (org.apache.lucene.search.BooleanQuery.TooManyClauses e) { return null; } } //Start at the hit that belongs at the top of the page they requested. For page 2, that is 19 //Ensure we do not print more than pageNumber hits, or go beyond the end of the hit list String link = ""; int ctr = 1; Stack<Transcription> results = new Stack(); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, queryScorer); if (pageSize * (pageNumber - 1) < hits.length) { returnStringArray += "Your search for \"<b>" + searchWord + "</b>\" returned " + hits.length + " results.<br/>"; for (int i = pageSize * (pageNumber - 1); i < hits.length && i - (pageSize * (pageNumber - 1)) < pageSize; i++) { Document hitDoc = is.doc(hits[i].doc); field = hitDoc.getField("line"); Transcription t = new Transcription(Integer.parseInt(hitDoc.getField("id").stringValue())); results.add(t); String paragraph = field.stringValue(); String pageno = ""; String creator = hitDoc.getField("creator").stringValue(); user.User u = new User(Integer.parseInt(creator)); creator = "" + u.getLname() + " " + u.getFname(); if (isInteger(paragraph)) { field = hitDoc.getField("page"); pageno = field.stringValue(); if (pageno == null) { pageno = "hi null"; } if (paragraph == null) { paragraph = "hola null"; } } else { String folio = ""; Folio f = new Folio(Integer.parseInt(folio)); link = " <a href=transcriptionImageTest.jsp?p=" + folio + ">" + 
field.stringValue() + "(Archive:" + f.getArchive() + " Shelfmark:" + f.getCollectionName() + " page:" + folio + ")</a>"; } returnStringArray = returnStringArray + (ctr + ". " + link + "<br/>"); ctr++; } } else /*we dont have any results for the page/search they gave us*/ { returnStringArray = "No results to display."; } totalHits = hits.length; totalPages = hits.length / pageSize; if (hits.length % pageSize > 0) { totalPages++; } if (!wildcard) { return results; } else { return results; } }