List of usage examples for the org.apache.lucene.search.highlight.QueryScorer constructor
public QueryScorer(WeightedSpanTerm[] weightedTerms)
From source file:apm.common.core.DaoImpl.java
License:Open Source License
/** * //from w ww .j a v a 2s .c o m * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:ca.dracode.ais.indexer.FileSearcher.java
License:Open Source License
/** * Takes a list of Documents and highlights information relevant to a given Query * @param docs The documents to highlight * @param qry The query used to highlight the documents * @param type The type of the search, one of QUERY_BOOLEAN, * which just notes the page on which the term exists or QUERY_STANDARD, * which gives highlighted fragments and the page on which they exist. * @param term The term that created the query * @param maxResults The maximum number of results that will be returned * @return A SearchResult containing the results sorted by relevance and page */// w w w. j a v a 2 s . c o m private SearchResult getHighlightedResults(List<Document> docs, Query qry, int type, String term, int maxResults) { try { int numResults = 0; LinkedHashMap<String, LinkedHashMap<Integer, List<String>>> results = new LinkedHashMap<String, LinkedHashMap<Integer, List<String>>>(); for (int i = 0; i < docs.size() && numResults < maxResults; i++) { Document d = docs.get(i); int docPage = Integer.parseInt(d.get("page")); String name = d.get("path"); LinkedHashMap<Integer, List<String>> docResult = results.get(name); if (docResult == null) { docResult = new LinkedHashMap<Integer, List<String>>(); results.put(name, docResult); } if (type != FileSearcher.QUERY_BOOLEAN) { String contents = d.get("text"); Highlighter highlighter = new Highlighter(new QueryScorer(qry)); String[] frag = null; try { frag = highlighter.getBestFragments(new SimpleAnalyzer(Version.LUCENE_47), "text", contents, maxResults - numResults); numResults += frag.length; } catch (IOException e) { Log.e(TAG, "Error while reading index", e); } catch (InvalidTokenOffsetsException e) { Log.e(TAG, "Error while highlighting", e); } if (frag != null) { Log.i(TAG, "Frags: " + frag.length + " " + frag + " " + frag[0]); } ArrayList<String> tmpList = new ArrayList<String>( Arrays.asList(frag != null ? 
frag : new String[0])); Log.i(TAG, "list " + tmpList.getClass().getName()); docResult.put(docPage, tmpList); } else { ArrayList<String> tmp = new ArrayList<String>(); tmp.add(term); docResult.put(docPage, tmp); } } Log.i(TAG, "" + results.size()); return new SearchResult(results); } catch (Exception e) { Log.e("TAG", "Error while Highlighting", e); return null; } }
From source file:ca.uhn.fhir.jpa.dao.FhirSearchDao.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }/* www. j a va 2 s .c o m*/ IdDt contextId = new IdDt(contextParts[0], contextParts[1]); Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextId, myEntityManager); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:ca.uhn.fhir.jpa.dao.FulltextSearchSvcImpl.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }/* w ww . j a v a 2s . co m*/ Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Search the index for the given expression * @param expr the expression to be parsed * @param langCode the language of the expression and index * @param profile the hit profile (where to start from etc) * @return the result docs/*from ww w . j a va 2s .c o m*/ */ public static String searchIndex(String expr, String langCode, HitProfile profile) { StringBuilder sb = new StringBuilder(); try { Analyzer analyzer = AeseSearch.createAnalyzer(langCode); DirectoryReader reader = DirectoryReader.open(AeseSearch.index); if (reader != null) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer); Query q = qp.parse(expr); TopDocs hits = searcher.search(q, AeseSearch.maxHits); ScoreDoc[] docs = hits.scoreDocs; for (int j = profile.from; j < profile.to && j < docs.length; j++) { Document doc = searcher.doc(docs[j].doc); String vid = doc.get(LuceneFields.VID); String docID = doc.get(LuceneFields.DOCID); Highlighter h = new Highlighter(new QueryScorer(q)); String text = getCorTexVersion(docID, vid); sb.append(formatDocID(docID)); sb.append(" "); sb.append(formatVersionID(vid)); sb.append(" "); String frag = h.getBestFragment(analyzer, "text", text); sb.append("<span class=\"found\">"); sb.append(frag); sb.append("</span>\n"); } profile.numHits = docs.length; } reader.close(); } catch (Exception e) { sb.append(e.getMessage()); } return sb.toString(); }
From source file:ch.admin.isb.hermes5.business.search.HighlighterRepository.java
License:Apache License
public HighlighterWrapper getHighlighter(Analyzer analyzer, IndexSearcher isearcher, Query query) { SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); return new HighlighterWrapper(highlighter, numberOfFragments.getIntegerValue(), isearcher, analyzer, trimstringsList());// w w w .j a v a 2 s . c om }
From source file:ci6226.eval_index_reader.java
public static void Searchit(IndexReader reader, IndexSearcher searcher, Analyzer _analyzer, String field, String[] _searchList, int _topn, PrintWriter writer) throws org.apache.lucene.queryparser.classic.ParseException, IOException, InvalidTokenOffsetsException { Analyzer analyzer = _analyzer;/*from www .j a va 2 s . c o m*/ QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); String[] testString = _searchList;//{"to","123","impressed","Geezer","geezer","semi-busy","\"eggs vegetable\"","gs veget","\"gs veget\""};//,"good","I","but","coffee"}; for (int j = 0; j < testString.length; j++) { String lstr = String.valueOf(j) + "," + testString[j]; Query query = parser.parse(testString[j]); System.out.println("Searching for: " + query.toString(field)); TopDocs topdocs = searcher.search(query, _topn); lstr += "," + topdocs.totalHits; ScoreDoc[] scoreDocs = topdocs.scoreDocs; SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query.rewrite(reader))); for (int i = 0; i < scoreDocs.length; i++) { int doc = scoreDocs[i].doc; Document document = searcher.doc(doc); // System.out.println("Snippet=" + document.get(field)); System.out.println(i); String text = document.get(field); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc, field, analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); String line = ""; for (int m = 0; m < frag.length; m++) { if ((frag[m] != null) && (frag[m].getScore() > 0)) { System.out.println((frag[m].toString())); line = frag[m].toString(); line = line.replaceAll("\n", ""); line = line.replaceAll("\r", ""); line = line.replaceAll("\"", ""); line = line.replaceAll(",", " "); } } lstr += "," + line; lstr += "," + String.valueOf(scoreDocs[i].score); } writer.write(lstr + "\n"); System.out.println("Search for:" + 
testString[j] + " Total hits=" + scoreDocs.length); System.out.println("////////////////////////////////////////////////////"); } }
From source file:ci6226.loadIndex.java
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, only enough results are collected to
 * fill 5 result pages. If the user wants to page beyond this limit, the query is
 * executed another time and all hits are collected.
 *
 * @param in          console input for interactive paging commands
 * @param searcher    searcher over the review index
 * @param query       the parsed user query
 * @param hitsPerPage page size
 * @param raw         if true, print raw doc id/score lines instead of formatted output
 * @param interactive if false, print the first page and return
 * @param analyzer    analyzer used to re-tokenize stored text for highlighting
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // The user paged past the collected hits: ask whether to re-run the query
        // and collect everything.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        // Render the current page [start, end).
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("review_id");
            if (path != null) {
                System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
                String title = doc.get("business_id");
                if (title != null) {
                    String text = doc.get("text");
                    // Re-tokenize the stored text so the highlighter can locate matches.
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                            hits[i].doc, "text", doc, analyzer);
                    // Highlight matches with ANSI color codes instead of HTML tags.
                    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET);
                    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
                    System.out.print("Snippet=\t");
                    // Only print fragments that actually contain a scoring match.
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            System.out.println((frag[j].toString()));
                        }
                    }
                    System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title);
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive || end == 0) {
            break;
        }

        // Interactive paging: (p)revious, (n)ext, (q)uit, or a page number.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                int cpage = start / hitsPerPage;
                System.out.println(String.format("Current page=%d,max page=%d", cpage + 1,
                        1 + numTotalHits / hitsPerPage));
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Anything else is treated as a 1-based page number.
                    // NOTE(review): a non-numeric entry will throw NumberFormatException
                    // here and abort the loop — confirm this is acceptable for a demo tool.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) {
                break;
            }
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java
License:Apache License
/**
 * Executes a search request against all current index readers and returns hits with a
 * single highlighted fragment per document.
 *
 * Flushes pending index events first (best effort) so recently indexed documents are
 * visible. For COUNT-type requests only the totals are filled in.
 */
@Override
public SearchResult search(SearchRequest sResquest) throws EsearchException {
    try {
        // Best effort: make recently indexed events searchable; a failure is logged
        // and the search proceeds against the current index state.
        esearchSystem.flushEvents(2000);
    } catch (EsearchException e) {
        LOGGER.error("Esearch flush events error. \n{}", e);
    }
    String queryString = sResquest.getQuery();
    String queryField = sResquest.getField();
    LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString);
    Analyzer analyzer = esearchSystem.getAnalyzer();
    QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer);
    SearchResult result = new SearchResult();
    List<EsearchMultiReader<R>> readers = null;
    MultiReader multiReader = null;
    IndexSearcher searcher = null;
    try {
        // An empty query string means "match everything".
        Query query = null;
        if (Strings.isNullOrEmpty(queryString)) {
            query = new MatchAllDocsQuery();
        } else {
            query = queryParser.parse(queryString);
        }
        readers = esearchSystem.getIndexReaders();
        // closeSubReaders=false: the sub-readers are pooled and handed back via
        // returnIndexReaders() in the finally block, so MultiReader must not close them.
        multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
        searcher = new IndexSearcher(multiReader);
        long start = System.currentTimeMillis();
        TopDocs docs = searcher.search(query, null, sResquest.getSize());
        long end = System.currentTimeMillis();
        result.setTime(end - start);
        result.setTotalDocs(multiReader.numDocs());
        result.setTotalHits(docs.totalHits);
        LOGGER.info("Got {} hits. Cost:{} ms", docs.totalHits, end - start);
        // COUNT requests need only the totals, not the hit documents.
        if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) {
            return result;
        }
        ScoreDoc[] scoreDocs = docs.scoreDocs;
        ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length);
        for (ScoreDoc scoreDoc : scoreDocs) {
            SearchHit hit = new SearchHit();
            hit.setScore(scoreDoc.score);
            int docID = scoreDoc.doc;
            Document doc = multiReader.document(docID);
            String content = doc.get(queryField);
            // One best fragment per hit, wrapped in <span class="hl"> markup.
            Scorer qs = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>");
            Highlighter hl = new Highlighter(formatter, qs);
            String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1);
            Map<String, String[]> fields = convert(doc, sResquest.getSearchType());
            fields.put("fragment", fragments);
            hit.setFields(fields);
            hitList.add(hit);
        }
        result.setHits(hitList.toArray(new SearchHit[hitList.size()]));
        return result;
    } catch (Exception e) {
        LOGGER.error(e.getMessage(), e);
        throw new EsearchException(e.getMessage(), e);
    } finally {
        // Close the composite reader and hand the pooled sub-readers back regardless
        // of success or failure (this runs even on the early COUNT return above).
        if (multiReader != null) {
            try {
                multiReader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
        esearchSystem.returnIndexReaders(readers);
    }
}
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {/*from ww w . j ava2 s .c om*/ try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }