List of usage examples for org.apache.lucene.search.highlight QueryScorer QueryScorer
public QueryScorer(Query query, String field)
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/* ww w .j av a 2s . c o m*/ } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
public void testHighlighting() throws Exception { String text = "The quick brown fox jumps over the lazy dog"; TermQuery query = new TermQuery(new Term("field", "fox")); TokenStream tokenStream = new SimpleAnalyzer().tokenStream("field", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "field"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); assertEquals("The quick brown <B>fox</B> jumps over the lazy dog", highlighter.getBestFragment(tokenStream, text)); }
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);/* ww w.ja v a 2 s . com*/ String fragment = highlighter.getBestFragment(stream, title); LOGGER.info(fragment); } }
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;/* w w w . j a va 2 s. co m*/ QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
public List<SearchResult> getSearchResults(int startIndex) { initializeStatics();// w w w .j a va 2 s. c o m hasMoreResults = false; try { IndexSearcher searcher = null; try { searcher = newIndexSearcher(); IndexReader reader = searcher.getIndexReader(); Query luceneQuery = generateLuceneQuery(searcher); luceneQuery = luceneQuery.rewrite(reader); Hits hits = searcher.search(luceneQuery); usingSuggestedQuery = false; suggestedQuery = null; if ((didYouMeanParser != null) && ((hits.length() < minimumHits) || (calcScore(searcher, getQuery()) < minimumScore))) { if (log.isDebugEnabled()) { log.debug("Need to suggest because either num hits " + hits.length() + " < " + minimumHits + "\n or top hit score " + (hits.length() > 0 ? hits.score(0) : "[NO HITS]") + " < " + minimumScore); } IndexSearcher compositeSearcher = new IndexSearcher(compositeIndexLocation); try { log.debug("calling suggest() with query=" + getQuery() + " and composite index from " + compositeIndexLocation); //Query didYouMean = didYouMeanParser.suggest(getQuery(), compositeSearcher.getIndexReader()); Query suggestedQueries[] = didYouMeanParser.getSuggestions(getQuery(), compositeSearcher.getIndexReader()); TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>(); if (suggestedQueries != null) { for (int i = 0; i < suggestedQueries.length; i++) { log.debug("trying suggested query: " + suggestedQueries[i].toString(defaultField)); String suggestedQueryString = suggestedQueries[i].toString(defaultField); String constrainedQueryString = suggestedQueryString; if (constrainedQueryString.indexOf('"') < 0 && constrainedQueryString.indexOf('\'') < 0) { constrainedQueryString = "\"" + constrainedQueryString + "\"~5"; // proximity/distance query (within 5 words of each other) } Query suggestedLuceneQuery = generateLuceneQuery(constrainedQueryString, searcher); suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader); Hits suggestedHits = searcher.search(suggestedLuceneQuery); float score = calcScore(suggestedQueryString, suggestedHits); log.debug("========================================="); log.debug("SCORE = " + score); log.debug("========================================="); suggestions.add( new Suggestion(suggestedQueryString, suggestedLuceneQuery, suggestedHits, score, ((i == 0) ? didYouMeanParser.includesOriginal() : false))); log.debug("hits=" + suggestedHits.length() + ", score=" + score); } } Suggestion best = null; if (suggestions.size() > 0) { best = suggestions.last(); } if (best != null && !best.isOriginal()) { suggestedQuery = best.getQueryString(); if (suggestedQuery != null && suggestedQuery.indexOf('+') >= 0 && getQuery().indexOf('+') < 0) { suggestedQuery = suggestedQuery.replaceAll("\\+", ""); } if (hits.length() == 0) { if (best.getHits().length() > 0) { // Requery probably required because we added proximity before String suggestedQueryString = best.getQueryString(); luceneQuery = generateLuceneQuery(suggestedQueryString, searcher); luceneQuery = luceneQuery.rewrite(reader); hits = searcher.search(luceneQuery); //hits = best.getHits(); //luceneQuery = best.getLuceneQuery(); usingSuggestedQuery = true; } } log.debug("DidYouMeanParser suggested " + suggestedQuery); } else { if (best != null && best.isOriginal()) { log.debug("The suggestion was the original query after all"); } log.debug("DidYouMeanParser did not suggest anything"); } } finally { compositeSearcher.close(); } } /* if (hits.length() == 0 && suggestedQuery != null) { // If we didn't find anything at all, go ahead and show them what the suggested query // will give them Query suggestedLuceneQuery = generateLuceneQuery(suggestedQuery, searcher); suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader); Hits suggestedHits = searcher.search(suggestedLuceneQuery); if (suggestedHits.length() > 0) { hits = suggestedHits; luceneQuery = suggestedLuceneQuery; usingSuggestedQuery = true; } } */ totalHits = hits.length(); //Get the genere matches: try { BitSetFacetHitCounter facetHitCounter = new BitSetFacetHitCounter(); facetHitCounter.setSearcher(searcher); String baseQueryString = (isUsingSuggestedQuery() ? suggestedQuery : query); String quotedQueryString = baseQueryString; if (quotedQueryString.indexOf('"') == -1 && quotedQueryString.indexOf(' ') > -1) { quotedQueryString = "\"" + quotedQueryString + "\""; } facetHitCounter.setBaseQuery(luceneQuery, baseQueryString); List<HitCount> subQueries = new ArrayList<HitCount>(); for (Map.Entry<String, Query> entry : genreQueries.entrySet()) { subQueries.add( new HitCount(entry.getKey(), entry.getValue(), entry.getValue().toString(), 0)); } facetHitCounter.setSubQueries(subQueries); genreCounts = facetHitCounter.getFacetHitCounts(true); whatMatchedCounts = new ArrayList<HitCount>(); whatMatchedCounts .add(new HitCount("Title", getFieldQuery(baseQueryString, "programTitle", searcher), "programTitle:" + quotedQueryString, 0)); whatMatchedCounts.add( new HitCount("Episode Title", getFieldQuery(baseQueryString, "episodeTitle", searcher), "episodeTitle:" + quotedQueryString, 0)); whatMatchedCounts.add( new HitCount("Description", getFieldQuery(baseQueryString, "description", searcher), "description:" + quotedQueryString, 0)); whatMatchedCounts.add(new HitCount("Content", getFieldQuery(baseQueryString, "text", searcher), "text:" + quotedQueryString, 0)); whatMatchedCounts .add(new HitCount("Credits", getFieldQuery(baseQueryString, "credits", searcher), "credits:" + quotedQueryString, 0)); facetHitCounter.setSubQueries(whatMatchedCounts); whatMatchedCounts = facetHitCounter.getFacetHitCounts(true); //Program Count -- Not sure if there is a better way to do this. HashSet<String> programTitles = new HashSet<String>(); programCounts = new ArrayList<HitCount>(); for (int i = 0; i < hits.length() && programCounts.size() < 5; i++) { String title = hits.doc(i).get("programTitle"); if (!programTitles.contains(title)) { String queryTitle = title; queryTitle = QueryParser.escape(title); if (queryTitle.indexOf('"') > -1) { queryTitle.replace("\"", "\\\""); } if (queryTitle.indexOf(' ') > -1) { queryTitle = "\"" + queryTitle + "\""; } programCounts .add(new HitCount(title, getFieldQuery(queryTitle, "programTitle", searcher), "programTitle:" + queryTitle, 0)); programTitles.add(title); } } facetHitCounter.setSubQueries(programCounts); programCounts = facetHitCounter.getFacetHitCounts(false); } catch (Exception e) { e.printStackTrace(); } results = new ArrayList<SearchResult>(); programToSearchResult.clear(); Query userQuery = getContentQuery(query, searcher); userQuery.rewrite(reader); Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(userQuery, "text")); log.debug("#hits=" + hits.length()); EPGProvider epgProvider = DefaultEpg.getInstance(); boolean missingWebPaths = false; // We added this to the index midstream, so some do and some don't. // Next index rebuild, and they'll all have it. for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { if (hits.doc(i + startIndex).get("webPath") == null) { missingWebPaths = true; break; } } Program[] programs = null; if (missingWebPaths) { List<String> programIds = new ArrayList<String>(pageSize); for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { programIds.add(hits.doc(i + startIndex).get("programID")); } programs = DefaultEpg.getInstance().getProgramList(programIds); } for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) { addDocument(hits.doc(i + startIndex), hits.score(i + startIndex), epgProvider, highlighter, analyzer, null, null, (programs == null ? null : programs[i])); } if (results.size() + startIndex < hits.length()) { hasMoreResults = true; } } finally { if (searcher != null) { searcher.close(); } } } catch (IOException e) { log.error("Error searching index", e); } catch (ParseException e) { log.error("Error searching index", e); } return results; }
From source file:com.bugull.mongo.lucene.BuguHighlighter.java
License:Apache License
public String getResult(String fieldName, String fieldValue) throws Exception { BuguIndex index = BuguIndex.getInstance(); QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer()); Query query = parser.parse(keywords); TokenStream tokens = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue)); QueryScorer scorer = new QueryScorer(query, fieldName); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); return highlighter.getBestFragments(tokens, fieldValue, maxFragments, "..."); }
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
@Override public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug) throws IndexException { try {//from ww w.j av a 2 s . c om //TODO: make age be a component in the ranking? BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); queryBuilder.add(parser.parse(query), Occur.MUST); queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)), Occur.FILTER); Query baseQuery = queryBuilder.build(); FunctionQuery boostQuery = new FunctionQuery( new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000, new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F)); Query q = new CustomScoreQuery(baseQuery, boostQuery); QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(fragmenter); GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1) .setAllGroups(true).setIncludeMaxScore(true); TopGroups<?> groups = gsearch.search(searcher, q, offset, size); ArrayList<SearchResult> results = new ArrayList<>(size); for (int i = offset; i < offset + size && i < groups.groups.length; i++) { ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0]; Document luceneDoc = searcher.doc(scoreDoc.doc); IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc); TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH, reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), analyzer, highlighter.getMaxDocCharsToAnalyze() - 1); String[] snippets = highlighter.getBestFragments(tokenStream, luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3); String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n")); snippet = Jsoup.clean(snippet, Whitelist.simpleText()); String debugInfo = null; if (includeDebug) { Explanation explanation = searcher.explain(q, scoreDoc.doc); debugInfo = explanation.toString(); } results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(), doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score)); } SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset) .setMaxResultsRequested(size) .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0) .setResults(results); if (includeDebug) { wrapper.setDebugInfo(q.toString()); } return wrapper; } catch (IOException | ParseException | InvalidTokenOffsetsException e) { LOG.error(e.getLocalizedMessage()); throw new IndexException(e); } }
From source file:com.flaptor.hounder.searcher.SnippetSearcher.java
License:Apache License
/** * Add snippets to the search-results. It adds a new field * SNIPPET_FIELDNAME_PREFIX_field with the snippet for each field *///w w w. ja v a2s.co m private void addSnippets(GroupedSearchResults res, org.apache.lucene.search.Query query) throws IOException { Formatter simpleHtmlFormatter = new SimpleHTMLFormatter(HIGHLIGHTER_PREFIX, HIGHLIGHTER_SUFFIX); for (int i = 0; i < snippetOfFields.length; i++) { String fieldToSnippet = snippetOfFields[i]; int snippetLength = snippetsLength[i]; QueryScorer scorer = new QueryScorer(query, fieldToSnippet); addSnippets(res, fieldToSnippet, snippetLength, scorer, simpleHtmlFormatter); } }
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle, boolean searchInTags, boolean searchInText, boolean superFuzzy) { ArrayList<CardSnapshot> cardSnaps = new ArrayList(); try {/* w w w . java 2 s . c o m*/ ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>(); for (String indexName : indexNames) { IndexReader reader = IndexReader .open(FSDirectory.open(new File(indexDir + File.separator + indexName), new SimpleFSLockFactory(indexDir + File.separator + indexName))); IndexSearcher searcher = new IndexSearcher(reader); searchers.add(searcher); } BooleanQuery query = new BooleanQuery(); if (searchInTitle) { IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy); } if (searchInTags) { IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy); } if (searchInText) { IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy); IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy); } for (IndexSearcher searcher : searchers) { TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"), new StandardAnalyzer(Version.LUCENE_20.LUCENE_35)); QueryScorer scorer = new QueryScorer(query, "analyzedText"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5); String highlights = ""; for (String fragment : fragments) { highlights += fragment + "..."; } if (highlights.equals("")) { String text = doc.get("text"); if (text.length() > 100) { highlights += doc.get("text").substring(0, 100); } else { highlights += doc.get("text"); } } cardSnaps.add(new CardSnapshot(highlights, doc)); } searcher.getIndexReader().close(); searcher.close(); searcher = null; } } catch (Exception ex) { ex.printStackTrace(); } return cardSnaps; }
From source file:com.gitblit.LuceneExecutor.java
License:Apache License
/** * // w ww . j a v a 2 s .com * @param analyzer * @param query * @param content * @param result * @return * @throws IOException * @throws InvalidTokenOffsetsException */ private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException { if (content == null) { content = ""; } int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token String termTag = "!!--["; String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } // clip commit message String fragment = content; if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>"; } // make sure we have unique fragments Set<String> uniqueFragments = new LinkedHashSet<String>(); for (String fragment : fragments) { uniqueFragments.add(fragment); } fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; String tag = "<pre class=\"text\">"; // resurrect the raw fragment from removing the artificial delimiters String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); // determine position of the raw fragment in the content int pos = content.indexOf(raw); // restore complete first line of fragment int c = pos; while (c > 0) { c--; if (content.charAt(c) == '\n') { break; } } if (c > 0) { // inject leading chunk of first fragment line fragment = content.substring(c + 1, pos) + fragment; } if (SearchObjectType.blob == result.type) { // count lines as offset into the content for this fragment int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); // create fragment tag with line number and language String lang = ""; String ext = StringUtils.getFileExtension(result.path).toLowerCase(); if (!StringUtils.isEmpty(ext)) { // maintain leading space! lang = " lang-" + ext; } tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); } sb.append(tag); // replace the artificial delimiter with html tags String html = StringUtils.escapeForHtml(fragment, false); html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { sb.append("<span class=\"ellipses\">...</span><br/>"); } } return sb.toString(); }