List of usage examples for the org.apache.lucene.search.highlight.SimpleSpanFragmenter constructor SimpleSpanFragmenter(QueryScorer)
public SimpleSpanFragmenter(QueryScorer queryScorer)
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);//w ww .ja v a 2 s.c om } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
/**
 * Checks that highlighting the term "fox" in a single sentence wraps the
 * match in {@code <B>} tags and returns the whole sentence as the best
 * fragment.
 *
 * @throws Exception if tokenizing or highlighting fails
 */
public void testHighlighting() throws Exception {
    final String sentence = "The quick brown fox jumps over the lazy dog";
    final String expected = "The quick brown <B>fox</B> jumps over the lazy dog";

    TermQuery foxQuery = new TermQuery(new Term("field", "fox"));
    TokenStream sentenceTokens = new SimpleAnalyzer().tokenStream("field", new StringReader(sentence));

    // One scorer is shared by the highlighter and the span fragmenter.
    QueryScorer queryScorer = new QueryScorer(foxQuery, "field");
    Highlighter fragmentHighlighter = new Highlighter(queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

    String actual = fragmentHighlighter.getBestFragment(sentenceTokens, sentence);
    assertEquals(expected, actual);
}
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);//from w w w. j a v a 2 s . c o m String fragment = highlighter.getBestFragment(stream, title); LOGGER.info(fragment); } }
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString, Map<Integer, String> highlightedTextMap) { int[] hitIDs = new int[0]; IndexSearcher indexSearcher = null;/*from ww w . j a va2 s . c o m*/ try { long start = 0; if (LOGGER.isDebugEnabled()) { start = new Date().getTime(); } indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX); if (indexSearcher == null) { return hitIDs; } ScoreDoc[] scoreDocs; try { TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS); indexSearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (IOException e) { LOGGER.warn("Getting the workitem search results failed with failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } if (LOGGER.isDebugEnabled()) { long end = new Date().getTime(); LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start) + " milliseconds) that matched the user query '" + userQueryString + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '" + query.toString() + "'"); } QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); // Set the best scorer fragments highlighter.setTextFragmenter(fragmenter); // Set fragment to highlight hitIDs = new int[scoreDocs.length]; for (int i = 0; i < scoreDocs.length; i++) { int docID = scoreDocs[i].doc; Document doc = null; try { doc = indexSearcher.doc(docID); } catch (IOException e) { LOGGER.error("Getting the workitem documents failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } if (doc != null) { Integer itemID = Integer.valueOf(doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO))); if (itemID != null) { hitIDs[i] = itemID.intValue(); if (highlightedTextMap != null) { String 
highligherFieldValue = doc.get(LuceneUtil.HIGHLIGHTER_FIELD); TokenStream tokenStream = null; try { tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null, highligherFieldValue, LuceneUtil.getAnalyzer(), -1); } catch (Exception ex) { LOGGER.debug(ex.getMessage()); } if (tokenStream != null) { String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue); if (fragment != null) { highlightedTextMap.put(itemID, fragment); } } } } } } return hitIDs; } catch (BooleanQuery.TooManyClauses e) { LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); throw e; } catch (Exception e) { LOGGER.error("Searching the workitems failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } finally { closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem"); } }
From source file:com.bugull.mongo.lucene.BuguHighlighter.java
License:Apache License
/**
 * Highlights the parsed {@code keywords} inside a field value and returns
 * the best fragments joined by {@code "..."}.
 *
 * <p>NOTE(review): {@code keywords}, {@code formatter} and
 * {@code maxFragments} are not declared in this snippet; presumably they
 * are fields of the enclosing class.
 *
 * @param fieldName the field the query is parsed and scored against
 * @param fieldValue the text to tokenize and highlight
 * @return the best fragments of {@code fieldValue}, separated by "..."
 * @throws Exception if query parsing, tokenizing, or highlighting fails
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex buguIndex = BuguIndex.getInstance();

    QueryParser queryParser = new QueryParser(buguIndex.getVersion(), fieldName, buguIndex.getAnalyzer());
    Query parsedQuery = queryParser.parse(keywords);

    TokenStream valueTokens = buguIndex.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));

    // One scorer is shared by the highlighter and the span fragmenter.
    QueryScorer queryScorer = new QueryScorer(parsedQuery, fieldName);
    Highlighter fragmentHighlighter = new Highlighter(formatter, queryScorer);
    SimpleSpanFragmenter spanFragmenter = new SimpleSpanFragmenter(queryScorer);
    fragmentHighlighter.setTextFragmenter(spanFragmenter);

    String bestFragments = fragmentHighlighter.getBestFragments(valueTokens, fieldValue, maxFragments, "...");
    return bestFragments;
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
@Override public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug) throws IndexException { try {// ww w . j a v a 2 s. c om //TODO: make age be a component in the ranking? BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder(); queryBuilder.add(parser.parse(query), Occur.MUST); queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)), Occur.FILTER); Query baseQuery = queryBuilder.build(); FunctionQuery boostQuery = new FunctionQuery( new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000, new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F)); Query q = new CustomScoreQuery(baseQuery, boostQuery); QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); highlighter.setTextFragmenter(fragmenter); GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1) .setAllGroups(true).setIncludeMaxScore(true); TopGroups<?> groups = gsearch.search(searcher, q, offset, size); ArrayList<SearchResult> results = new ArrayList<>(size); for (int i = offset; i < offset + size && i < groups.groups.length; i++) { ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0]; Document luceneDoc = searcher.doc(scoreDoc.doc); IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc); TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH, reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), analyzer, highlighter.getMaxDocCharsToAnalyze() - 1); String[] snippets = highlighter.getBestFragments(tokenStream, luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3); String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n")); snippet = Jsoup.clean(snippet, Whitelist.simpleText()); String 
debugInfo = null; if (includeDebug) { Explanation explanation = searcher.explain(q, scoreDoc.doc); debugInfo = explanation.toString(); } results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(), doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score)); } SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset) .setMaxResultsRequested(size) .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0) .setResults(results); if (includeDebug) { wrapper.setDebugInfo(q.toString()); } return wrapper; } catch (IOException | ParseException | InvalidTokenOffsetsException e) { LOG.error(e.getLocalizedMessage()); throw new IndexException(e); } }
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
/**
 * Builds a highlighter for {@code query} that wraps matches in a red
 * {@code <font>} tag and fragments text with a span-based fragmenter.
 *
 * @param query the query whose matching terms should be highlighted
 * @return the configured highlighter
 */
private Highlighter addStringHighlighter(Query query) {
    // One scorer is shared by the highlighter and its fragmenter.
    QueryScorer queryScorer = new QueryScorer(query);

    Highlighter redHighlighter = new Highlighter(
            new SimpleHTMLFormatter("<font color='red'>", "</font>"),
            queryScorer);
    redHighlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));
    return redHighlighter;
}
From source file:com.leavesfly.lia.tool.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/*from ww w .ja va 2 s . c o m*/ } String filename = args[0]; String searchText = "term"; // #1 QueryParser parser = new QueryParser(Version.LUCENE_30, // #1 "f", // #1 new StandardAnalyzer(Version.LUCENE_30));// #1 Query query = parser.parse(searchText); // #1 SimpleHTMLFormatter formatter = // #2 new SimpleHTMLFormatter("<span class=\"highlight\">", // #2 "</span>"); // #2 TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30) // #3 .tokenStream("f", new StringReader(text)); // #3 QueryScorer scorer = new QueryScorer(query, "f"); // #4 Highlighter highlighter = new Highlighter(formatter, scorer); // #5 highlighter.setTextFragmenter( // #6 new SimpleSpanFragmenter(scorer)); // #6 String result = // #7 highlighter.getBestFragments(tokens, text, 3, "..."); // #7 FileWriter writer = new FileWriter(filename); // #8 writer.write("<html>"); // #8 writer.write("<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); // #8 }
From source file:com.leavesfly.lia.tool.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);//from www .jav a 2s . c o m String fragment = highlighter.getBestFragment(stream, title); System.out.println(fragment); } }
From source file:com.mathworks.xzheng.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/*from w w w. ja v a 2s . c om*/ } String filename = args[0]; String searchText = "term"; // #1 QueryParser parser = new QueryParser(Version.LUCENE_46, // #1 "f", // #1 new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // #1 SimpleHTMLFormatter formatter = // #2 new SimpleHTMLFormatter("<span class=\"highlight\">", // #2 "</span>"); // #2 TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // #3 .tokenStream("f", new StringReader(text)); // #3 QueryScorer scorer = new QueryScorer(query, "f"); // #4 Highlighter highlighter = new Highlighter(formatter, scorer); // #5 highlighter.setTextFragmenter( // #6 new SimpleSpanFragmenter(scorer)); // #6 String result = // #7 highlighter.getBestFragments(tokens, text, 3, "..."); // #7 FileWriter writer = new FileWriter(filename); // #8 writer.write("<html>"); // #8 writer.write("<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); // #8 }