Usage examples for the org.apache.lucene.search.highlight.SimpleHTMLFormatter constructor:
public SimpleHTMLFormatter(String preTag, String postTag)
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);//from w w w . j a va 2 s . co m } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:apm.common.core.DaoImpl.java
License:Open Source License
/** * /*from w w w.ja v a2 s . c o m*/ * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;/*from w w w .j a v a 2 s.c o m*/ QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); 
pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:ci6226.loadIndex.java
/** * This demonstrates a typical paging search scenario, where the search * engine presents pages of size n to the user. The user can then go to the * next page if interested in the next hits. * * When the query is executed for the first time, then only enough results * are collected to fill 5 result pages. If the user wants to page beyond * this limit, then the query is executed another time and all hits are * collected./*from ww w . j a va 2 s . c o m*/ * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("review_id"); if (path != null) { System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score); String title = doc.get("business_id"); if (title != null) { String text = doc.get("text"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits[i].doc, "text", doc, analyzer);//TokenSources.getAnyTokenStream(searcher.getIndexReader() ,"text", analyzer); 
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET); // SimpleFragmenter fragmenter = new SimpleFragmenter(80); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4); System.out.print("Snippet=\t"); for (int j = 0; j < frag.length; j++) { if ((frag[j] != null) && (frag[j].getScore() > 0)) { System.out.println((frag[j].toString())); } } //System.out.print("\n"); System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title); } } else { System.out.println((i + 1) + ". " + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); int cpage = start / hitsPerPage; System.out.println(String.format("Current page=%d,max page=%d", cpage + 1, 1 + numTotalHits / hitsPerPage)); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java
License:Apache License
@Override public SearchResult search(SearchRequest sResquest) throws EsearchException { try {//from w w w.j a va 2 s .c o m esearchSystem.flushEvents(2000); } catch (EsearchException e) { LOGGER.error("Esearch flush events error. \n{}", e); } String queryString = sResquest.getQuery(); String queryField = sResquest.getField(); LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString); Analyzer analyzer = esearchSystem.getAnalyzer(); QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer); SearchResult result = new SearchResult(); List<EsearchMultiReader<R>> readers = null; MultiReader multiReader = null; IndexSearcher searcher = null; try { Query query = null; if (Strings.isNullOrEmpty(queryString)) { query = new MatchAllDocsQuery(); } else { query = queryParser.parse(queryString); } readers = esearchSystem.getIndexReaders(); multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(multiReader); long start = System.currentTimeMillis(); TopDocs docs = searcher.search(query, null, sResquest.getSize()); long end = System.currentTimeMillis(); result.setTime(end - start); result.setTotalDocs(multiReader.numDocs()); result.setTotalHits(docs.totalHits); LOGGER.info("Got {} hits. 
Cost:{} ms", docs.totalHits, end - start); if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) { return result; } ScoreDoc[] scoreDocs = docs.scoreDocs; ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { SearchHit hit = new SearchHit(); hit.setScore(scoreDoc.score); int docID = scoreDoc.doc; Document doc = multiReader.document(docID); String content = doc.get(queryField); Scorer qs = new QueryScorer(query); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>"); Highlighter hl = new Highlighter(formatter, qs); String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1); Map<String, String[]> fields = convert(doc, sResquest.getSearchType()); fields.put("fragment", fragments); hit.setFields(fields); hitList.add(hit); } result.setHits(hitList.toArray(new SearchHit[hitList.size()])); return result; } catch (Exception e) { LOGGER.error(e.getMessage(), e); throw new EsearchException(e.getMessage(), e); } finally { if (multiReader != null) { try { multiReader.close(); } catch (IOException e) { LOGGER.error(e.getMessage(), e); } } esearchSystem.returnIndexReaders(readers); } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {//from ww w .ja v a 2 s.c o m try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }
From source file:com.aistor.common.persistence.BaseDaoImpl.java
License:Open Source License
/** * // w w w . j a v a2s . c o m * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String desciption = highlighter.getBestFragment(analyzer, field, text); if (desciption != null) { Reflections.invokeSetter(entity, fields[0], desciption); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:com.bluedragon.search.search.QueryRun.java
License:Open Source License
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount, int recordsSearched) throws CorruptIndexException, Exception { DocumentWrap document = new DocumentWrap(searcher.doc(docid)); queryResultData.addRow(1);/*from w w w .j av a 2 s. co m*/ queryResultData.setCurrentRow(queryResultData.getSize()); // Add in the standard columns that we know we have for every search queryResultData.setCell(1, new cfStringData(document.getId())); queryResultData.setCell(2, new cfStringData(document.getName())); queryResultData.setCell(3, new cfNumberData(score)); queryResultData.setCell(4, new cfNumberData(searchCount)); queryResultData.setCell(5, new cfNumberData(recordsSearched)); queryResultData.setCell(6, new cfNumberData(rank + 1)); String uC = queryAttributes.getUniqueColumn(); // Now we do the custom ones List<IndexableField> fields = document.getDocument().getFields(); Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { IndexableField fieldable = it.next(); String fieldName = fieldable.name().toLowerCase(); // Check for the unique if (uniqueSet != null && fieldName.equals(uC)) { if (uniqueSet.contains(fieldable.stringValue())) { queryResultData.deleteRow(queryResultData.getSize()); return; } else uniqueSet.add(fieldable.stringValue()); } // Check to see if we have this column if (fieldName.equals("contents") && !queryAttributes.getContentFlag()) continue; if (!activeColumns.containsKey(fieldName)) { int newcolumn = queryResultData.addColumnData(fieldable.name().toUpperCase(), cfArrayData.createArray(1), null); activeColumns.put(fieldName, newcolumn); } int column = activeColumns.get(fieldName); if (column <= 6) continue; queryResultData.setCell(column, new cfStringData(fieldable.stringValue())); } // Do the context stuff if enable if (queryAttributes.getContextPassages() > 0) { Scorer scorer = new QueryScorer(queryAttributes.getQuery()); SimpleHTMLFormatter formatter = new 
SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes()); highlighter.setTextFragmenter(fragmenter); String nextContext = ""; String contents = document.getAttribute(DocumentWrap.CONTENTS); if (contents != null) { TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS, new StringReader(contents)); String[] fragments = null; try { fragments = highlighter.getBestFragments(tokenStream, contents, queryAttributes.getContextPassages()); if (fragments.length == 1) { nextContext = fragments[0] + "..."; } else { StringBuilder context = new StringBuilder(); for (int f = 0; f < fragments.length; f++) { context.append("..."); context.append(fragments[f]); } context.append("..."); nextContext = context.toString(); } } catch (Exception e) { } // Add in the context if (!activeColumns.containsKey("context")) { int newcolumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null); activeColumns.put("context", newcolumn); } queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext)); } } }
From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java
License:Open Source License
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) { String highlightedText;/* w ww . j a v a2s. c o m*/ Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH); try { Directory directory = FSDirectory.open(indexDir); IndexReader indexReader = DirectoryReader.open(directory); Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader); QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery); SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>"); Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer); Fragmenter luceneFragmenter; // Si la chaine highlighter est sup 250 carac if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { // Cration de best fragments de 100 carac chaque luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT); } else { // Toute la chaine est highlight luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE); } luceneHighlighter.setTextFragmenter(luceneFragmenter); TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight)); String fragment = null; if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT, FRAGMENT_SEP); } else { fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight); } if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) { fragment = strToHighlight; } indexReader.close(); directory.close(); highlightedText = fragment; } catch (IOException e) { throw new RuntimeException(e); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } return highlightedText; }
From source file:com.duroty.application.bookmark.manager.BookmarkManager.java
License:Open Source License
/** * DOCUMENT ME!/*from w w w .j a v a2 s. c om*/ * * @param repositoryName DOCUMENT ME! * @param token DOCUMENT ME! * @param page DOCUMENT ME! * @param messagesByPage DOCUMENT ME! * @param order DOCUMENT ME! * @param orderType DOCUMENT ME! * * @return DOCUMENT ME! * * @throws BookmarkException DOCUMENT ME! * @throws SearchException DOCUMENT ME! */ public SearchObj search(String repositoryName, String token, int page, int messagesByPage, int order, String orderType, boolean isNotebook) throws BookmarkException { String lucenePath = ""; if (!defaultLucenePath.endsWith(File.separator)) { lucenePath = defaultLucenePath + File.separator + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK; } else { lucenePath = defaultLucenePath + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK; } Searcher searcher = null; SearchObj searchObj = new SearchObj(); Highlighter highlighter = null; try { searcher = BookmarkIndexer.getSearcher(lucenePath); Query query = null; Hits hits = null; if (StringUtils.isBlank(token)) { if (isNotebook) { query = SimpleQueryParser.parse("notebook:true", new KeywordAnalyzer()); } else { query = new MatchAllDocsQuery(); } hits = searcher.search(query, new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField(Field_insert_date, SortField.STRING, true) })); } else { query = SimpleQueryParser.parse(token, analyzer); StringBuffer buffer = new StringBuffer(); if (isNotebook) { buffer.append("(" + query.toString() + ") AND "); QueryParser parser = new QueryParser(Field_notebook, new KeywordAnalyzer()); parser.setDefaultOperator(Operator.AND); Query aux = parser.parse(String.valueOf(true)); buffer.append("(" + aux.toString() + ") "); } if (buffer.length() > 0) { QueryParser parser = new QueryParser("", new WhitespaceAnalyzer()); query = parser.parse(buffer.toString()); } hits = searcher.search(query); } Date searchStart = new Date(); Date searchEnd = new Date(); //time in seconds double time = ((double) 
(searchEnd.getTime() - searchStart.getTime())) / (double) 1000; int hitsLength = hits.length(); if (hitsLength <= 0) { return null; } int start = page * messagesByPage; int end = start + messagesByPage; if (end > 0) { end = Math.min(hitsLength, end); } else { end = hitsLength; } if (start > end) { throw new SearchException("Search index of bound. start > end"); } Vector bookmarks = new Vector(); for (int j = start; j < end; j++) { Document doc = hits.doc(j); if (doc != null) { LuceneBookmark luceneBookmark = new LuceneBookmark(doc); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); BookmarkObj bookmarkObj = new BookmarkObj(); bookmarkObj.setCacheDate(luceneBookmark.getCacheDate()); bookmarkObj.setComments(luceneBookmark.getComments()); bookmarkObj.setContents(luceneBookmark.getCotents()); bookmarkObj.setDepth(luceneBookmark.getDepth()); bookmarkObj.setFlagged(luceneBookmark.isFlagged()); bookmarkObj.setIdint(luceneBookmark.getIdint()); bookmarkObj.setInsertDate(luceneBookmark.getInsertDate()); bookmarkObj.setKeywords(luceneBookmark.getKeywords()); bookmarkObj.setNotebook(luceneBookmark.isNotebook()); bookmarkObj.setParent(Long.parseLong(luceneBookmark.getParent())); bookmarkObj.setTitle(luceneBookmark.getTitle()); bookmarkObj.setTitleHighlight(luceneBookmark.getTitle()); bookmarkObj.setUrl(luceneBookmark.getUrl()); String contents = luceneBookmark.getCotents(); String hcontents = null; if ((contents != null) && (!contents.trim().equals(""))) { contents = contents.replaceAll("\\s+", " "); TokenStream tokenStream = analyzer.tokenStream(Field_contents, new StringReader(contents)); hcontents = highlighter.getBestFragment(tokenStream, contents); if (hcontents != null) { contents = hcontents; } else { contents = null; } } bookmarkObj.setContentsHighlight(contents); String title = luceneBookmark.getTitle(); String htitle = null; 
if ((title != null) && (!title.trim().equals(""))) { title = title.replaceAll("\\s+", " "); TokenStream tokenStream = analyzer.tokenStream(Field_title, new StringReader(title)); htitle = highlighter.getBestFragment(tokenStream, title); if (htitle != null) { title = htitle; } } bookmarkObj.setTitleHighlight(title); bookmarks.addElement(bookmarkObj); } } searchObj.setHits(hitsLength); searchObj.setTime(time); searchObj.setBookmarks(bookmarks); } catch (Exception ex) { throw new SearchException(ex); } finally { } return searchObj; }