List of usage examples for `org.apache.lucene.search.highlight.SimpleHTMLEncoder`
Constructor: `public SimpleHTMLEncoder()` (no-argument)
From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java
License:Apache License
/** * Searches pages using a particular combination of flags. * * @param query The query to perform in Lucene query language * @param flags A set of flags//from w w w. ja v a 2s .c om * @return A Collection of SearchResult instances * @throws ProviderException if there is a problem with the backend */ public Collection findPages(String query, int flags) throws ProviderException { Searcher searcher = null; ArrayList<SearchResult> list = null; Highlighter highlighter = null; try { String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS }; QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer()); //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() ); Query luceneQuery = qp.parse(query); if ((flags & FLAG_CONTEXTS) != 0) { highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(luceneQuery)); } try { searcher = new IndexSearcher(m_luceneDirectory); } catch (Exception ex) { log.info("Lucene not yet ready; indexing not started", ex); return null; } Hits hits = searcher.search(luceneQuery); list = new ArrayList<SearchResult>(hits.length()); for (int curr = 0; curr < hits.length(); curr++) { Document doc = hits.doc(curr); String pageName = doc.get(LUCENE_ID); WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION); if (page != null) { if (page instanceof Attachment) { // Currently attachments don't look nice on the search-results page // When the search-results are cleaned up this can be enabled again. 
} int score = (int) (hits.score(curr) * 100); // Get highlighted search contexts String text = doc.get(LUCENE_PAGE_CONTENTS); String[] fragments = new String[0]; if (text != null && highlighter != null) { TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text)); fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS); } SearchResult result = new SearchResultImpl(page, score, fragments); list.add(result); } else { log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache"); pageRemoved(new WikiPage(m_engine, pageName)); } } } catch (IOException e) { log.error("Failed during lucene search", e); } catch (InstantiationException e) { log.error("Unable to get a Lucene analyzer", e); } catch (IllegalAccessException e) { log.error("Unable to get a Lucene analyzer", e); } catch (ClassNotFoundException e) { log.error("Specified Lucene analyzer does not exist", e); } catch (ParseException e) { log.info("Broken query; cannot parse", e); throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage()); } finally { if (searcher != null) { try { searcher.close(); } catch (IOException e) { } } } return list; }
From source file:com.meltmedia.cadmium.search.SearchService.java
License:Apache License
/**
 * Runs a Lucene search for {@code query}, optionally restricted to documents whose
 * "path" field starts with {@code path}, and builds a serializable result map.
 *
 * The returned map contains:
 *   "number-hits" — total hit count (0 when the index or parser is unavailable)
 *   "results"     — list of per-document maps with "excerpt", "score", "title", "path"
 *
 * @param query the raw user query; special characters are escaped before parsing
 * @param path  optional path prefix filter; ignored when blank
 * @return the populated result map (never null)
 * @throws Exception propagated from the underlying SearchTemplate execution
 */
private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();
    // Anonymous SearchTemplate closes over resultMap; doSearch() fills it in.
    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());
            // Defaults ensure a well-formed response even if index/parser is missing.
            resultMap.put("number-hits", 0);
            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();
            resultMap.put("results", resultList);
            if (index != null && parser != null) {
                // Backslash-escape query-syntax characters so the user input is
                // treated literally by the query parser.
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    // AND the parsed query with a prefix match on the "path" field.
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                }
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                // Default formatter (<B>..</B>) with HTML-escaped fragment text.
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                        scorer);
                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);
                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");
                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";
                    try {
                        // Preferred excerpt: up to 3 best fragments from the body.
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                                "...");
                        excerpt = fixExcerpt(excerpt);
                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);
                        try {
                            // Fallback: a single fragment from the title.
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                                    "...");
                            excerpt = fixExcerpt(excerpt);
                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            // Last resort: empty excerpt rather than failing the search.
                            logger.debug("Failed to get search excerpt from title.", e1);
                            result.put("excerpt", "");
                        }
                    }
                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));
                    resultList.add(result);
                }
            }
        }
    }.search();
    return resultMap;
}
From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java
License:Apache License
/**
 * Searches the ontology term index for {@code pattern} and returns matches with
 * HTML-highlighted term text.
 *
 * Thread-safety: runs under the index read lock; a pending refresh failure
 * recorded in {@code exception} is surfaced immediately as a RuntimeException.
 *
 * @param pattern         raw search pattern; escaped for Lucene query syntax here
 * @param includeSynonyms whether documents flagged as synonyms are included
 * @return matching results, or an empty list when the escaped pattern is shorter
 *         than the minimum n-gram size
 * @throws InvalidQuerySyntaxException if Lucene cannot parse the escaped pattern
 */
@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms) throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;
    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    // Patterns shorter than the edge-n-gram minimum can never match anything.
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();
    }
    logger.log(Level.FINE, "Escaped search pattern: " + pattern);
    Lock lock = rwlock.readLock();
    lock.lock();
    // Must release the lock on this early-exit path: the try/finally below has
    // not been entered yet. (NOTE: "refesh" typo is in the runtime message.)
    if (exception != null) {
        lock.unlock();
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);
    }
    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);
        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);
        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        // Expand wildcard/prefix queries so their matches can be highlighted.
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            // Synonym flag is stored as a single binary byte == 1.
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;
            if (!isSynonym || includeSynonyms) {
                // Separate analyzer instance per document for highlighting.
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                        highlighterAnalyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                // Only keep documents where the highlighter actually found a match.
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));
                }
                highlighterAnalyzer.close();
            }
        }
        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        // The query-parser tokenizer throws an Error subclass on bad input.
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        // NOTE: "seach" typo is in the runtime message text.
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}
From source file:com.tripod.lucene.service.AbstractLuceneService.java
License:Apache License
/** * @param query the tripod query being performed * @param luceneQuery the Lucene query being performed * @return the highlighter to use if the tripod query has one or more highlight fields, or null *//*w w w . ja v a 2 s . co m*/ private Highlighter getHighlighter(final Q query, final Query luceneQuery) { Highlighter highlighter = null; if (query.getHighlightFields() != null && query.getHighlightFields().size() > 0) { SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder(); SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(query.getHighlightPreTag(), query.getHighlightPostTag()); highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(luceneQuery)); } return highlighter; }
From source file:de.blizzy.documentr.search.GetSearchHitTask.java
License:Open Source License
@Override public SearchHit call() throws IOException { Formatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$ Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE)); highlighter.setEncoder(new SimpleHTMLEncoder()); Document doc = reader.document(docId); String projectName = doc.get(PageIndex.PROJECT); String branchName = doc.get(PageIndex.BRANCH); String path = doc.get(PageIndex.PATH); String title = doc.get(PageIndex.TITLE); String text = doc.get(PageIndex.TEXT); String[] tagsArray = doc.getValues(PageIndex.TAG); List<String> tags = Lists.newArrayList(tagsArray); Collections.sort(tags);//from w w w . j a v a2 s . c om TokenStream tokenStream = null; String highlightedText = StringUtils.EMPTY; try { tokenStream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, doc, analyzer); String[] fragments = highlighter.getBestFragments(tokenStream, text, NUM_FRAGMENTS); cleanupFragments(fragments); highlightedText = Util.join(fragments, " <strong>...</strong> "); //$NON-NLS-1$ } catch (InvalidTokenOffsetsException e) { // ignore } finally { Closeables.closeQuietly(tokenStream); } return new SearchHit(projectName, branchName, path, title, highlightedText, tags); }
From source file:net.hillsdon.reviki.search.impl.LuceneSearcher.java
License:Apache License
private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer, final Searcher searcher, final String field, final boolean provideExtracts, final Query query) throws IOException, CorruptIndexException { Highlighter highlighter = null; if (provideExtracts) { highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(), new QueryScorer(query)); }//from w w w . jav a 2s . c o m Hits hits = searcher.search(query); LinkedHashSet<SearchMatch> results = new LinkedHashSet<SearchMatch>(); @SuppressWarnings("unchecked") Iterator<Hit> iter = hits.iterator(); while (iter.hasNext()) { Hit hit = iter.next(); String text = hit.get(field); String extract = null; // The text is not stored for all fields, just provide a null extract. if (highlighter != null && text != null) { TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text)); // Get 3 best fragments and separate with a "..." extract = highlighter.getBestFragments(tokenStream, text, 3, "..."); } results.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI), hit.get(FIELD_PATH), extract)); } return results; }
From source file:org.apache.solr.highlight.HtmlEncoder.java
License:Apache License
@Override public Encoder getEncoder(String fieldName, SolrParams params) { return new SimpleHTMLEncoder(); }
From source file:org.apache.wiki.search.LuceneSearchProvider.java
License:Apache License
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags; when {@code FLAG_CONTEXTS} is set, highlighted
 *              text fragments are produced for each result
 * @return A Collection of SearchResult instances, or {@code null} if the
 *         Lucene index is not yet available
 * @throws ProviderException if there is a problem with the backend, or the
 *         query cannot be parsed by Lucene
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;
    try {
        // Search across every indexed field, not just the page body.
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());
        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);
        // Only build a highlighter when the caller asked for search contexts.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }
        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            // Index does not exist yet; treat as "no results available".
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }
        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);
            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }
                // Scale the Lucene relevance score (0.0-1.0) to a 0-100 integer.
                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);
                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                // Stale index entry: the page no longer exists, so purge it.
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
    return list;
}
From source file:org.compass.core.lucene.engine.highlighter.DefaultLuceneHighlighterFactory.java
License:Apache License
protected Encoder createEncoder(String highlighterName, CompassSettings settings) throws SearchEngineException { Encoder encoder;// ww w . j a v a2 s.c om Object obj = settings.getSetting(LuceneEnvironment.Highlighter.Encoder.TYPE); if (obj instanceof Encoder) { encoder = (Encoder) obj; if (log.isDebugEnabled()) { log.debug("Highlighter [" + highlighterName + "] uses encoder instance [" + encoder + "]"); } } else { String encoderSetting = settings.getSetting(LuceneEnvironment.Highlighter.Encoder.TYPE, LuceneEnvironment.Highlighter.Encoder.DEFAULT); if (log.isDebugEnabled()) { log.debug("Highlighter [" + highlighterName + "] uses encoder [" + encoderSetting + "]"); } if (LuceneEnvironment.Highlighter.Encoder.DEFAULT.equals(encoderSetting)) { encoder = new DefaultEncoder(); } else if (LuceneEnvironment.Highlighter.Encoder.HTML.equals(encoderSetting)) { encoder = new SimpleHTMLEncoder(); } else { try { // the formatter is the fully qualified class name encoder = (Encoder) ClassUtils.forName(encoderSetting, settings.getClassLoader()).newInstance(); } catch (Exception e) { throw new SearchEngineException( "Cannot instantiate Lucene encoder [" + encoderSetting + "] for highlighter [" + highlighterName + "]. Please verify the highlighter encoder setting at [" + LuceneEnvironment.Highlighter.Encoder.TYPE + "]", e); } } } if (encoder instanceof CompassConfigurable) { ((CompassConfigurable) encoder).configure(settings); } return encoder; }
From source file:org.jamwiki.search.LuceneSearchEngine.java
License:LGPL
/** * Find all documents that contain a specific search term, ordered by relevance. * This method supports all Lucene search query syntax. * * @param virtualWiki The virtual wiki for the topic. * @param text The search term being searched for. * @return A collection of SearchResultEntry objects for all documents that * contain the search term./* w w w . j av a 2s . c om*/ */ public Collection findResults(String virtualWiki, String text) { StandardAnalyzer analyzer = new StandardAnalyzer(); Collection results = new Vector(); logger.fine("search text: " + text); IndexSearcher searcher = null; try { BooleanQuery query = new BooleanQuery(); QueryParser qp; qp = new QueryParser(ITYPE_TOPIC, analyzer); query.add(qp.parse(text), Occur.SHOULD); qp = new QueryParser(ITYPE_CONTENT, analyzer); query.add(qp.parse(text), Occur.SHOULD); searcher = new IndexSearcher(FSDirectory.getDirectory(getSearchIndexPath(virtualWiki))); // rewrite the query to expand it - required for wildcards to work with highlighter Query rewrittenQuery = searcher.rewrite(query); // actually perform the search Hits hits = searcher.search(rewrittenQuery); Highlighter highlighter = new Highlighter( new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(), new QueryScorer(rewrittenQuery)); for (int i = 0; i < hits.length(); i++) { String summary = retrieveResultSummary(hits.doc(i), highlighter, analyzer); SearchResultEntry result = new SearchResultEntry(); result.setRanking(hits.score(i)); result.setTopic(hits.doc(i).get(ITYPE_TOPIC_PLAIN)); result.setSummary(summary); results.add(result); } } catch (Exception e) { logger.severe("Exception while searching for " + text, e); } finally { if (searcher != null) { try { searcher.close(); } catch (Exception e) { } } } return results; }