Example usage for org.apache.lucene.search.highlight SimpleHTMLEncoder SimpleHTMLEncoder

List of usage examples for org.apache.lucene.search.highlight SimpleHTMLEncoder SimpleHTMLEncoder

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight SimpleHTMLEncoder SimpleHTMLEncoder.

Prototype

public SimpleHTMLEncoder() 

Source Link

Usage

From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    Searcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        // Search across every indexed field, not just the page contents.
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer());

        Query luceneQuery = qp.parse(query);

        // Only build a highlighter when the caller asked for search contexts.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            searcher = new IndexSearcher(m_luceneDirectory);
        } catch (Exception ex) {
            // The index may not exist yet (e.g. first startup); treat as "no results".
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        Hits hits = searcher.search(luceneQuery);

        list = new ArrayList<SearchResult>(hits.length());
        for (int curr = 0; curr < hits.length(); curr++) {
            Document doc = hits.doc(curr);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                // Scale the raw Lucene score to an integer percentage.
                int score = (int) (hits.score(curr) * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                // Index is stale: drop the orphaned entry so it is not returned again.
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (InstantiationException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (IllegalAccessException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (ClassNotFoundException e) {
        log.error("Specified Lucene analyzer does not exist", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                // Was swallowed silently; log so close failures are visible
                // (consistent with the org.apache.wiki version of this provider).
                log.error("Failed to close Lucene searcher", e);
            }
        }
    }

    return list;
}

From source file:com.meltmedia.cadmium.search.SearchService.java

License:Apache License

/**
 * Runs a Lucene search for {@code query}, optionally restricted to documents
 * whose {@code path} field starts with {@code path}, and returns a map with
 * "number-hits" and a "results" list of per-document maps (excerpt, score,
 * title, path).
 *
 * @param query raw user query; special characters are escaped before parsing
 * @param path  optional path prefix filter; ignored when blank
 * @return result map as described above (never null; empty results on no index)
 * @throws Exception propagated from the underlying SearchTemplate
 */
private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();

    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());

            // Defaults so the caller always sees both keys, even on early exit.
            resultMap.put("number-hits", 0);

            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();

            resultMap.put("results", resultList);

            if (index != null && parser != null) {
                // Escape query metacharacters so user input is treated literally.
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    // Restrict matches to documents under the requested path prefix.
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                }
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                        scorer);

                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);

                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");

                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";

                    // Prefer an excerpt from the content; fall back to the title,
                    // then to an empty string, so one bad document never aborts the page.
                    try {
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                                "...");
                        excerpt = fixExcerpt(excerpt);

                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);

                        try {
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                                    "...");
                            excerpt = fixExcerpt(excerpt);

                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            logger.debug("Failed to get search excerpt from title.", e1);

                            result.put("excerpt", "");
                        }
                    }

                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));

                    resultList.add(result);
                }
            }

        }
    }.search();

    return resultMap;
}

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

License:Apache License

@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        // Too short to produce any n-gram match.
        return Collections.emptyList();
    }

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    lock.lock();
    if (exception != null) {
        // Must unlock manually here: the try/finally below has not been entered yet.
        lock.unlock();
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);
    }

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                try {
                    TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                            highlighterAnalyzer);
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                    // Only report fragments that actually matched something.
                    if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                        results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                                frag[0].getScore(), isSynonym));
                    }
                } finally {
                    // Close even when highlighting throws; previously this leaked
                    // the analyzer on any exception from the token stream/highlighter.
                    highlighterAnalyzer.close();
                }
            }
        }

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}

From source file:com.tripod.lucene.service.AbstractLuceneService.java

License:Apache License

/**
 * Builds a highlighter for the given query, or returns null when the tripod
 * query requests no highlight fields.
 *
 * @param query the tripod query being performed
 * @param luceneQuery the Lucene query being performed
 * @return the highlighter to use if the tripod query has one or more highlight fields, or null
 */
private Highlighter getHighlighter(final Q query, final Query luceneQuery) {
    // Guard clause: no highlight fields requested means no highlighter.
    if (query.getHighlightFields() == null || query.getHighlightFields().size() == 0) {
        return null;
    }
    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(query.getHighlightPreTag(),
            query.getHighlightPostTag());
    final SimpleHTMLEncoder encoder = new SimpleHTMLEncoder();
    return new Highlighter(formatter, encoder, new QueryScorer(luceneQuery));
}

From source file:de.blizzy.documentr.search.GetSearchHitTask.java

License:Open Source License

/**
 * Loads the indexed document and builds a {@link SearchHit} whose excerpt has
 * the matched query terms wrapped in HTML strong tags.
 *
 * @return the search hit for this task's document
 * @throws IOException if reading from the index fails
 */
@Override
public SearchHit call() throws IOException {
    // Highlighter that bolds matched terms and HTML-escapes the fragments.
    Formatter boldFormatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$
    Scorer queryScorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(boldFormatter, queryScorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
    highlighter.setEncoder(new SimpleHTMLEncoder());

    Document document = reader.document(docId);
    String project = document.get(PageIndex.PROJECT);
    String branch = document.get(PageIndex.BRANCH);
    String pagePath = document.get(PageIndex.PATH);
    String pageTitle = document.get(PageIndex.TITLE);
    String pageText = document.get(PageIndex.TEXT);
    List<String> tags = Lists.newArrayList(document.getValues(PageIndex.TAG));
    Collections.sort(tags);

    String highlighted = StringUtils.EMPTY;
    TokenStream stream = null;
    try {
        stream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, document, analyzer);
        String[] fragments = highlighter.getBestFragments(stream, pageText, NUM_FRAGMENTS);
        cleanupFragments(fragments);
        highlighted = Util.join(fragments, " <strong>...</strong> "); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        // Offsets out of sync with the stored text; fall back to an empty excerpt.
    } finally {
        Closeables.closeQuietly(stream);
    }
    return new SearchHit(project, branch, pagePath, pageTitle, highlighted, tags);
}

From source file:net.hillsdon.reviki.search.impl.LuceneSearcher.java

License:Apache License

/**
 * Executes the given Lucene query and collects the hits, optionally attaching
 * a highlighted extract of the matched field to each result.
 *
 * @param reader the index reader (used by the analyzer's token streams)
 * @param analyzer analyzer used to tokenize stored text for highlighting
 * @param searcher searcher to run the query against
 * @param field the stored field to build extracts from
 * @param provideExtracts whether to compute highlighted extracts at all
 * @param query the query to execute
 * @return matches in hit order, extract may be null when text is not stored
 * @throws IOException on index access failure
 */
private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer,
        final Searcher searcher, final String field, final boolean provideExtracts, final Query query)
        throws IOException, CorruptIndexException {
    Highlighter highlighter = null;
    if (provideExtracts) {
        highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
    }
    LinkedHashSet<SearchMatch> matches = new LinkedHashSet<SearchMatch>();
    Hits hits = searcher.search(query);
    @SuppressWarnings("unchecked")
    Iterator<Hit> hitIter = hits.iterator();
    while (hitIter.hasNext()) {
        Hit hit = hitIter.next();
        String storedText = hit.get(field);
        String snippet = null;
        // The text is not stored for all fields, just provide a null extract.
        if (highlighter != null && storedText != null) {
            TokenStream tokens = analyzer.tokenStream(field, new StringReader(storedText));
            // Join the three best fragments, separated by "...".
            snippet = highlighter.getBestFragments(tokens, storedText, 3, "...");
        }
        matches.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI),
                hit.get(FIELD_PATH), snippet));
    }
    return matches;
}

From source file:org.apache.solr.highlight.HtmlEncoder.java

License:Apache License

/**
 * Returns an encoder that HTML-escapes highlighted fragment text.
 * Stateless, so a fresh instance per call is cheap and thread-safe.
 */
@Override
public Encoder getEncoder(String fieldName, SolrParams params) {
    final Encoder htmlEscaping = new SimpleHTMLEncoder();
    return htmlEscaping;
}

From source file:org.apache.wiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags/*from   w ww . j a  va 2  s .  co m*/
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}

From source file:org.compass.core.lucene.engine.highlighter.DefaultLuceneHighlighterFactory.java

License:Apache License

/**
 * Resolves the fragment {@link Encoder} for a highlighter from settings:
 * either a ready-made Encoder instance, one of the built-in names
 * (default/html), or a fully qualified Encoder class name.
 *
 * @param highlighterName name of the highlighter being configured (for logging)
 * @param settings Compass settings to read the encoder configuration from
 * @return the configured encoder, never null
 * @throws SearchEngineException if a class-name setting cannot be instantiated
 */
protected Encoder createEncoder(String highlighterName, CompassSettings settings) throws SearchEngineException {
    Encoder encoder;
    Object configured = settings.getSetting(LuceneEnvironment.Highlighter.Encoder.TYPE);
    if (configured instanceof Encoder) {
        // Caller supplied a ready-made encoder instance; use it as-is.
        encoder = (Encoder) configured;
        if (log.isDebugEnabled()) {
            log.debug("Highlighter [" + highlighterName + "] uses encoder instance [" + encoder + "]");
        }
    } else {
        String encoderType = settings.getSetting(LuceneEnvironment.Highlighter.Encoder.TYPE,
                LuceneEnvironment.Highlighter.Encoder.DEFAULT);
        if (log.isDebugEnabled()) {
            log.debug("Highlighter [" + highlighterName + "] uses encoder [" + encoderType + "]");
        }
        if (LuceneEnvironment.Highlighter.Encoder.DEFAULT.equals(encoderType)) {
            encoder = new DefaultEncoder();
        } else if (LuceneEnvironment.Highlighter.Encoder.HTML.equals(encoderType)) {
            encoder = new SimpleHTMLEncoder();
        } else {
            // Anything else is treated as a fully qualified Encoder class name.
            try {
                encoder = (Encoder) ClassUtils.forName(encoderType, settings.getClassLoader()).newInstance();
            } catch (Exception e) {
                throw new SearchEngineException(
                        "Cannot instantiate Lucene encoder [" + encoderType + "] for highlighter ["
                                + highlighterName + "]. Please verify the highlighter encoder setting at ["
                                + LuceneEnvironment.Highlighter.Encoder.TYPE + "]",
                        e);
            }
        }
    }
    // Give configurable encoders a chance to read their own settings.
    if (encoder instanceof CompassConfigurable) {
        ((CompassConfigurable) encoder).configure(settings);
    }
    return encoder;
}

From source file:org.jamwiki.search.LuceneSearchEngine.java

License:LGPL

/**
 * Find all documents that contain a specific search term, ordered by relevance.
 * This method supports all Lucene search query syntax.
 *
 * @param virtualWiki The virtual wiki for the topic.
 * @param text The search term being searched for.
 * @return A collection of SearchResultEntry objects for all documents that
 *  contain the search term./*  w w  w .  j  av  a  2s  .  c om*/
 */
public Collection findResults(String virtualWiki, String text) {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Collection results = new Vector();
    logger.fine("search text: " + text);
    IndexSearcher searcher = null;
    try {
        BooleanQuery query = new BooleanQuery();
        QueryParser qp;
        qp = new QueryParser(ITYPE_TOPIC, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(ITYPE_CONTENT, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        searcher = new IndexSearcher(FSDirectory.getDirectory(getSearchIndexPath(virtualWiki)));
        // rewrite the query to expand it - required for wildcards to work with highlighter
        Query rewrittenQuery = searcher.rewrite(query);
        // actually perform the search
        Hits hits = searcher.search(rewrittenQuery);
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(),
                new QueryScorer(rewrittenQuery));
        for (int i = 0; i < hits.length(); i++) {
            String summary = retrieveResultSummary(hits.doc(i), highlighter, analyzer);
            SearchResultEntry result = new SearchResultEntry();
            result.setRanking(hits.score(i));
            result.setTopic(hits.doc(i).get(ITYPE_TOPIC_PLAIN));
            result.setSummary(summary);
            results.add(result);
        }
    } catch (Exception e) {
        logger.severe("Exception while searching for " + text, e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
            }
        }
    }
    return results;
}