Example usage for org.apache.lucene.search.highlight TokenSources getTokenStream

List of usage examples for org.apache.lucene.search.highlight TokenSources getTokenStream

Introduction

On this page you can find example usage for org.apache.lucene.search.highlight TokenSources getTokenStream.

Prototype

@Deprecated 
    public static TokenStream getTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
            throws IOException 

Source Link

Usage

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

License:Apache License

/**
 * Searches the ontology index for terms matching the given pattern and returns
 * results with HTML-highlighted fragments.
 *
 * @param pattern         raw user query; escaped internally before parsing
 * @param includeSynonyms whether documents flagged as synonyms are included
 * @return matching results with highlighted term fragments; empty if the escaped
 *         pattern is shorter than the indexer's minimum n-gram size
 * @throws InvalidQuerySyntaxException if the escaped pattern cannot be parsed
 */
@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();
    }

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    lock.lock();
    if (exception != null) {
        lock.unlock();
        // BUGFIX: message typo ("refesh" -> "refresh")
        throw new RuntimeException("Failed to refresh index reader after last commit", exception);
    }

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                try {
                    TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                            highlighterAnalyzer);
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                    // Only keep hits that produced an actual scoring fragment
                    if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                        results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                                frag[0].getScore(), isSynonym));
                    }
                } finally {
                    // BUGFIX: previously leaked when getBestTextFragments threw
                    highlighterAnalyzer.close();
                }
            }
        }

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        // BUGFIX: message typo ("seach" -> "search")
        String msg = "Failed to perform Lucene search with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}

From source file:org.apache.zeppelin.search.LuceneSearch.java

License:Apache License

/**
 * Executes the query against the index and collects up to 20 matching paragraphs,
 * each rendered as a map with keys "id", "name", "snippet", "text" and "header".
 * Text and title fields are highlighted via the supplied {@code highlighter}.
 *
 * @param searcher    searcher over the note index
 * @param query       parsed user query
 * @param analyzer    analyzer used to re-tokenize stored fields for highlighting
 * @param highlighter configured highlighter producing scored fragments
 * @return matching paragraph maps; empty on I/O or offset errors (logged, not thrown)
 */
private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query, Analyzer analyzer,
        Highlighter highlighter) {
    List<Map<String, String>> matchingParagraphs = Lists.newArrayList();
    ScoreDoc[] hits;
    try {
        hits = searcher.search(query, 20).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            logger.debug("doc={} score={}", hits[i].doc, hits[i].score);

            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String path = doc.get(ID_FIELD);
            if (path != null) {
                logger.debug((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    logger.debug("   Title: {}", title);
                }

                String text = doc.get(SEARCH_FIELD_TEXT);
                String header = doc.get(SEARCH_FIELD_TITLE);
                String fragment = "";

                if (text != null) {
                    TokenStream tokenStream = TokenSources.getTokenStream(searcher.getIndexReader(), id,
                            SEARCH_FIELD_TEXT, analyzer);
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3);
                    logger.debug("    {} fragments found for query '{}'", frag.length, query);
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                            logger.debug("    Fragment: {}", frag[j].toString());
                        }
                    }
                    // BUGFIX: fragment entries can be null (see loop above); guard
                    // frag[0] before dereferencing instead of only checking length
                    fragment = (frag.length > 0 && frag[0] != null) ? frag[0].toString() : "";
                }

                if (header != null) {
                    TokenStream tokenTitle = TokenSources.getTokenStream(searcher.getIndexReader(), id,
                            SEARCH_FIELD_TITLE, analyzer);
                    TextFragment[] frgTitle = highlighter.getBestTextFragments(tokenTitle, header, true, 3);
                    // BUGFIX: same null-entry guard as for the text fragments
                    header = (frgTitle != null && frgTitle.length > 0 && frgTitle[0] != null)
                            ? frgTitle[0].toString() : "";
                } else {
                    header = "";
                }
                // BUGFIX: ImmutableMap.of rejects null values, and both "title" and
                // "text" may be absent from the document — default them to "" so a
                // title-less or text-less hit no longer throws NullPointerException.
                matchingParagraphs.add(ImmutableMap.of("id", path, // <noteId>/paragraph/<paragraphId>
                        "name", title != null ? title : "", "snippet", fragment, "text",
                        text != null ? text : "", "header", header));
            } else {
                logger.info("{}. No {} for this document", i + 1, ID_FIELD);
            }
        }
    } catch (IOException | InvalidTokenOffsetsException e) {
        logger.error("Exception on searching for {}", query, e);
    }
    return matchingParagraphs;
}