List of usage examples for org.apache.lucene.search.highlight TokenSources getTokenStream
@Deprecated
public static TokenStream getTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
throws IOException
From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java
License: Apache License
@Override public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms) throws InvalidQuerySyntaxException { Analyzer analyzer = null;//from w ww . jav a 2 s. com // default QueryParser.escape(pattern) method does not support phrase queries pattern = QuerySyntaxUtil.escapeQueryPattern(pattern); if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) { return Collections.emptyList(); } logger.log(Level.FINE, "Escaped search pattern: " + pattern); Lock lock = rwlock.readLock(); lock.lock(); if (exception != null) { lock.unlock(); throw new RuntimeException("Failed to refesh index reader after last commit", exception); } try { List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>(); analyzer = new TermNameAnalyzer(false); QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer); Query query = parser.parse(pattern); logger.log(Level.FINE, "Query: " + query); // For highlighting words in query results QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder(); Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer); highlighter.setMaxDocCharsToAnalyze(MAX_CHARS); scorer.setExpandMultiTermQuery(true); // Perform search ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs; for (int i = 0; i < hits.length; i++) { int id = hits[i].doc; Document doc = searcher.doc(id); String ontology = doc.get(FIELD_ONTOLOGY); String referenceId = doc.get(FIELD_ID); String term = doc.get(FIELD_TERM); byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM); boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1; if (!isSynonym || includeSynonyms) { Analyzer highlighterAnalyzer = new TermNameAnalyzer(true); TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM, highlighterAnalyzer); TextFragment[] frag = 
highlighter.getBestTextFragments(tokenStream, term, true, 1); if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) { results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(), frag[0].getScore(), isSynonym)); } highlighterAnalyzer.close(); } } return results; } catch (ParseException e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (TokenMgrError e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (Throwable e) { String msg = "Failed to perform Lucene seach with pattern: " + pattern; logger.log(Level.WARNING, msg, e); throw new RuntimeException(msg, e); } finally { close(analyzer); lock.unlock(); } }
From source file:org.apache.zeppelin.search.LuceneSearch.java
License: Apache License
private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query, Analyzer analyzer, Highlighter highlighter) { List<Map<String, String>> matchingParagraphs = Lists.newArrayList(); ScoreDoc[] hits;//from w w w . ja va 2 s .c o m try { hits = searcher.search(query, 20).scoreDocs; for (int i = 0; i < hits.length; i++) { logger.debug("doc={} score={}", hits[i].doc, hits[i].score); int id = hits[i].doc; Document doc = searcher.doc(id); String path = doc.get(ID_FIELD); if (path != null) { logger.debug((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { logger.debug(" Title: {}", doc.get("title")); } String text = doc.get(SEARCH_FIELD_TEXT); String header = doc.get(SEARCH_FIELD_TITLE); String fragment = ""; if (text != null) { TokenStream tokenStream = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TEXT, analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3); logger.debug(" {} fragments found for query '{}'", frag.length, query); for (int j = 0; j < frag.length; j++) { if ((frag[j] != null) && (frag[j].getScore() > 0)) { logger.debug(" Fragment: {}", frag[j].toString()); } } fragment = (frag != null && frag.length > 0) ? frag[0].toString() : ""; } if (header != null) { TokenStream tokenTitle = TokenSources.getTokenStream(searcher.getIndexReader(), id, SEARCH_FIELD_TITLE, analyzer); TextFragment[] frgTitle = highlighter.getBestTextFragments(tokenTitle, header, true, 3); header = (frgTitle != null && frgTitle.length > 0) ? frgTitle[0].toString() : ""; } else { header = ""; } matchingParagraphs.add(ImmutableMap.of("id", path, // <noteId>/paragraph/<paragraphId> "name", title, "snippet", fragment, "text", text, "header", header)); } else { logger.info("{}. No {} for this document", i + 1, ID_FIELD); } } } catch (IOException | InvalidTokenOffsetsException e) { logger.error("Exception on searching for {}", query, e); } return matchingParagraphs; }