List of usage examples for org.apache.lucene.search.highlight QueryScorer setExpandMultiTermQuery
public void setExpandMultiTermQuery(boolean expandMultiTermQuery)
From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java
License:Apache License
@Override public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms) throws InvalidQuerySyntaxException { Analyzer analyzer = null;/*from ww w.j a v a 2 s.c om*/ // default QueryParser.escape(pattern) method does not support phrase queries pattern = QuerySyntaxUtil.escapeQueryPattern(pattern); if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) { return Collections.emptyList(); } logger.log(Level.FINE, "Escaped search pattern: " + pattern); Lock lock = rwlock.readLock(); lock.lock(); if (exception != null) { lock.unlock(); throw new RuntimeException("Failed to refesh index reader after last commit", exception); } try { List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>(); analyzer = new TermNameAnalyzer(false); QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer); Query query = parser.parse(pattern); logger.log(Level.FINE, "Query: " + query); // For highlighting words in query results QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder(); Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer); highlighter.setMaxDocCharsToAnalyze(MAX_CHARS); scorer.setExpandMultiTermQuery(true); // Perform search ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs; for (int i = 0; i < hits.length; i++) { int id = hits[i].doc; Document doc = searcher.doc(id); String ontology = doc.get(FIELD_ONTOLOGY); String referenceId = doc.get(FIELD_ID); String term = doc.get(FIELD_TERM); byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM); boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1; if (!isSynonym || includeSynonyms) { Analyzer highlighterAnalyzer = new TermNameAnalyzer(true); TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM, highlighterAnalyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1); if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) { results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(), frag[0].getScore(), isSynonym)); } highlighterAnalyzer.close(); } } return results; } catch (ParseException e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (TokenMgrError e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (Throwable e) { String msg = "Failed to perform Lucene seach with pattern: " + pattern; logger.log(Level.WARNING, msg, e); throw new RuntimeException(msg, e); } finally { close(analyzer); lock.unlock(); } }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndex.java
License:Apache License
@Override public Cursor query(final IndexPlan plan, NodeState rootState) { final Filter filter = plan.getFilter(); FullTextExpression ft = filter.getFullTextConstraint(); final Set<String> relPaths = getRelativePaths(ft); if (relPaths.size() > 1) { return new MultiLuceneIndex(filter, rootState, relPaths).query(); }//from www. j a v a2 s . c o m final String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next(); // we only restrict non-full-text conditions if there is // no relative property in the full-text constraint final boolean nonFullTextConstraints = parent.isEmpty(); final int parentDepth = getDepth(parent); QueryEngineSettings settings = filter.getQueryEngineSettings(); Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() { private final Deque<LuceneResultRow> queue = Queues.newArrayDeque(); private final Set<String> seenPaths = Sets.newHashSet(); private ScoreDoc lastDoc; private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE; private boolean noDocs = false; private long lastSearchIndexerVersion; @Override protected LuceneResultRow computeNext() { while (!queue.isEmpty() || loadDocs()) { return queue.remove(); } return endOfData(); } private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt) throws IOException { IndexReader reader = searcher.getIndexReader(); PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); reader.document(doc.doc, visitor); String path = visitor.getPath(); if (path != null) { if ("".equals(path)) { path = "/"; } if (!parent.isEmpty()) { // TODO OAK-828 this breaks node aggregation // get the base path // ensure the path ends with the given // relative path // if (!path.endsWith("/" + parent)) { // continue; // } path = getAncestorPath(path, parentDepth); // avoid duplicate entries if (seenPaths.contains(path)) { return null; } seenPaths.add(path); } return new LuceneResultRow(path, doc.score, excerpt); } return null; } /** * Loads the lucene documents in batches * @return true if any document is loaded */ private boolean loadDocs() { if (noDocs) { return false; } ScoreDoc lastDocToRecord = null; IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH)); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TopDocs docs; long time = System.currentTimeMillis(); checkForIndexVersionChange(searcher); while (true) { if (lastDoc != null) { LOG.debug("loading the next {} entries for query {}", nextBatchSize, query); docs = searcher.searchAfter(lastDoc, query, nextBatchSize); } else { LOG.debug("loading the first {} entries for query {}", nextBatchSize, query); docs = searcher.search(query, nextBatchSize); } time = System.currentTimeMillis() - time; LOG.debug("... took {} ms", time); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT); boolean addExcerpt = restriction != null && restriction.isNotNullRestriction(); Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); if (addExcerpt) { // setup highlighter QueryScorer scorer = new QueryScorer(query); scorer.setExpandMultiTermQuery(true); highlighter.setFragmentScorer(scorer); } for (ScoreDoc doc : docs.scoreDocs) { String excerpt = null; if (addExcerpt) { excerpt = getExcerpt(analyzer, searcher, doc); } LuceneResultRow row = convertToRow(doc, searcher, excerpt); if (row != null) { queue.add(row); } lastDocToRecord = doc; } if (queue.isEmpty() && docs.scoreDocs.length > 0) { lastDoc = lastDocToRecord; } else { break; } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) { SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade .getLuceneRequest(); noDocs = true; SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery); // ACL filter spellchecks Collection<String> suggestedWords = new ArrayList<String>(suggestWords.length); QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().getAnalyzer()); for (SuggestWord suggestion : suggestWords) { Query query = qp.createPhraseQuery(FieldNames.SUGGEST, suggestion.string); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) { suggestedWords.add(suggestion.string); break; } } } } queue.add(new LuceneResultRow(suggestedWords)); } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) { SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade .getLuceneRequest(); noDocs = true; List<Lookup.LookupResult> lookupResults = SuggestHelper .getSuggestions(indexNode.getLookup(), suggestQuery); // ACL filter suggestions Collection<String> suggestedWords = new ArrayList<String>(lookupResults.size()); QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.FULLTEXT, indexNode.getDefinition().getAnalyzer()); for (Lookup.LookupResult suggestion : lookupResults) { Query query = qp.createPhraseQuery(FieldNames.FULLTEXT, suggestion.key.toString()); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); if (filter.isAccessible(retrievedDoc.get(FieldNames.PATH))) { suggestedWords.add( "{term=" + suggestion.key + ",weight=" + suggestion.value + "}"); break; } } } } queue.add(new LuceneResultRow(suggestedWords)); } } catch (IOException e) { LOG.warn("query via {} failed.", LuceneIndex.this, e); } finally { indexNode.release(); } if (lastDocToRecord != null) { this.lastDoc = lastDocToRecord; } return !queue.isEmpty(); } private void checkForIndexVersionChange(IndexSearcher searcher) { long currentVersion = LucenePropertyIndex.getVersion(searcher); if (currentVersion != lastSearchIndexerVersion && lastDoc != null) { lastDoc = null; LOG.debug("Change in index version detected {} => {}. Query would be performed without " + "offset", currentVersion, lastSearchIndexerVersion); } this.lastSearchIndexerVersion = currentVersion; } }; SizeEstimator sizeEstimator = new SizeEstimator() { @Override public long getSize() { IndexNode indexNode = tracker.acquireIndexNode((String) plan.getAttribute(ATTR_INDEX_PATH)); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(filter, searcher.getIndexReader(), nonFullTextConstraints, indexNode.getDefinition()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); int totalHits = collector.getTotalHits(); LOG.debug("Estimated size for query {} is {}", query, totalHits); return totalHits; } LOG.debug("Estimated size: not a Query: {}", luceneRequestFacade.getLuceneRequest()); } catch (IOException e) { LOG.warn("query via {} failed.", LuceneIndex.this, e); } finally { indexNode.release(); } return -1; } }; return new LucenePathCursor(itr, settings, sizeEstimator); }
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.java
License:Apache License
@Override public Cursor query(final IndexPlan plan, NodeState rootState) { final Filter filter = plan.getFilter(); final Sort sort = getSort(plan); final PlanResult pr = getPlanResult(plan); QueryEngineSettings settings = filter.getQueryEngineSettings(); Iterator<LuceneResultRow> itr = new AbstractIterator<LuceneResultRow>() { private final Deque<LuceneResultRow> queue = Queues.newArrayDeque(); private final Set<String> seenPaths = Sets.newHashSet(); private ScoreDoc lastDoc; private int nextBatchSize = LUCENE_QUERY_BATCH_SIZE; private boolean noDocs = false; private long lastSearchIndexerVersion; @Override//from www . ja v a 2 s . c o m protected LuceneResultRow computeNext() { while (!queue.isEmpty() || loadDocs()) { return queue.remove(); } return endOfData(); } private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher searcher, String excerpt, Facets facets, String explanation) throws IOException { IndexReader reader = searcher.getIndexReader(); //TODO Look into usage of field cache for retrieving the path //instead of reading via reader if no of docs in index are limited PathStoredFieldVisitor visitor = new PathStoredFieldVisitor(); reader.document(doc.doc, visitor); String path = visitor.getPath(); if (path != null) { if ("".equals(path)) { path = "/"; } if (pr.isPathTransformed()) { String originalPath = path; path = pr.transformPath(path); if (path == null) { LOG.trace("Ignoring path {} : Transformation returned null", originalPath); return null; } // avoid duplicate entries if (seenPaths.contains(path)) { LOG.trace("Ignoring path {} : Duplicate post transformation", originalPath); return null; } seenPaths.add(path); } LOG.trace("Matched path {}", path); return new LuceneResultRow(path, doc.score, excerpt, facets, explanation); } return null; } /** * Loads the lucene documents in batches * @return true if any document is loaded */ private boolean loadDocs() { if (noDocs) { return false; } ScoreDoc lastDocToRecord = null; final IndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); CustomScoreQuery customScoreQuery = getCustomScoreQuery(plan, query); if (customScoreQuery != null) { query = customScoreQuery; } checkForIndexVersionChange(searcher); TopDocs docs; long start = PERF_LOGGER.start(); while (true) { if (lastDoc != null) { LOG.debug("loading the next {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.searchAfter(lastDoc, query, nextBatchSize); } else { docs = searcher.searchAfter(lastDoc, query, nextBatchSize, sort); } } else { LOG.debug("loading the first {} entries for query {}", nextBatchSize, query); if (sort == null) { docs = searcher.search(query, nextBatchSize); } else { docs = searcher.search(query, nextBatchSize, sort); } } PERF_LOGGER.end(start, -1, "{} ...", docs.scoreDocs.length); nextBatchSize = (int) Math.min(nextBatchSize * 2L, 100000); long f = PERF_LOGGER.start(); Facets facets = FacetHelper.getFacets(searcher, query, docs, plan, indexNode.getDefinition().isSecureFacets()); PERF_LOGGER.end(f, -1, "facets retrieved"); PropertyRestriction restriction = filter.getPropertyRestriction(QueryImpl.REP_EXCERPT); boolean addExcerpt = restriction != null && restriction.isNotNullRestriction(); restriction = filter.getPropertyRestriction(QueryImpl.OAK_SCORE_EXPLANATION); boolean addExplain = restriction != null && restriction.isNotNullRestriction(); Analyzer analyzer = indexNode.getDefinition().getAnalyzer(); FieldInfos mergedFieldInfos = null; if (addExcerpt) { // setup highlighter QueryScorer scorer = new QueryScorer(query); scorer.setExpandMultiTermQuery(true); highlighter.setFragmentScorer(scorer); mergedFieldInfos = MultiFields.getMergedFieldInfos(searcher.getIndexReader()); } for (ScoreDoc doc : docs.scoreDocs) { String excerpt = null; if (addExcerpt) { excerpt = getExcerpt(query, analyzer, searcher, doc, mergedFieldInfos); } String explanation = null; if (addExplain) { explanation = searcher.explain(query, doc.doc).toString(); } LuceneResultRow row = convertToRow(doc, searcher, excerpt, facets, explanation); if (row != null) { queue.add(row); } lastDocToRecord = doc; } if (queue.isEmpty() && docs.scoreDocs.length > 0) { //queue is still empty but more results can be fetched //from Lucene so still continue lastDoc = lastDocToRecord; } else { break; } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SpellcheckHelper.SpellcheckQuery) { String aclCheckField = indexNode.getDefinition().isFullTextEnabled() ? FieldNames.FULLTEXT : FieldNames.SPELLCHECK; noDocs = true; SpellcheckHelper.SpellcheckQuery spellcheckQuery = (SpellcheckHelper.SpellcheckQuery) luceneRequestFacade .getLuceneRequest(); SuggestWord[] suggestWords = SpellcheckHelper.getSpellcheck(spellcheckQuery); // ACL filter spellchecks QueryParser qp = new QueryParser(Version.LUCENE_47, aclCheckField, indexNode.getDefinition().getAnalyzer()); for (SuggestWord suggestion : suggestWords) { Query query = qp.createPhraseQuery(aclCheckField, QueryParserBase.escape(suggestion.string)); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new LuceneResultRow(suggestion.string)); break; } } } } } else if (luceneRequestFacade.getLuceneRequest() instanceof SuggestHelper.SuggestQuery) { SuggestHelper.SuggestQuery suggestQuery = (SuggestHelper.SuggestQuery) luceneRequestFacade .getLuceneRequest(); noDocs = true; List<Lookup.LookupResult> lookupResults = SuggestHelper .getSuggestions(indexNode.getLookup(), suggestQuery); QueryParser qp = new QueryParser(Version.LUCENE_47, FieldNames.SUGGEST, indexNode.getDefinition().isSuggestAnalyzed() ? indexNode.getDefinition().getAnalyzer() : SuggestHelper.getAnalyzer()); // ACL filter suggestions for (Lookup.LookupResult suggestion : lookupResults) { Query query = qp.parse("\"" + QueryParserBase.escape(suggestion.key.toString()) + "\""); query = addDescendantClauseIfRequired(query, plan); TopDocs topDocs = searcher.search(query, 100); if (topDocs.totalHits > 0) { for (ScoreDoc doc : topDocs.scoreDocs) { Document retrievedDoc = searcher.doc(doc.doc); String prefix = filter.getPath(); if (prefix.length() == 1) { prefix = ""; } if (filter.isAccessible(prefix + retrievedDoc.get(FieldNames.PATH))) { queue.add(new LuceneResultRow(suggestion.key.toString(), suggestion.value)); break; } } } } } } catch (Exception e) { LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); } finally { indexNode.release(); } if (lastDocToRecord != null) { this.lastDoc = lastDocToRecord; } return !queue.isEmpty(); } private void checkForIndexVersionChange(IndexSearcher searcher) { long currentVersion = getVersion(searcher); if (currentVersion != lastSearchIndexerVersion && lastDoc != null) { lastDoc = null; LOG.debug("Change in index version detected {} => {}. Query would be performed without " + "offset", currentVersion, lastSearchIndexerVersion); } this.lastSearchIndexerVersion = currentVersion; } }; SizeEstimator sizeEstimator = new SizeEstimator() { @Override public long getSize() { IndexNode indexNode = acquireIndexNode(plan); checkState(indexNode != null); try { IndexSearcher searcher = indexNode.getSearcher(); LuceneRequestFacade luceneRequestFacade = getLuceneRequest(plan, augmentorFactory, searcher.getIndexReader()); if (luceneRequestFacade.getLuceneRequest() instanceof Query) { Query query = (Query) luceneRequestFacade.getLuceneRequest(); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); int totalHits = collector.getTotalHits(); LOG.debug("Estimated size for query {} is {}", query, totalHits); return totalHits; } LOG.debug("estimate size: not a Query: {}", luceneRequestFacade.getLuceneRequest()); } catch (IOException e) { LOG.warn("query via {} failed.", LucenePropertyIndex.this, e); } finally { indexNode.release(); } return -1; } }; return new LucenePathCursor(itr, plan, settings, sizeEstimator); }
From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License:Open Source License
/** * Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable * for this Query and field./*from w ww . ja va2 s. c o m*/ * * @param query * The current query * @param tokenStream * document text CachingTokenStream * @param requestFieldname * The name of the field * @param request * The SolrQueryRequest */ @Override protected QueryScorer getSpanQueryScorer(Query query, String requestFieldname, TokenStream tokenStream, SolrQueryRequest request) { String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname, FieldUse.HIGHLIGHT, request); QueryScorer scorer = new QueryScorer(query, request.getParams().getFieldBool(requestFieldname, HighlightParams.FIELD_MATCH, false) ? schemaFieldName : null); scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true)); boolean defaultPayloads = true;// overwritten below try { // It'd be nice to know if payloads are on the tokenStream but the // presence of the attribute isn't a good // indicator. final Terms terms = request.getSearcher().getSlowAtomicReader().fields().terms(schemaFieldName); if (terms != null) { defaultPayloads = terms.hasPayloads(); } } catch (IOException e) { log.error("Couldn't check for existence of payloads", e); } scorer.setUsePayloads( request.getParams().getFieldBool(requestFieldname, HighlightParams.PAYLOADS, defaultPayloads)); return scorer; }
From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java
License:Apache License
/** * Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable for this Query and field. * @param query The current query// www. ja v a 2s. c om * @param tokenStream document text CachingTokenStream * @param fieldName The name of the field * @param request The SolrQueryRequest */ private QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) { boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false); Boolean highlightMultiTerm = request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true); if (highlightMultiTerm == null) { highlightMultiTerm = false; } QueryScorer scorer; if (reqFieldMatch) { scorer = new QueryScorer(query, fieldName); } else { scorer = new QueryScorer(query, null); } scorer.setExpandMultiTermQuery(highlightMultiTerm); return scorer; }
From source file:org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter.java
License:Apache License
@Override public HighlightField highlight(HighlighterContext highlighterContext) { SearchContextHighlight.Field field = highlighterContext.field; SearchContext context = highlighterContext.context; FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FieldMapper mapper = highlighterContext.mapper; Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; if (!hitContext.cache().containsKey(CACHE_KEY)) { Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> mappers = new HashMap<>(); hitContext.cache().put(CACHE_KEY, mappers); }// w ww .ja v a2 s . c om @SuppressWarnings("unchecked") Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext .cache().get(CACHE_KEY); org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper); if (entry == null) { QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query, field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null); queryScorer.setExpandMultiTermQuery(true); Fragmenter fragmenter; if (field.fieldOptions().numberOfFragments() == 0) { fragmenter = new NullFragmenter(); } else if (field.fieldOptions().fragmenter() == null) { fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); } else if ("simple".equals(field.fieldOptions().fragmenter())) { fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize()); } else if ("span".equals(field.fieldOptions().fragmenter())) { fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); } else { throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]"); } Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]); entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer); entry.setTextFragmenter(fragmenter); // always highlight across all data entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); cache.put(mapper, entry); } // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments(); ArrayList<TextFragment> fragsList = new ArrayList<>(); List<Object> textsToHighlight; Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers() .indexAnalyzer(); try { textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext); for (Object textToHighlight : textsToHighlight) { String text = textToHighlight.toString(); try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) { if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) { // can't perform highlighting if the stream has no terms (binary token stream) or no offsets continue; } TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment != null && bestTextFragment.getScore() > 0) { fragsList.add(bestTextFragment); } } } } } catch (Exception e) { if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) { // this can happen if for example a field is not_analyzed and ignore_above option is set. // the field will be ignored when indexing but the huge term is still in the source and // the plain highlighter will parse the source and try to analyze it. return null; } else { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } } if (field.fieldOptions().scoreOrdered()) { CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() { @Override public int compare(TextFragment o1, TextFragment o2) { return Math.round(o2.getScore() - o1.getScore()); } }); } String[] fragments; // number_of_fragments is set to 0 but we have a multivalued field if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) { fragments = new String[fragsList.size()]; for (int i = 0; i < fragsList.size(); i++) { fragments[i] = fragsList.get(i).toString(); } } else { // refine numberOfFragments if needed numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments; fragments = new String[numberOfFragments]; for (int i = 0; i < fragments.length; i++) { fragments[i] = fragsList.get(i).toString(); } } if (fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); } int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); if (noMatchSize > 0 && textsToHighlight.size() > 0) { // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary. String fieldContents = textsToHighlight.get(0).toString(); int end; try { end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents); } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } if (end > 0) { return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) }); } } return null; }
From source file:org.elasticsearch.search.highlight.PlainHighlighter.java
License:Apache License
public HighlightField highlight(HighlighterContext highlighterContext) { SearchContextHighlight.Field field = highlighterContext.field; SearchContext context = highlighterContext.context; FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FieldMapper<?> mapper = highlighterContext.mapper; Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; if (!hitContext.cache().containsKey(CACHE_KEY)) { Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> mappers = Maps.newHashMap(); hitContext.cache().put(CACHE_KEY, mappers); }//from ww w.j a v a 2 s. c om @SuppressWarnings("unchecked") Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter>) hitContext .cache().get(CACHE_KEY); org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper); if (entry == null) { Query query = highlighterContext.query.originalQuery(); QueryScorer queryScorer = new CustomQueryScorer(query, field.fieldOptions().requireFieldMatch() ? mapper.names().indexName() : null); queryScorer.setExpandMultiTermQuery(true); Fragmenter fragmenter; if (field.fieldOptions().numberOfFragments() == 0) { fragmenter = new NullFragmenter(); } else if (field.fieldOptions().fragmenter() == null) { fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); } else if ("simple".equals(field.fieldOptions().fragmenter())) { fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize()); } else if ("span".equals(field.fieldOptions().fragmenter())) { fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize()); } else { throw new ElasticsearchIllegalArgumentException( "unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]"); } Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]); entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer); entry.setTextFragmenter(fragmenter); // always highlight across all data entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); cache.put(mapper, entry); } // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments(); ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>(); List<Object> textsToHighlight; try { textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext); for (Object textToHighlight : textsToHighlight) { String text = textToHighlight.toString(); Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers() .indexAnalyzer(); TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text); if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) { // can't perform highlighting if the stream has no terms (binary token stream) or no offsets continue; } TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment != null && bestTextFragment.getScore() > 0) { fragsList.add(bestTextFragment); } } } } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } if (field.fieldOptions().scoreOrdered()) { CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() { public int compare(TextFragment o1, TextFragment o2) { return Math.round(o2.getScore() - o1.getScore()); } }); } String[] fragments; // number_of_fragments is set to 0 but we have a multivalued field if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) { fragments = new String[fragsList.size()]; for (int i = 0; i < fragsList.size(); i++) { fragments[i] = fragsList.get(i).toString(); } } else { // refine numberOfFragments if needed numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments; fragments = new String[numberOfFragments]; for (int i = 0; i < fragments.length; i++) { fragments[i] = fragsList.get(i).toString(); } } if (fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments)); } int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); if (noMatchSize > 0 && textsToHighlight.size() > 0) { // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary. String fieldContents = textsToHighlight.get(0).toString(); Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers() .indexAnalyzer(); int end; try { end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.names().indexName(), fieldContents)); } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } if (end > 0) { return new HighlightField(highlighterContext.fieldName, new Text[] { new StringText(fieldContents.substring(0, end)) }); } } return null; }