List of usage examples for org.apache.lucene.search.highlight OffsetLimitTokenFilter OffsetLimitTokenFilter
public OffsetLimitTokenFilter(TokenStream input, int offsetLimit)
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;// ww w. j a v a2s. com // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } String[] docTexts = listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; TokenStream tokenStream; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { // TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); // if (tvStream != null) { // tots = new TermOffsetsTokenStream(tvStream); // } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. get field j's TokenStream) from tots: tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tokenStream = new CachingTokenFilter(tokenStream); } else { tokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream); // after highlighter initialization, reset tstream since construction of highlighter already used it tokenStream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes String[] summaries = null; if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = (String[]) fragTexts.toArray(); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License:Open Source License
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query, IndexReader reader, SolrQueryRequest req) throws IOException { final SolrParams params = req.getParams(); final String fieldName = schemaField.getName(); final int mvToExamine = params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, (schemaField.multiValued()) ? Integer.MAX_VALUE : 1); // Technically this is the max *fragments* (snippets), not max values: int mvToMatch = params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE); if (mvToExamine <= 0 || mvToMatch <= 0) { return null; }// w ww. jav a 2 s. c o m int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, DEFAULT_MAX_CHARS); if (maxCharsToAnalyze < 0) {//e.g. -1 maxCharsToAnalyze = Integer.MAX_VALUE; } List<String> fieldValues = getFieldValues(doc, fieldName, mvToExamine, maxCharsToAnalyze, req); if (fieldValues.isEmpty()) { return null; } // preserve order of values in a multiValued list boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false); int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<>(); //Try term vectors, which is faster // note: offsets are minimally sufficient for this HL. final Fields tvFields = schemaField.storeTermOffsets() ? reader.getTermVectors(docId) : null; final TokenStream tvStream = TokenSources.getTermVectorTokenStreamOrNull(fieldName, tvFields, maxCharsToAnalyze - 1); // We need to wrap in OffsetWindowTokenFilter if multi-valued final OffsetWindowTokenFilter tvWindowStream; if (tvStream != null && fieldValues.size() > 1) { tvWindowStream = new OffsetWindowTokenFilter(tvStream); } else { tvWindowStream = null; } for (String thisText : fieldValues) { if (mvToMatch <= 0 || maxCharsToAnalyze <= 0) { break; } TokenStream tstream; if (tvWindowStream != null) { // if we have a multi-valued field with term vectors, then get the next offset window tstream = tvWindowStream.advanceToNextWindowOfLength(thisText.length()); } else if (tvStream != null) { tstream = tvStream; // single-valued with term vectors } else { // fall back to analyzer tstream = createAnalyzerTStream(schemaField, thisText); } Highlighter highlighter; if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) { // We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream // needs to implement reset() efficiently. //If the tokenStream is right from the term vectors, then CachingTokenFilter is unnecessary. // It should be okay if OffsetLimit won't get applied in this case. final TokenStream tempTokenStream; if (tstream != tvStream) { if (maxCharsToAnalyze >= thisText.length()) { tempTokenStream = new CachingTokenFilter(tstream); } else { tempTokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze)); } } else { tempTokenStream = tstream; } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, tempTokenStream); // if the CachingTokenFilter was consumed then use it going forward. if (tempTokenStream instanceof CachingTokenFilter && ((CachingTokenFilter) tempTokenStream).isCached()) { tstream = tempTokenStream; } //tstream.reset(); not needed; getBestTextFragments will reset it. } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); maxCharsToAnalyze -= thisText.length(); // Highlight! try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, fixLocalisedText(thisText), mergeContiguousFragments, numFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment == null)//can happen via mergeContiguousFragments continue; // normally we want a score (must be highlighted), but if preserveMulti then we return a snippet regardless. if (bestTextFragment.getScore() > 0 || preserveMulti) { frags.add(bestTextFragment); if (bestTextFragment.getScore() > 0) --mvToMatch; // note: limits fragments (for multi-valued fields), not quite the number of values } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } //end field value loop // Put the fragments onto the Solr response (docSummaries) if (frags.size() > 0) { // sort such that the fragments with the highest score come first if (!preserveMulti) { Collections.sort(frags, (arg0, arg1) -> Float.compare(arg1.getScore(), arg0.getScore())); } // Truncate list to hl.snippets, but not when hl.preserveMulti if (frags.size() > numFragments && !preserveMulti) { frags = frags.subList(0, numFragments); } return getResponseForFragments(frags, req); } return null;//no highlights for this field }
From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;/* w w w .j a v a 2s.c o m*/ // END: Hack SolrParams params = req.getParams(); // preserve order of values in a multiValued list boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false); List<IndexableField> allFields = doc.getFields(); if (allFields != null && allFields.size() == 0) return; // No explicit contract that getFields returns != null, // although currently it can't. TokenStream tstream = null; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); String[] summaries = null; List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization TokenStream tvStream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName); if (tvStream != null) { tots = new TermOffsetsTokenStream(tvStream); } int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, Integer.toString(Integer.MAX_VALUE))); int mvToMatch = Integer.parseInt( req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.toString(Integer.MAX_VALUE))); for (IndexableField thisField : allFields) { if (mvToExamine <= 0 || mvToMatch <= 0) break; if (!thisField.name().equals(fieldName)) continue; // Is there a better way to do this? --mvToExamine; String thisText = thisField.stringValue(); if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. get field j's TokenStream) from tots: tstream = tots.getMultiValuedTokenStream(thisText.length()); } else { // fall back to analyzer tstream = createAnalyzerTStream(schema, fieldName, thisText); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tstream = new CachingTokenFilter(tstream); } else { tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream); // after highlighter initialization, reset tstream since construction of highlighter already used it tstream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(thisText.length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if (preserveMulti) { if (bestTextFragments[k] != null) { frags.add(bestTextFragments[k]); --mvToMatch; } } else { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); --mvToMatch; } } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first if (!preserveMulti) { Collections.sort(frags, new Comparator<TextFragment>() { @Override public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); } // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes if (frags.size() > 0) { ArrayList<String> fragTexts = new ArrayList<String>(); for (TextFragment fragment : frags) { if (preserveMulti) { if (fragment != null) { fragTexts.add(fragment.toString()); } } else { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } } if (fragTexts.size() >= numFragments && !preserveMulti) break; } summaries = fragTexts.toArray(new String[0]); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }