List of usage examples for org.apache.lucene.search.vectorhighlight SimpleBoundaryScanner DEFAULT_MAX_SCAN
int DEFAULT_MAX_SCAN
To view the source code for org.apache.lucene.search.vectorhighlight SimpleBoundaryScanner DEFAULT_MAX_SCAN, click Source Link.
From source file:org.elasticsearch.search.fetch.subphase.highlight.FastVectorHighlighter.java
License:Apache License
@Override public HighlightField highlight(HighlighterContext highlighterContext) { SearchContextHighlight.Field field = highlighterContext.field; SearchContext context = highlighterContext.context; FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FieldMapper mapper = highlighterContext.mapper; if (canHighlight(mapper) == false) { throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); }// w ww .ja v a2 s.com Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; if (!hitContext.cache().containsKey(CACHE_KEY)) { hitContext.cache().put(CACHE_KEY, new HighlighterEntry()); } HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY); try { FieldQuery fieldQuery; if (field.fieldOptions().requireFieldMatch()) { if (cache.fieldMatchFieldQuery == null) { /* * we use top level reader to rewrite the query against all readers, with use caching it across hits (and across * readers...) */ cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } fieldQuery = cache.fieldMatchFieldQuery; } else { if (cache.noFieldMatchFieldQuery == null) { /* * we use top level reader to rewrite the query against all readers, with use caching it across hits (and across * readers...) 
*/ cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } fieldQuery = cache.noFieldMatchFieldQuery; } MapperHighlightEntry entry = cache.mappers.get(mapper); if (entry == null) { FragListBuilder fragListBuilder; BaseFragmentsBuilder fragmentsBuilder; BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(), field.fieldOptions().boundaryChars()); } boolean forceSource = context.highlight().forceSource(field); if (field.fieldOptions().numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } else { fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? 
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); if (field.fieldOptions().scoreOrdered()) { if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } else { if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } } fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue); entry = new MapperHighlightEntry(); entry.fragListBuilder = fragListBuilder; entry.fragmentsBuilder = fragmentsBuilder; if (cache.fvh == null) { // parameters to FVH are not requires since: // first two booleans are not relevant since they are set on the CustomFieldQuery (phrase and fieldMatch) // fragment builders are used explicitly cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter(); } CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter()); cache.mappers.put(mapper, entry); } cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); String[] fragments; // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().numberOfFragments(); int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ? 
Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize(); // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible // Only send matched fields if they were requested to save time. if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } else { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } if (fragments != null && fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); } int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); if (noMatchSize > 0) { // Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/); fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList()); fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); if (fragments != null && fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments)); } } return null; } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } }
From source file:org.elasticsearch.search.highlight.FastVectorHighlighter.java
License:Apache License
@Override public HighlightField highlight(HighlighterContext highlighterContext) { SearchContextHighlight.Field field = highlighterContext.field; SearchContext context = highlighterContext.context; FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FieldMapper<?> mapper = highlighterContext.mapper; if (!(mapper.fieldType().storeTermVectors() && mapper.fieldType().storeTermVectorOffsets() && mapper.fieldType().storeTermVectorPositions())) { throw new ElasticsearchIllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); }//from w ww .ja va2 s. c o m Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; if (!hitContext.cache().containsKey(CACHE_KEY)) { hitContext.cache().put(CACHE_KEY, new HighlighterEntry()); } HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY); try { FieldQuery fieldQuery; if (field.fieldOptions().requireFieldMatch()) { if (cache.fieldMatchFieldQuery == null) { // we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...) cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } fieldQuery = cache.fieldMatchFieldQuery; } else { if (cache.noFieldMatchFieldQuery == null) { // we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...) 
cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query.originalQuery(), hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } fieldQuery = cache.noFieldMatchFieldQuery; } MapperHighlightEntry entry = cache.mappers.get(mapper); if (entry == null) { FragListBuilder fragListBuilder; BaseFragmentsBuilder fragmentsBuilder; BoundaryScanner boundaryScanner = DEFAULT_BOUNDARY_SCANNER; if (field.fieldOptions().boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || field.fieldOptions().boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { boundaryScanner = new SimpleBoundaryScanner(field.fieldOptions().boundaryMaxScan(), field.fieldOptions().boundaryChars()); } boolean forceSource = context.highlight().forceSource(field); if (field.fieldOptions().numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } else { fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? 
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); if (field.fieldOptions().scoreOrdered()) { if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } else { if (!forceSource && mapper.fieldType().stored()) { fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } } } fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue); entry = new MapperHighlightEntry(); entry.fragListBuilder = fragListBuilder; entry.fragmentsBuilder = fragmentsBuilder; if (cache.fvh == null) { // parameters to FVH are not requires since: // first two booleans are not relevant since they are set on the CustomFieldQuery (phrase and fieldMatch) // fragment builders are used explicitly cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter(); } CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter()); cache.mappers.put(mapper, entry); } cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); String[] fragments; // a HACK to make highlighter do highlighting, even though its using the single frag list builder int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().numberOfFragments(); int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ? 
Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize(); // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible // Only send matched fields if they were requested to save time. if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fieldOptions().matchedFields(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } else { fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); } if (fragments != null && fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments)); } int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize(); if (noMatchSize > 0) { // Essentially we just request that a fragment is built from 0 to noMatchSize using the normal fragmentsBuilder FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/); fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList()); fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.names().indexName(), fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder); if (fragments != null && fragments.length > 0) { return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments)); } } return null; } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } }
From source file:org.elasticsearch.search.highlight.HighlighterParseElement.java
License:Apache License
@Override public void parse(XContentParser parser, SearchContext context) throws Exception { XContentParser.Token token;/* w w w . j a va 2 s . c o m*/ String topLevelFieldName = null; List<Tuple<String, SearchContextHighlight.FieldOptions.Builder>> fieldsOptions = newArrayList(); SearchContextHighlight.FieldOptions.Builder globalOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder() .preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(false).highlightFilter(false) .requireFieldMatch(false).forceSource(false).fragmentCharSize(100).numberOfFragments(5) .encoder("default").boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN) .boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS).noMatchSize(0).phraseLimit(256); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { topLevelFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_ARRAY) { if ("pre_tags".equals(topLevelFieldName) || "preTags".equals(topLevelFieldName)) { List<String> preTagsList = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { preTagsList.add(parser.text()); } globalOptionsBuilder.preTags(preTagsList.toArray(new String[preTagsList.size()])); } else if ("post_tags".equals(topLevelFieldName) || "postTags".equals(topLevelFieldName)) { List<String> postTagsList = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { postTagsList.add(parser.text()); } globalOptionsBuilder.postTags(postTagsList.toArray(new String[postTagsList.size()])); } } else if (token.isValue()) { if ("order".equals(topLevelFieldName)) { globalOptionsBuilder.scoreOrdered("score".equals(parser.text())); } else if ("tags_schema".equals(topLevelFieldName) || "tagsSchema".equals(topLevelFieldName)) { String schema = parser.text(); if ("styled".equals(schema)) { globalOptionsBuilder.preTags(STYLED_PRE_TAG); 
globalOptionsBuilder.postTags(STYLED_POST_TAGS); } } else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) { globalOptionsBuilder.highlightFilter(parser.booleanValue()); } else if ("fragment_size".equals(topLevelFieldName) || "fragmentSize".equals(topLevelFieldName)) { globalOptionsBuilder.fragmentCharSize(parser.intValue()); } else if ("number_of_fragments".equals(topLevelFieldName) || "numberOfFragments".equals(topLevelFieldName)) { globalOptionsBuilder.numberOfFragments(parser.intValue()); } else if ("encoder".equals(topLevelFieldName)) { globalOptionsBuilder.encoder(parser.text()); } else if ("require_field_match".equals(topLevelFieldName) || "requireFieldMatch".equals(topLevelFieldName)) { globalOptionsBuilder.requireFieldMatch(parser.booleanValue()); } else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) { globalOptionsBuilder.boundaryMaxScan(parser.intValue()); } else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) { char[] charsArr = parser.text().toCharArray(); Character[] globalBoundaryChars = new Character[charsArr.length]; for (int i = 0; i < charsArr.length; i++) { globalBoundaryChars[i] = charsArr[i]; } globalOptionsBuilder.boundaryChars(globalBoundaryChars); } else if ("type".equals(topLevelFieldName)) { globalOptionsBuilder.highlighterType(parser.text()); } else if ("fragmenter".equals(topLevelFieldName)) { globalOptionsBuilder.fragmenter(parser.text()); } else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) { globalOptionsBuilder.noMatchSize(parser.intValue()); } else if ("force_source".equals(topLevelFieldName) || "forceSource".equals(topLevelFieldName)) { globalOptionsBuilder.forceSource(parser.booleanValue()); } else if ("phrase_limit".equals(topLevelFieldName) || "phraseLimit".equals(topLevelFieldName)) { globalOptionsBuilder.phraseLimit(parser.intValue()); } } 
else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) { globalOptionsBuilder.options(parser.map()); } else if (token == XContentParser.Token.START_OBJECT) { if ("fields".equals(topLevelFieldName)) { String highlightFieldName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { highlightFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { SearchContextHighlight.FieldOptions.Builder fieldOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder(); String fieldName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { fieldName = parser.currentName(); } else if (token == XContentParser.Token.START_ARRAY) { if ("pre_tags".equals(fieldName) || "preTags".equals(fieldName)) { List<String> preTagsList = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { preTagsList.add(parser.text()); } fieldOptionsBuilder .preTags(preTagsList.toArray(new String[preTagsList.size()])); } else if ("post_tags".equals(fieldName) || "postTags".equals(fieldName)) { List<String> postTagsList = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { postTagsList.add(parser.text()); } fieldOptionsBuilder .postTags(postTagsList.toArray(new String[postTagsList.size()])); } else if ("matched_fields".equals(fieldName) || "matchedFields".equals(fieldName)) { Set<String> matchedFields = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { matchedFields.add(parser.text()); } fieldOptionsBuilder.matchedFields(matchedFields); } } else if (token.isValue()) { if ("fragment_size".equals(fieldName) || "fragmentSize".equals(fieldName)) { fieldOptionsBuilder.fragmentCharSize(parser.intValue()); } else if ("number_of_fragments".equals(fieldName) || 
"numberOfFragments".equals(fieldName)) { fieldOptionsBuilder.numberOfFragments(parser.intValue()); } else if ("fragment_offset".equals(fieldName) || "fragmentOffset".equals(fieldName)) { fieldOptionsBuilder.fragmentOffset(parser.intValue()); } else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) { fieldOptionsBuilder.highlightFilter(parser.booleanValue()); } else if ("order".equals(fieldName)) { fieldOptionsBuilder.scoreOrdered("score".equals(parser.text())); } else if ("require_field_match".equals(fieldName) || "requireFieldMatch".equals(fieldName)) { fieldOptionsBuilder.requireFieldMatch(parser.booleanValue()); } else if ("boundary_max_scan".equals(topLevelFieldName) || "boundaryMaxScan".equals(topLevelFieldName)) { fieldOptionsBuilder.boundaryMaxScan(parser.intValue()); } else if ("boundary_chars".equals(topLevelFieldName) || "boundaryChars".equals(topLevelFieldName)) { char[] charsArr = parser.text().toCharArray(); Character[] boundaryChars = new Character[charsArr.length]; for (int i = 0; i < charsArr.length; i++) { boundaryChars[i] = charsArr[i]; } fieldOptionsBuilder.boundaryChars(boundaryChars); } else if ("type".equals(fieldName)) { fieldOptionsBuilder.highlighterType(parser.text()); } else if ("fragmenter".equals(fieldName)) { fieldOptionsBuilder.fragmenter(parser.text()); } else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) { fieldOptionsBuilder.noMatchSize(parser.intValue()); } else if ("force_source".equals(fieldName) || "forceSource".equals(fieldName)) { fieldOptionsBuilder.forceSource(parser.booleanValue()); } else if ("phrase_limit".equals(fieldName) || "phraseLimit".equals(fieldName)) { fieldOptionsBuilder.phraseLimit(parser.intValue()); } } else if (token == XContentParser.Token.START_OBJECT) { if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) { fieldOptionsBuilder .highlightQuery(context.queryParserService().parse(parser).query()); } else if 
("options".equals(fieldName)) { fieldOptionsBuilder.options(parser.map()); } } } fieldsOptions.add(Tuple.tuple(highlightFieldName, fieldOptionsBuilder)); } } } else if ("highlight_query".equals(topLevelFieldName) || "highlightQuery".equals(topLevelFieldName)) { globalOptionsBuilder.highlightQuery(context.queryParserService().parse(parser).query()); } } } SearchContextHighlight.FieldOptions globalOptions = globalOptionsBuilder.build(); if (globalOptions.preTags() != null && globalOptions.postTags() == null) { throw new SearchParseException(context, "Highlighter global preTags are set, but global postTags are not set"); } List<SearchContextHighlight.Field> fields = Lists.newArrayList(); // now, go over and fill all fieldsOptions with default values from the global state for (Tuple<String, SearchContextHighlight.FieldOptions.Builder> tuple : fieldsOptions) { fields.add(new SearchContextHighlight.Field(tuple.v1(), tuple.v2().merge(globalOptions).build())); } context.highlight(new SearchContextHighlight(fields)); }