Usage examples for org.apache.lucene.analysis.TokenStream#close()
@Override public void close() throws IOException
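Before the project-specific examples below, here is a minimal sketch of the general TokenStream consume-and-close contract: reset(), incrementToken() until it returns false, end(), and finally close(). This sketch is not taken from any of the listed projects; it assumes a recent Lucene version where StandardAnalyzer has a no-argument constructor and Analyzer.tokenStream accepts a String value, and it uses try-with-resources (TokenStream implements Closeable) so close() runs even on failure. Several of the examples below target older Lucene/Elasticsearch APIs such as reusableTokenStream and call close() explicitly instead.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamCloseExample {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // Contract: reset(), then incrementToken() until false, then end(), then close().
        // try-with-resources guarantees close() is called even if an exception is thrown.
        try (TokenStream ts = analyzer.tokenStream("body", "Hello Lucene token streams")) {
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            ts.end();
        } // ts.close() happens here
        analyzer.close();
    }
}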
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter prefixFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real filter
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        int remainingSize = sizeIsVariable ? 0 : sizeValue; // note: prefixes are not included
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            if (termAtt.length() < 1 + chunkLength) {
                if (remainingSize > 0) { // implies size is fixed
                    if (remainingSize < chunkLength)
                        f.add(new PrefixLengthFilter(term, 1 + remainingSize, 1 + remainingSize),
                                BooleanClause.Occur.MUST);
                    else
                        f.add(new PrefixLengthFilter(term, 1 + chunkLength, 1 + chunkLength),
                                BooleanClause.Occur.MUST);
                } else { // varying size: only limit to the chunkLength
                    f.add(new PrefixLengthFilter(term, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
                }
            } else {
                f.add(new TermFilter(term), BooleanClause.Occur.MUST);
            }
            remainingSize -= termAtt.length() - 1; // termAtt contains the prefix, remainingSize doesn't take it into account
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter rangeFilter(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
        @Nullable QueryParseContext context) {
    // Special case: -infinity to +infinity
    if (lowerTerm == null && upperTerm == null) {
        if (sizeIsVariable)
            return null;
        StringBuilder sbWildcardPart = new StringBuilder();
        for (int i = 0; i < chunkLength; i++)
            sbWildcardPart.append(wildcardOne);
        String wildcardPart = sbWildcardPart.toString();
        BooleanFilter filter = new BooleanFilter();
        for (int i = sizeValue / chunkLength - 1; i >= 0; i--) {
            filter.add(new WildcardFilter(names().createIndexNameTerm(prefixes.charAt(i) + wildcardPart)),
                    BooleanClause.Occur.MUST);
        }
        if (sizeValue % chunkLength != 0) {
            // If the size is not dividible by chunkLength,
            // we still have a last chunk, but that has a shorter length
            filter.add(
                    new WildcardFilter(names().createIndexNameTerm(prefixes.charAt(sizeValue / chunkLength + 1)
                            + wildcardPart.substring(0, sizeValue % chunkLength))),
                    BooleanClause.Occur.MUST);
        }
        return filter;
    }
    // Check for emptyness
    if (lowerTerm != null && upperTerm != null) {
        int cmp = lowerTerm.compareTo(upperTerm);
        // Bound invertion
        if (cmp > 0)
            return MatchNoDocsFilter.INSTANCE;
        // Equal bounds
        if (cmp == 0) {
            // and both inclusive bounds: singleton
            if (includeLower && includeUpper) {
                // Special case: equal terms
                return fieldFilter(lowerTerm, context);
            }
            // otherwise, empty range
            return MatchNoDocsFilter.INSTANCE;
        }
    }
    // Analyze lower and upper terms
    List<String> lowerTerms = new LinkedList<String>();
    List<String> upperTerms = new LinkedList<String>();
    if (lowerTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(lowerTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                lowerTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    if (upperTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(upperTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                upperTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    // Generate the filter
    BooleanFilter topLevelAndFilter = new BooleanFilter();
    Iterator<String> lowers = lowerTerms.iterator();
    Iterator<String> uppers = upperTerms.iterator();
    String currLower = null;
    String currUpper = null;
    int remainingLowerSize = sizeIsVariable ? 0 : sizeValue;
    int remainingUpperSize = sizeIsVariable ? 0 : sizeValue;
    // First, the common prefix
    while (lowers.hasNext() && uppers.hasNext()) {
        currLower = lowers.next();
        currUpper = uppers.next();
        // The last part cannot be part of the prefix
        // because that special case has already been handled
        if (!lowers.hasNext() || !uppers.hasNext())
            break;
        if (!currLower.equals(currUpper))
            break;
        topLevelAndFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        remainingUpperSize -= currUpper.length() - 1;
    }
    String subPrefixLower = currLower;
    BooleanFilter secondLevelOrFilter = new BooleanFilter();
    BooleanFilter lastFilter;
    // Add the range part of the query (secondLevelOrFilter) to the prefix part is already in topLevelAndFilter
    topLevelAndFilter.add(secondLevelOrFilter, BooleanClause.Occur.MUST);
    // We still have secondLevelOrFilter to populate
    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the lowerTerm (if it's not also the last available!)
    if (lowers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the lowerTerm
    while (lowers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    // Handle the last token of the lowerTerm
    if (remainingLowerSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
    else if (remainingLowerSize < chunkLength)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 1 + remainingLowerSize, 1 + remainingLowerSize), BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST);
    // Range from the non prefix part of the lowerTerm to the non prefix part of the upperTerm
    if (remainingUpperSize < 0)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 0, 1 + chunkLength), BooleanClause.Occur.SHOULD);
    else if (remainingUpperSize < chunkLength)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 1 + remainingUpperSize, 1 + remainingUpperSize), BooleanClause.Occur.SHOULD);
    else
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the upperTerm (if it's not also the last available!)
    if (uppers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the upperTerm
    while (uppers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    // Handle the last token of the upperTerm
    if (remainingUpperSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
    else if (remainingUpperSize < chunkLength)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 1 + remainingUpperSize, 1 + remainingUpperSize), BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST);
    return topLevelAndFilter;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        while (tok.incrementToken()) {
            q.add(new WildcardQuery(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    }
    return q;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter wildcardFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        while (tok.incrementToken()) {
            f.add(new WildcardFilter(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}
From source file:org.elasticsearch.index.mapper.token.AnalyzedTextFieldMapper.java
License:Apache License
static List<String> getAnalyzedText(TokenStream tokenStream) throws IOException {
    try {
        List<String> analyzedText = new ArrayList<>();
        CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            analyzedText.add(new String(terms.toString()));
        }
        tokenStream.end();
        return analyzedText;
    } finally {
        tokenStream.close();
    }
}
From source file:org.elasticsearch.index.query.CommonTermsQueryParser.java
License:Apache License
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName,
        QueryParseContext parseContext, String queryAnalyzer, String lowFreqMinimumShouldMatch,
        String highFreqMinimumShouldMatch) throws IOException {
    FieldMapper<?> mapper = null;
    String field;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
        mapper = smartNameFieldMappers.mapper();
        field = mapper.names().indexName();
    } else {
        field = fieldName;
    }

    Analyzer analyzer = null;
    if (queryAnalyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null && smartNameFieldMappers != null) {
            analyzer = smartNameFieldMappers.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(queryAnalyzer);
        if (analyzer == null) {
            throw new ElasticsearchIllegalArgumentException("No analyzer found for [" + queryAnalyzer + "]");
        }
    }

    // Logic similar to QueryParser#getFieldQuery
    TokenStream source = analyzer.tokenStream(field, queryString.toString());
    int count = 0;
    try {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        while (source.incrementToken()) {
            BytesRef ref = new BytesRef(termAtt.length() * 4); // oversize for UTF-8
            UnicodeUtil.UTF16toUTF8(termAtt.buffer(), 0, termAtt.length(), ref);
            query.add(new Term(field, ref));
            count++;
        }
    } finally {
        source.close();
    }

    if (count == 0) {
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
}
From source file:org.elasticsearch.index.search.TextQueryParser.java
License:Apache License
public Query parse(Type type) {
    FieldMapper mapper = null;
    String field = fieldName;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null) {
        if (smartNameFieldMappers.hasMapper()) {
            mapper = smartNameFieldMappers.mapper();
            if (mapper != null) {
                field = mapper.names().indexName();
            }
        }
    }
    if (mapper != null && mapper.useFieldQueryWithQueryString()) {
        return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext);
    }

    Analyzer analyzer = null;
    if (this.analyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer);
        if (analyzer == null) {
            throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]");
        }
    }

    // Logic similar to QueryParser#getFieldQuery
    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new FastStringReader(text));
        source.reset();
    } catch (IOException e) {
        source = analyzer.tokenStream(field, new FastStringReader(text));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    CharTermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(CharTermAttribute.class)) {
            termAtt = buffer.getAttribute(CharTermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }

    Term termFactory = new Term(field);
    if (numTokens == 0) {
        return MatchNoDocsQuery.INSTANCE;
    } else if (type == Type.BOOLEAN) {
        if (numTokens == 1) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query q = newTermQuery(mapper, termFactory.createTerm(term));
            return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
        }
        BooleanQuery q = new BooleanQuery(positionCount == 1);
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
            q.add(currentQuery, occur);
        }
        return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
    } else if (type == Type.PHRASE) {
        if (severalTokensAtSamePosition) {
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.setSlop(phraseSlop);
            List<Term> multiTerms = new ArrayList<Term>();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                if (positionIncrement > 0 && multiTerms.size() > 0) {
                    if (enablePositionIncrements) {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                    } else {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                    }
                    multiTerms.clear();
                }
                position += positionIncrement;
                multiTerms.add(termFactory.createTerm(term));
            }
            if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
            } else {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
            }
            return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
        } else {
            PhraseQuery pq = new PhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(termFactory.createTerm(term), position);
                } else {
                    pq.add(termFactory.createTerm(term));
                }
            }
            return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
        }
    } else if (type == Type.PHRASE_PREFIX) {
        MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
        mpq.setSlop(phraseSlop);
        mpq.setMaxExpansions(maxExpansions);
        List<Term> multiTerms = new ArrayList<Term>();
        int position = -1;
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
                if (posIncrAtt != null) {
                    positionIncrement = posIncrAtt.getPositionIncrement();
                }
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            if (positionIncrement > 0 && multiTerms.size() > 0) {
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                }
                multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(termFactory.createTerm(term));
        }
        if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
        } else {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
        }
        return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
    }
    throw new ElasticSearchIllegalStateException("No type found for [" + type + "]");
}
From source file:org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregator.java
License:Apache License
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    final BytesRefBuilder previous = new BytesRefBuilder();
    return new LeafBucketCollectorBase(sub, null) {

        @Override
        public void collect(int doc, long bucket) throws IOException {
            collectFromSource(doc, bucket, fieldName, sourceFieldNames);
            numCollectedDocs++;
            if (dupSequenceSpotter != null) {
                dupSequenceSpotter.startNewSequence();
            }
        }

        private void processTokenStream(int doc, long bucket, TokenStream ts, BytesRefHash inDocTerms,
                String fieldText) throws IOException {
            if (dupSequenceSpotter != null) {
                ts = new DeDuplicatingTokenFilter(ts, dupSequenceSpotter);
            }
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            try {
                while (ts.incrementToken()) {
                    if (dupSequenceSpotter != null) {
                        long newTrieSize = dupSequenceSpotter.getEstimatedSizeInBytes();
                        long growth = newTrieSize - lastTrieSize;
                        // Only update the circuitbreaker after
                        if (growth > MEMORY_GROWTH_REPORTING_INTERVAL_BYTES) {
                            addRequestCircuitBreakerBytes(growth);
                            lastTrieSize = newTrieSize;
                        }
                    }
                    previous.clear();
                    previous.copyChars(termAtt);
                    BytesRef bytes = previous.get();
                    if (inDocTerms.add(bytes) >= 0) {
                        if (includeExclude == null || includeExclude.accept(bytes)) {
                            long bucketOrdinal = bucketOrds.add(bytes);
                            if (bucketOrdinal < 0) { // already seen
                                bucketOrdinal = -1 - bucketOrdinal;
                                collectExistingBucket(sub, doc, bucketOrdinal);
                            } else {
                                collectBucket(sub, doc, bucketOrdinal);
                            }
                        }
                    }
                }
            } finally {
                ts.close();
            }
        }

        private void collectFromSource(int doc, long bucket, String indexedFieldName, String[] sourceFieldNames)
                throws IOException {
            MappedFieldType fieldType = context.getQueryShardContext().fieldMapper(indexedFieldName);
            if (fieldType == null) {
                throw new IllegalArgumentException("Aggregation [" + name + "] cannot process field ["
                        + indexedFieldName + "] since it is not present");
            }
            SourceLookup sourceLookup = context.lookup().source();
            sourceLookup.setSegmentAndDocument(ctx, doc);
            BytesRefHash inDocTerms = new BytesRefHash(256, context.bigArrays());
            try {
                for (String sourceField : sourceFieldNames) {
                    List<Object> textsToHighlight = sourceLookup.extractRawValues(sourceField);
                    textsToHighlight = textsToHighlight.stream().map(obj -> {
                        if (obj instanceof BytesRef) {
                            return fieldType.valueForDisplay(obj).toString();
                        } else {
                            return obj;
                        }
                    }).collect(Collectors.toList());

                    Analyzer analyzer = fieldType.indexAnalyzer();
                    for (Object fieldValue : textsToHighlight) {
                        String fieldText = fieldValue.toString();
                        TokenStream ts = analyzer.tokenStream(indexedFieldName, fieldText);
                        processTokenStream(doc, bucket, ts, inDocTerms, fieldText);
                    }
                }
            } finally {
                Releasables.close(inDocTerms);
            }
        }
    };
}
From source file:org.elasticsearch.search.highlight.PlainHighlighter.java
License:Apache License
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream)
        throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        tokenStream.end();
        tokenStream.close();
    }
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test
public void testValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 8; i++) {
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()),
            MockTokenizer.WHITESPACE, true);
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(),
                            stream);
                    return finiteStrings;
                }
            });
    suggestTokenStream.reset();
    ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while (suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    suggestTokenStream.close();
    assertEquals(count, 256);
    assertEquals(count, maxPos);
}