List of usage examples for org.apache.lucene.analysis TokenStream end

public void end() throws IOException

This method is called by the consumer after the last token has been consumed, i.e. after incrementToken() returned false (using the new TokenStream API).
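Before the project examples, here is a minimal sketch of the standard consume cycle around end(): reset() the stream, call incrementToken() until it returns false, then call end(), and close() in a finally block. The analyzer, field name, and sample text are placeholders for illustration; on older Lucene releases the analyzer constructor additionally takes a Version argument.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamEndExample {
    public static void main(String[] args) throws IOException {
        // Placeholder analyzer and field name, for illustration only.
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
        TokenStream stream = analyzer.tokenStream("field", new StringReader("hello world"));
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        try {
            stream.reset();                   // prepare the stream before consuming
            while (stream.incrementToken()) {
                System.out.println(termAtt.toString());
            }
            stream.end();                     // signal end-of-stream; records final state such as the end offset
        } finally {
            stream.close();                   // always release the stream, even if consumption failed
        }
    }
}

Note how end() is called inside the try while close() sits in the finally, mirroring the getAnalyzedText and findGoodEndForNoHighlightExcerpt examples below.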
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter fieldFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            f.add(new TermFilter(term), BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        int remainingSize = sizeIsVariable ? 0 : sizeValue; // note: prefixes are not included
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            if (termAtt.length() < 1 + chunkLength) {
                if (remainingSize > 0) { // implies size is fixed
                    if (remainingSize < chunkLength)
                        q.add(new PrefixLengthQuery(term, 1 + remainingSize, 1 + remainingSize),
                                BooleanClause.Occur.MUST);
                    else
                        q.add(new PrefixLengthQuery(term, 1 + chunkLength, 1 + chunkLength),
                                BooleanClause.Occur.MUST);
                } else { // varying size: only limit to the chunkLength
                    q.add(new PrefixLengthQuery(term, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
                }
            } else {
                q.add(new TermQuery(term), BooleanClause.Occur.MUST);
            }
            // termAtt contains the prefix, remainingSize doesn't take it into account
            remainingSize -= termAtt.length() - 1;
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    }
    return q;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter prefixFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real filter
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        int remainingSize = sizeIsVariable ? 0 : sizeValue; // note: prefixes are not included
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            if (termAtt.length() < 1 + chunkLength) {
                if (remainingSize > 0) { // implies size is fixed
                    if (remainingSize < chunkLength)
                        f.add(new PrefixLengthFilter(term, 1 + remainingSize, 1 + remainingSize),
                                BooleanClause.Occur.MUST);
                    else
                        f.add(new PrefixLengthFilter(term, 1 + chunkLength, 1 + chunkLength),
                                BooleanClause.Occur.MUST);
                } else { // varying size: only limit to the chunkLength
                    f.add(new PrefixLengthFilter(term, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
                }
            } else {
                f.add(new TermFilter(term), BooleanClause.Occur.MUST);
            }
            // termAtt contains the prefix, remainingSize doesn't take it into account
            remainingSize -= termAtt.length() - 1;
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter rangeFilter(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
        @Nullable QueryParseContext context) {
    // Special case: -infinity to +infinity
    if (lowerTerm == null && upperTerm == null) {
        if (sizeIsVariable)
            return null;
        StringBuilder sbWildcardPart = new StringBuilder();
        for (int i = 0; i < chunkLength; i++)
            sbWildcardPart.append(wildcardOne);
        String wildcardPart = sbWildcardPart.toString();
        BooleanFilter filter = new BooleanFilter();
        for (int i = sizeValue / chunkLength - 1; i >= 0; i--) {
            filter.add(new WildcardFilter(names().createIndexNameTerm(prefixes.charAt(i) + wildcardPart)),
                    BooleanClause.Occur.MUST);
        }
        if (sizeValue % chunkLength != 0) {
            // If the size is not divisible by chunkLength,
            // we still have a last chunk, but it has a shorter length
            filter.add(new WildcardFilter(names().createIndexNameTerm(
                    prefixes.charAt(sizeValue / chunkLength + 1)
                            + wildcardPart.substring(0, sizeValue % chunkLength))),
                    BooleanClause.Occur.MUST);
        }
        return filter;
    }
    // Check for emptiness
    if (lowerTerm != null && upperTerm != null) {
        int cmp = lowerTerm.compareTo(upperTerm);
        // Bound inversion
        if (cmp > 0)
            return MatchNoDocsFilter.INSTANCE;
        // Equal bounds
        if (cmp == 0) {
            // and both inclusive bounds: singleton
            if (includeLower && includeUpper) {
                // Special case: equal terms
                return fieldFilter(lowerTerm, context);
            }
            // otherwise, empty range
            return MatchNoDocsFilter.INSTANCE;
        }
    }
    // Analyze lower and upper terms
    List<String> lowerTerms = new LinkedList<String>();
    List<String> upperTerms = new LinkedList<String>();
    if (lowerTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(lowerTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                lowerTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    if (upperTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(upperTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                upperTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    // Generate the filter
    BooleanFilter topLevelAndFilter = new BooleanFilter();
    Iterator<String> lowers = lowerTerms.iterator();
    Iterator<String> uppers = upperTerms.iterator();
    String currLower = null;
    String currUpper = null;
    int remainingLowerSize = sizeIsVariable ? 0 : sizeValue;
    int remainingUpperSize = sizeIsVariable ? 0 : sizeValue;
    // First, the common prefix
    while (lowers.hasNext() && uppers.hasNext()) {
        currLower = lowers.next();
        currUpper = uppers.next();
        // The last part cannot be part of the prefix
        // because that special case has already been handled
        if (!lowers.hasNext() || !uppers.hasNext())
            break;
        if (!currLower.equals(currUpper))
            break;
        topLevelAndFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        remainingUpperSize -= currUpper.length() - 1;
    }
    String subPrefixLower = currLower;
    BooleanFilter secondLevelOrFilter = new BooleanFilter();
    BooleanFilter lastFilter;
    // Add the range part of the query (secondLevelOrFilter) to the prefix part
    // that is already in topLevelAndFilter
    topLevelAndFilter.add(secondLevelOrFilter, BooleanClause.Occur.MUST);
    // We still have secondLevelOrFilter to populate
    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the lowerTerm (if it's not also the last available!)
    if (lowers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the lowerTerm
    while (lowers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    // Handle the last token of the lowerTerm
    if (remainingLowerSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
    else if (remainingLowerSize < chunkLength)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 1 + remainingLowerSize, 1 + remainingLowerSize), BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                includeLower, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST);
    // Range from the non-prefix part of the lowerTerm to the non-prefix part of the upperTerm
    if (remainingUpperSize < 0)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper,
                false, false, 0, 1 + chunkLength), BooleanClause.Occur.SHOULD);
    else if (remainingUpperSize < chunkLength)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper,
                false, false, 1 + remainingUpperSize, 1 + remainingUpperSize), BooleanClause.Occur.SHOULD);
    else
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper,
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the upperTerm (if it's not also the last available!)
    if (uppers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the upperTerm
    while (uppers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    // Handle the last token of the upperTerm
    if (remainingUpperSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
    else if (remainingUpperSize < chunkLength)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 1 + remainingUpperSize, 1 + remainingUpperSize), BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, includeUpper, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST);
    return topLevelAndFilter;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        while (tok.incrementToken()) {
            q.add(new WildcardQuery(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    }
    return q;
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override
public Filter wildcardFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        while (tok.incrementToken()) {
            f.add(new WildcardFilter(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}
From source file:org.elasticsearch.index.mapper.token.AnalyzedTextFieldMapper.java
License:Apache License
static List<String> getAnalyzedText(TokenStream tokenStream) throws IOException {
    try {
        List<String> analyzedText = new ArrayList<>();
        CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            analyzedText.add(new String(terms.toString()));
        }
        tokenStream.end();
        return analyzedText;
    } finally {
        tokenStream.close();
    }
}
From source file:org.elasticsearch.search.highlight.PlainHighlighter.java
License:Apache License
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream)
        throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        tokenStream.end();
        tokenStream.close();
    }
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTest.java
License:Apache License
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef ref = termAtt.getBytesRef();
    assertNotNull(ref);
    suggestTokenStream.reset();
    while (suggestTokenStream.incrementToken()) {
        termAtt.fillBytesRef();
        assertThat(ref.utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}
From source file:org.elasticsearch.search.suggest.CompletionTokenStreamTests.java
License:Apache License
@Test
public void testSuggestTokenFilterProperlyDelegateInputStream() throws Exception {
    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
    tokenizer.setReader(new StringReader("mykeyword"));
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(
            new CompletionTokenStream(tokenizer, payload, new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    return suggester.toFiniteStrings(stream);
                }
            }));
    TermToBytesRefAttribute termAtt = suggestTokenStream.getAttribute(TermToBytesRefAttribute.class);
    assertNotNull(termAtt.getBytesRef());
    suggestTokenStream.reset();
    while (suggestTokenStream.incrementToken()) {
        assertThat(termAtt.getBytesRef().utf8ToString(), equalTo("mykeyword"));
    }
    suggestTokenStream.end();
    suggestTokenStream.close();
}