Example usage for org.apache.lucene.analysis TokenStream reset

Introduction

On this page you will find example usages of org.apache.lucene.analysis.TokenStream#reset().

Prototype

public void reset() throws IOException 

Document

This method is called by a consumer before it begins consumption using incrementToken().
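
For orientation, here is a minimal sketch of the full consumption lifecycle that the examples below follow, assuming a Lucene version where Analyzer.tokenStream(String, String) is available; the analyzer, field name, and input text are placeholders:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamResetSketch {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer()) {
            TokenStream ts = analyzer.tokenStream("content", "Hello token stream world");
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            try {
                ts.reset();                 // mandatory before the first incrementToken()
                while (ts.incrementToken()) {
                    System.out.println(termAtt.toString());
                }
                ts.end();                   // records end-of-stream state (final offset, etc.)
            } finally {
                ts.close();                 // releases resources so the analyzer can reuse the stream
            }
        }
    }
}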

Usage

From source file: org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License: Apache License

@Override
public Filter rangeFilter(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
        @Nullable QueryParseContext context) {
    // Special case: -infinity to +infinity
    if (lowerTerm == null && upperTerm == null) {
        if (sizeIsVariable)
            return null;
        StringBuilder sbWildcardPart = new StringBuilder();
        for (int i = 0; i < chunkLength; i++)
            sbWildcardPart.append(wildcardOne);
        String wildcardPart = sbWildcardPart.toString();
        BooleanFilter filter = new BooleanFilter();
        for (int i = sizeValue / chunkLength - 1; i >= 0; i--) {
            filter.add(new WildcardFilter(names().createIndexNameTerm(prefixes.charAt(i) + wildcardPart)),
                    BooleanClause.Occur.MUST);
        }
        if (sizeValue % chunkLength != 0) {
            // If the size is not divisible by chunkLength,
            // we still have a last chunk, but that has a shorter length
            filter.add(
                    new WildcardFilter(names().createIndexNameTerm(prefixes.charAt(sizeValue / chunkLength)
                            + wildcardPart.substring(0, sizeValue % chunkLength))),
                    BooleanClause.Occur.MUST);
        }
        return filter;
    }
    // Check for an empty range
    if (lowerTerm != null && upperTerm != null) {
        int cmp = lowerTerm.compareTo(upperTerm);
        // Bound inversion
        if (cmp > 0)
            return MatchNoDocsFilter.INSTANCE;
        // Equal bounds
        if (cmp == 0) {
            // and both inclusive bounds: singleton
            if (includeLower && includeUpper) {
                // Special case: equal terms
                return fieldFilter(lowerTerm, context);
            }
            // otherwise, empty range
            return MatchNoDocsFilter.INSTANCE;
        }
    }
    // Analyze lower and upper terms
    List<String> lowerTerms = new LinkedList<String>();
    List<String> upperTerms = new LinkedList<String>();
    if (lowerTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(lowerTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                lowerTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    if (upperTerm != null) {
        TokenStream tok = null;
        try {
            tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(upperTerm));
            tok.reset();
        } catch (IOException e) {
            return null;
        }
        CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
        try {
            while (tok.incrementToken())
                upperTerms.add(termAtt.toString());
            tok.end();
            tok.close();
        } catch (IOException e) {
            return null;
        }
    }
    // Generate the filter
    BooleanFilter topLevelAndFilter = new BooleanFilter();
    Iterator<String> lowers = lowerTerms.iterator();
    Iterator<String> uppers = upperTerms.iterator();
    String currLower = null;
    String currUpper = null;
    int remainingLowerSize = sizeIsVariable ? 0 : sizeValue;
    int remainingUpperSize = sizeIsVariable ? 0 : sizeValue;

    // First, the common prefix
    while (lowers.hasNext() && uppers.hasNext()) {
        currLower = lowers.next();
        currUpper = uppers.next();
        // The last part cannot be part of the prefix
        // because that special case has already been handled
        if (!lowers.hasNext() || !uppers.hasNext())
            break;
        if (!currLower.equals(currUpper))
            break;
        topLevelAndFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        remainingUpperSize -= currUpper.length() - 1;
    }

    String subPrefixLower = currLower;
    BooleanFilter secondLevelOrFilter = new BooleanFilter();
    BooleanFilter lastFilter;
    // Add the range part of the query (secondLevelOrFilter); the prefix part is already in topLevelAndFilter
    topLevelAndFilter.add(secondLevelOrFilter, BooleanClause.Occur.MUST);
    // We still have secondLevelOrFilter to populate

    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the lowerTerm (if it's not also the last available!)
    if (lowers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the lowerTerm
    while (lowers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currLower)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingLowerSize -= currLower.length() - 1;
        currLower = lowers.next();
    }
    // Handle the last token of the lowerTerm
    if (remainingLowerSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower,
                luceneTermUpperBound(currLower), includeLower, false, 0, 1 + chunkLength),
                BooleanClause.Occur.MUST);
    else if (remainingLowerSize < chunkLength)
        lastFilter.add(
                new TermRangeLengthFilter(names().indexName(), currLower, luceneTermUpperBound(currLower),
                        includeLower, false, 1 + remainingLowerSize, 1 + remainingLowerSize),
                BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), currLower,
                luceneTermUpperBound(currLower), includeLower, false, 1 + chunkLength, 1 + chunkLength),
                BooleanClause.Occur.MUST);

    // Range from the non prefix part of the lowerTerm to the non prefix part of the upperTerm
    if (remainingUpperSize < 0)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 0, 1 + chunkLength), BooleanClause.Occur.SHOULD);
    else if (remainingUpperSize < chunkLength)
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 1 + remainingUpperSize, 1 + remainingUpperSize), BooleanClause.Occur.SHOULD);
    else
        secondLevelOrFilter.add(new TermRangeLengthFilter(names().indexName(), subPrefixLower, currUpper, false,
                false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);

    lastFilter = new BooleanFilter();
    // Handle the first diverging token of the upperTerm (if it's not also the last available!)
    if (uppers.hasNext()) {
        lastFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    secondLevelOrFilter.add(lastFilter, BooleanClause.Occur.SHOULD);
    // Then get to the last token of the upperTerm
    while (uppers.hasNext()) {
        BooleanFilter orFilter = new BooleanFilter();
        lastFilter.add(orFilter, BooleanClause.Occur.MUST);
        orFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                false, false, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.SHOULD);
        BooleanFilter nextFilter = new BooleanFilter();
        nextFilter.add(new TermFilter(names().createIndexNameTerm(currUpper)), BooleanClause.Occur.MUST);
        orFilter.add(nextFilter, BooleanClause.Occur.SHOULD);
        lastFilter = nextFilter;
        remainingUpperSize -= currUpper.length() - 1;
        currUpper = uppers.next();
    }
    // Handle the last token of the upperTerm
    if (remainingUpperSize < 0)
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper),
                currUpper, false, includeUpper, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
    else if (remainingUpperSize < chunkLength)
        lastFilter.add(
                new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper), currUpper,
                        false, includeUpper, 1 + remainingUpperSize, 1 + remainingUpperSize),
                BooleanClause.Occur.MUST);
    else
        lastFilter.add(new TermRangeLengthFilter(names().indexName(), luceneTermLowerBound(currUpper),
                currUpper, false, includeUpper, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST);

    return topLevelAndFilter;
}

From source file: org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License: Apache License

@Override
public Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        while (tok.incrementToken()) {
            q.add(new WildcardQuery(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    }
    return q;
}

From source file: org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License: Apache License

@Override
public Filter wildcardFilter(String value, @Nullable QueryParseContext context) {
    // Use HashSplitterSearch* analysis and post-process it to create the real query
    TokenStream tok = null;
    try {
        tok = searchAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        while (tok.incrementToken()) {
            f.add(new WildcardFilter(names().createIndexNameTerm(termAtt.toString()), wildcardOne, wildcardAny),
                    BooleanClause.Occur.MUST);
        }
        tok.end();
        tok.close();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    }
    return f;
}

From source file: org.elasticsearch.index.mapper.token.AnalyzedTextFieldMapper.java

License: Apache License

static List<String> getAnalyzedText(TokenStream tokenStream) throws IOException {
    try {
        List<String> analyzedText = new ArrayList<>();
        CharTermAttribute terms = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();

        while (tokenStream.incrementToken()) {
            analyzedText.add(terms.toString());
        }
        tokenStream.end();
        return analyzedText;
    } finally {
        tokenStream.close();
    }
}
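
A hypothetical call site for this helper might look as follows; the StandardAnalyzer, field name, and sample text are illustrative assumptions, not part of the original mapper:

// Hypothetical usage sketch for getAnalyzedText(); analyzer and field name are placeholders.
Analyzer analyzer = new StandardAnalyzer();
List<String> tokens = getAnalyzedText(analyzer.tokenStream("field", "The Quick Brown Fox"));
// With StandardAnalyzer this yields the lowercased terms, subject to its stopword configuration.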

From source file: org.elasticsearch.index.query.CommonTermsQueryParser.java

License: Apache License

private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName,
        QueryParseContext parseContext, String queryAnalyzer, String lowFreqMinimumShouldMatch,
        String highFreqMinimumShouldMatch) throws IOException {

    FieldMapper<?> mapper = null;
    String field;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
        mapper = smartNameFieldMappers.mapper();
        field = mapper.names().indexName();
    } else {
        field = fieldName;
    }

    Analyzer analyzer = null;
    if (queryAnalyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null && smartNameFieldMappers != null) {
            analyzer = smartNameFieldMappers.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(queryAnalyzer);
        if (analyzer == null) {
            throw new ElasticsearchIllegalArgumentException("No analyzer found for [" + queryAnalyzer + "]");
        }
    }

    // Logic similar to QueryParser#getFieldQuery
    TokenStream source = analyzer.tokenStream(field, queryString);
    int count = 0;
    try {
        source.reset();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        while (source.incrementToken()) {
            BytesRef ref = new BytesRef(termAtt.length() * 4); // oversize for UTF-8
            UnicodeUtil.UTF16toUTF8(termAtt.buffer(), 0, termAtt.length(), ref);
            query.add(new Term(field, ref));
            count++;
        }
    } finally {
        source.close();
    }

    if (count == 0) {
        return null;
    }
    query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
    query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
    return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext);
}

From source file: org.elasticsearch.index.search.QueryStringQueryParser.java

License: Apache License

private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
    if (analyzeWildcard == false) {
        return super.getPrefixQuery(field, termStr);
    }
    List<List<String>> tlist;
    // get Analyzer from superclass and tokenize the term
    TokenStream source = null;
    try {
        try {
            source = getAnalyzer().tokenStream(field, termStr);
            source.reset();
        } catch (IOException e) {
            return super.getPrefixQuery(field, termStr);
        }
        tlist = new ArrayList<>();
        List<String> currentPos = new ArrayList<>();
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);

        while (true) {
            try {
                if (!source.incrementToken())
                    break;
            } catch (IOException e) {
                break;
            }
            if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
                tlist.add(currentPos);
                currentPos = new ArrayList<>();
            }
            currentPos.add(termAtt.toString());
        }
        if (currentPos.isEmpty() == false) {
            tlist.add(currentPos);
        }
    } finally {
        if (source != null) {
            IOUtils.closeWhileHandlingException(source);
        }
    }

    if (tlist.size() == 0) {
        return new MatchNoDocsQuery("analysis was empty for " + field + ":" + termStr);
    }

    if (tlist.size() == 1 && tlist.get(0).size() == 1) {
        return super.getPrefixQuery(field, tlist.get(0).get(0));
    }

    // build a boolean query with prefix on the last position only.
    List<BooleanClause> clauses = new ArrayList<>();
    for (int pos = 0; pos < tlist.size(); pos++) {
        List<String> plist = tlist.get(pos);
        boolean isLastPos = (pos == tlist.size() - 1);
        Query posQuery;
        if (plist.size() == 1) {
            if (isLastPos) {
                posQuery = super.getPrefixQuery(field, plist.get(0));
            } else {
                posQuery = newTermQuery(new Term(field, plist.get(0)));
            }
        } else if (isLastPos == false) {
            // build a synonym query for terms in the same position.
            Term[] terms = new Term[plist.size()];
            for (int i = 0; i < plist.size(); i++) {
                terms[i] = new Term(field, plist.get(i));
            }
            posQuery = new SynonymQuery(terms);
        } else {
            List<BooleanClause> innerClauses = new ArrayList<>();
            for (String token : plist) {
                innerClauses
                        .add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
            }
            posQuery = getBooleanQuery(innerClauses);
        }
        clauses.add(new BooleanClause(posQuery,
                getDefaultOperator() == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
    }
    return getBooleanQuery(clauses);
}

From source file: org.elasticsearch.index.search.TextQueryParser.java

License: Apache License

public Query parse(Type type) {
    FieldMapper mapper = null;
    String field = fieldName;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null) {
        if (smartNameFieldMappers.hasMapper()) {
            mapper = smartNameFieldMappers.mapper();
            if (mapper != null) {
                field = mapper.names().indexName();
            }
        }
    }

    if (mapper != null && mapper.useFieldQueryWithQueryString()) {
        return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext);
    }

    Analyzer analyzer = null;
    if (this.analyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer);
        if (analyzer == null) {
            throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]");
        }
    }

    // Logic similar to QueryParser#getFieldQuery

    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new FastStringReader(text));
        source.reset();
    } catch (IOException e) {
        source = analyzer.tokenStream(field, new FastStringReader(text));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    CharTermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(CharTermAttribute.class)) {
            termAtt = buffer.getAttribute(CharTermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }

    Term termFactory = new Term(field);
    if (numTokens == 0) {
        return MatchNoDocsQuery.INSTANCE;
    } else if (type == Type.BOOLEAN) {
        if (numTokens == 1) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query q = newTermQuery(mapper, termFactory.createTerm(term));
            return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
        }
        BooleanQuery q = new BooleanQuery(positionCount == 1);
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
            q.add(currentQuery, occur);
        }
        return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
    } else if (type == Type.PHRASE) {
        if (severalTokensAtSamePosition) {
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.setSlop(phraseSlop);
            List<Term> multiTerms = new ArrayList<Term>();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (positionIncrement > 0 && multiTerms.size() > 0) {
                    if (enablePositionIncrements) {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                    } else {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                    }
                    multiTerms.clear();
                }
                position += positionIncrement;
                multiTerms.add(termFactory.createTerm(term));
            }
            if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
            } else {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
            }
            return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
        } else {
            PhraseQuery pq = new PhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(termFactory.createTerm(term), position);
                } else {
                    pq.add(termFactory.createTerm(term));
                }
            }
            return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
        }
    } else if (type == Type.PHRASE_PREFIX) {
        MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
        mpq.setSlop(phraseSlop);
        mpq.setMaxExpansions(maxExpansions);
        List<Term> multiTerms = new ArrayList<Term>();
        int position = -1;
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
                if (posIncrAtt != null) {
                    positionIncrement = posIncrAtt.getPositionIncrement();
                }
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            if (positionIncrement > 0 && multiTerms.size() > 0) {
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                }
                multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(termFactory.createTerm(term));
        }
        if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
        } else {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
        }
        return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
    }

    throw new ElasticSearchIllegalStateException("No type found for [" + type + "]");
}

From source file: org.elasticsearch.search.aggregations.bucket.significant.SignificantTextAggregator.java

License: Apache License

@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub)
        throws IOException {
    final BytesRefBuilder previous = new BytesRefBuilder();
    return new LeafBucketCollectorBase(sub, null) {

        @Override
        public void collect(int doc, long bucket) throws IOException {
            collectFromSource(doc, bucket, fieldName, sourceFieldNames);
            numCollectedDocs++;
            if (dupSequenceSpotter != null) {
                dupSequenceSpotter.startNewSequence();
            }
        }

        private void processTokenStream(int doc, long bucket, TokenStream ts, BytesRefHash inDocTerms,
                String fieldText) throws IOException {
            if (dupSequenceSpotter != null) {
                ts = new DeDuplicatingTokenFilter(ts, dupSequenceSpotter);
            }
            CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            try {
                while (ts.incrementToken()) {
                    if (dupSequenceSpotter != null) {
                        long newTrieSize = dupSequenceSpotter.getEstimatedSizeInBytes();
                        long growth = newTrieSize - lastTrieSize;
                        // Only update the circuit breaker once the trie has grown by a full reporting interval
                        if (growth > MEMORY_GROWTH_REPORTING_INTERVAL_BYTES) {
                            addRequestCircuitBreakerBytes(growth);
                            lastTrieSize = newTrieSize;
                        }
                    }
                    previous.clear();
                    previous.copyChars(termAtt);
                    BytesRef bytes = previous.get();
                    if (inDocTerms.add(bytes) >= 0) {
                        if (includeExclude == null || includeExclude.accept(bytes)) {
                            long bucketOrdinal = bucketOrds.add(bytes);
                            if (bucketOrdinal < 0) { // already seen
                                bucketOrdinal = -1 - bucketOrdinal;
                                collectExistingBucket(sub, doc, bucketOrdinal);
                            } else {
                                collectBucket(sub, doc, bucketOrdinal);
                            }
                        }
                    }
                }

            } finally {
                ts.close();
            }
        }

        private void collectFromSource(int doc, long bucket, String indexedFieldName, String[] sourceFieldNames)
                throws IOException {
            MappedFieldType fieldType = context.getQueryShardContext().fieldMapper(indexedFieldName);
            if (fieldType == null) {
                throw new IllegalArgumentException("Aggregation [" + name + "] cannot process field ["
                        + indexedFieldName + "] since it is not present");
            }

            SourceLookup sourceLookup = context.lookup().source();
            sourceLookup.setSegmentAndDocument(ctx, doc);
            BytesRefHash inDocTerms = new BytesRefHash(256, context.bigArrays());

            try {
                for (String sourceField : sourceFieldNames) {
                    List<Object> textsToHighlight = sourceLookup.extractRawValues(sourceField);
                    textsToHighlight = textsToHighlight.stream().map(obj -> {
                        if (obj instanceof BytesRef) {
                            return fieldType.valueForDisplay(obj).toString();
                        } else {
                            return obj;
                        }
                    }).collect(Collectors.toList());

                    Analyzer analyzer = fieldType.indexAnalyzer();
                    for (Object fieldValue : textsToHighlight) {
                        String fieldText = fieldValue.toString();
                        TokenStream ts = analyzer.tokenStream(indexedFieldName, fieldText);
                        processTokenStream(doc, bucket, ts, inDocTerms, fieldText);
                    }
                }
            } finally {
                Releasables.close(inDocTerms);
            }
        }
    };
}

From source file: org.elasticsearch.search.highlight.PlainHighlighter.java

License: Apache License

private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream)
        throws IOException {
    try {
        if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
            // Can't split on term boundaries without offsets
            return -1;
        }
        int end = -1;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            OffsetAttribute attr = tokenStream.getAttribute(OffsetAttribute.class);
            if (attr.endOffset() >= noMatchSize) {
                // Jump to the end of this token if it wouldn't put us past the boundary
                if (attr.endOffset() == noMatchSize) {
                    end = noMatchSize;
                }
                return end;
            }
            end = attr.endOffset();
        }
        // We've exhausted the token stream so we should just highlight everything.
        return end;
    } finally {
        tokenStream.end();
        tokenStream.close();
    }
}
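
A hedged usage sketch: the method is private to PlainHighlighter, so calling it directly is hypothetical, and the analyzer, field name, noMatchSize, and text below are placeholders:

// Hypothetical driver: find a token boundary at which to cut a ~100-char no-match excerpt.
try (Analyzer analyzer = new StandardAnalyzer()) {
    TokenStream ts = analyzer.tokenStream("body", "Some stored field text without any query matches.");
    int end = findGoodEndForNoHighlightExcerpt(100, ts);
    // end == -1 means offsets were unavailable; otherwise truncate the excerpt at 'end'.
}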

From source file: org.elasticsearch.search.suggest.CompletionTokenStreamTest.java

License: Apache License

@Test
public void testValidNumberOfExpansions() throws IOException {
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i + 1)), new CharsRef("" + (1000 + (i + 1))), true);
    }
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0; i < 8; i++) {
        valueBuilder.append(i + 1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()),
            MockTokenizer.WHITESPACE, true);
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter,
            new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
                @Override
                public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
                    Set<IntsRef> finiteStrings = suggester
                            .toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
                    return finiteStrings;
                }
            });

    suggestTokenStream.reset();
    ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while (suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    suggestTokenStream.close();
    assertEquals(count, 256);
    assertEquals(count, maxPos);

}