Example usage for org.apache.lucene.analysis CachingTokenFilter reset

List of usage examples for org.apache.lucene.analysis CachingTokenFilter reset

Introduction

In this page you can find the example usage for org.apache.lucene.analysis CachingTokenFilter reset.

Prototype

@Override
public void reset() throws IOException 

Source Link

Document

Propagates reset if incrementToken has not yet been called.

Usage

From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.processors.SKOSQueryNodeProcessor.java

License:Apache License

/**
 * Analyzes the text of an eligible {@link FieldQueryNode} and rewrites it into
 * a term, boolean, phrase, or multi-phrase query node based on the number of
 * tokens produced and their position increments. Tokens that carry a
 * {@code SKOSTypeAttribute} are wrapped in a {@code BoostQueryNode} using the
 * configured boosts (when {@code boosts} is non-null).
 *
 * @param node the query node being post-processed
 * @return the original node, the rewritten node, or a
 *         {@code NoTokenFoundQueryNode} when analysis yields no tokens
 * @throws QueryNodeException propagated from the query-node framework
 */
@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {

    // Only plain textable nodes are analyzed; wildcard/fuzzy/regexp nodes and
    // range bounds must keep their raw text.
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {

        FieldQueryNode fieldNode = ((FieldQueryNode) node);
        String text = fieldNode.getTextAsString();
        String field = fieldNode.getFieldAsString();

        // Tokenize the node's text; reset() is required before the first
        // incrementToken() call on the stream.
        TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, text);
            source.reset();
        } catch (IOException e1) {
            throw new RuntimeException(e1);
        }
        // Cache all tokens so the stream can be consumed twice: once to count,
        // once to build the query nodes.
        CachingTokenFilter buffer = new CachingTokenFilter(source);

        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }

        // First pass: count tokens and positions, and detect stacked tokens
        // (position increment 0), which force multi-phrase handling.
        try {

            while (buffer.incrementToken()) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;

                } else {
                    severalTokensAtSamePosition = true;
                }

            }

        } catch (IOException e) {
            // ignore
        }

        try {
            // rewind the buffer stream so the cached tokens can be replayed
            buffer.reset();

            // close original stream - all tokens buffered
            source.close();
        } catch (IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }

        CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();

        } else if (numTokens == 1) {
            // Single token: keep the node, just replace its text with the
            // analyzed term.
            String term = null;
            try {
                boolean hasNext;
                hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();

            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            fieldNode.setText(term);

            return fieldNode;

        } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
            if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
                // no phrase query: emit one child per token, grouped under a
                // boolean node (conjunction when all tokens share a position)
                LinkedList<QueryNode> children = new LinkedList<QueryNode>();

                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();

                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    // SKOS-expanded tokens get a boost derived from their
                    // SKOS type (e.g. preferred vs. alternative label).
                    if (buffer.hasAttribute(SKOSTypeAttribute.class) && boosts != null) {

                        SKOSTypeAttribute skosAttr = buffer.getAttribute(SKOSTypeAttribute.class);
                        children.add(new BoostQueryNode(new FieldQueryNode(field, term, -1, -1),
                                getBoost(skosAttr.getSkosType())));

                    } else {

                        children.add(new FieldQueryNode(field, term, -1, -1));

                    }

                }
                return new GroupQueryNode(new StandardBooleanQueryNode(children, positionCount == 1));
            } else {
                // phrase query with stacked tokens: build a multi-phrase node,
                // grouping tokens that share a position.
                MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
                int position = -1;
                int i = 0;
                int termGroupCount = 0;
                for (; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }

                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    // A positive increment closes the current group of
                    // same-position terms; flush it into the query node.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {

                        for (FieldQueryNode termNode : multiTerms) {

                            if (this.positionIncrementsEnabled) {
                                termNode.setPositionIncrement(position);
                            } else {
                                termNode.setPositionIncrement(termGroupCount);
                            }

                            mpq.add(termNode);

                        }

                        // Only increment once for each "group" of
                        // terms that were in the same position:
                        termGroupCount++;

                        multiTerms.clear();

                    }

                    position += positionIncrement;
                    multiTerms.add(new FieldQueryNode(field, term, -1, -1));

                }

                // Flush the final group of same-position terms.
                for (FieldQueryNode termNode : multiTerms) {

                    if (this.positionIncrementsEnabled) {
                        termNode.setPositionIncrement(position);

                    } else {
                        termNode.setPositionIncrement(termGroupCount);
                    }

                    mpq.add(termNode);

                }

                return mpq;

            }

        } else {
            // Quoted node without stacked tokens: ordinary tokenized phrase.

            TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }

                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                if (this.positionIncrementsEnabled) {
                    position += positionIncrement;
                    newFieldNode.setPositionIncrement(position);

                } else {
                    // Without position increments, positions are just the
                    // token ordinal.
                    newFieldNode.setPositionIncrement(i);
                }

                pq.add(newFieldNode);

            }

            return pq;

        }

    }

    return node;

}

From source file:com.bewsia.script.safe.lucene.SEntity.java

License:Open Source License

/**
 * Highlights occurrences of {@code query} inside {@code text} for the given
 * field, returning up to {@code maxNumFragments} fragments of roughly
 * {@code fragmentSize} characters joined by {@code separator}. Falls back to
 * the original text when no fragment matches.
 */
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    Scorer queryScorer = new org.apache.lucene.search.highlight.QueryScorer(query);
    Highlighter fragmentHighlighter = new Highlighter(new SimpleHTMLFormatter(), queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

    CachingTokenFilter tokens = new CachingTokenFilter(
            analyzer.tokenStream(field, new StringReader(text)));
    // reset() is required before the highlighter consumes the stream.
    tokens.reset();

    String fragments = fragmentHighlighter.getBestFragments(tokens, text, maxNumFragments, separator);
    return fragments.isEmpty() ? text : fragments;
}

From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java

License:Open Source License

/**
 * Analyzes the text of an eligible {@link FieldQueryNode} and, when it
 * produces more than one token, rewrites the node into a
 * {@code TokenizedPhraseQueryNode} whose children inherit the node's
 * datatype tag. Single-token and zero-token inputs are handled by returning
 * the original node or a {@code NoTokenFoundQueryNode} respectively.
 *
 * @param node the query node being post-processed
 * @return the original node, a phrase node, or a {@code NoTokenFoundQueryNode}
 * @throws QueryNodeException propagated from the query-node framework
 */
@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    // Protected, wildcard, fuzzy, regexp and range-bound nodes keep their
    // raw text and are not analyzed here.
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode)) {

        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();

        // Tokenize the node's text; reset() must precede incrementToken().
        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        // Cache tokens so the stream can be counted first and replayed after.
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        // First pass: count the tokens.
        int numTokens = 0;
        try {
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore
        }

        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }
        // phrase query
        else if (numTokens != 1) {
            String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            // assign datatype
            pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);

            // Second pass: replay the cached tokens into phrase children.
            for (int i = 0; i < numTokens; i++) {
                String term = null;

                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                // set position increment (token ordinal)
                newFieldNode.setPositionIncrement(i);
                // assign datatype
                newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
                pq.add(newFieldNode);
            }
            return pq;
        }
        // Single token: the node is returned unchanged below.
    }
    return node;
}

From source file:org.allenai.blacklab.queryParser.lucene.QueryParserBase.java

License:Apache License

/**
 * Uses the analyzer to tokenize {@code queryText} and builds a term pattern,
 * boolean pattern, phrase, or multi-phrase pattern depending on the token
 * count and position increments (BL: Lucene query classes are replaced by
 * their TextPattern counterparts).
 *
 * @param analyzer  analyzer used to tokenize the query text
 * @param field     field the resulting pattern targets
 * @param queryText raw query text to analyze
 * @param quoted    whether the text was quoted (forces phrase semantics)
 * @return the resulting pattern, or {@code null} when analysis yields no tokens
 * @exception org.apache.lucene.queryparser.classic.ParseException throw in
 *            overridden method to disallow
 */
protected TextPattern newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted)
        throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source;
    try {
        source = analyzer.tokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
        p.initCause(e);
        throw p;
    }
    // Cache the tokens so the stream can be counted, rewound, and replayed.
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermToBytesRefAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    buffer.reset();

    if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
        termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
    }
    if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    // First pass: count tokens/positions and detect stacked tokens
    // (position increment 0), which require multi-phrase handling.
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
        p.initCause(e);
        throw p;
    }

    // Shared BytesRef view: fillBytesRef() updates it in place for each token.
    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();

    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            termAtt.fillBytesRef();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
    } else {
        if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
            if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
                // no phrase query:
                TextPatternBoolean q = newBooleanQuery(positionCount == 1); // BL: BooleanQuery -> TextPatternBoolean

                // All-same-position tokens under AND become MUST clauses.
                BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR
                        ? BooleanClause.Occur.MUST
                        : BooleanClause.Occur.SHOULD;

                for (int i = 0; i < numTokens; i++) {
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    TextPattern currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
                    q.add(currentQuery, occur);
                }
                return q;
            } else {
                // phrase query:
                TPMultiPhrase mpq = newMultiPhraseQuery(); // BL: MultiPhraseQuery -> TPMultiPhrase
                mpq.setSlop(phraseSlop);
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    // A positive increment closes the current same-position
                    // group; flush it into the multi-phrase pattern.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
                // Flush the final group.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // Plain phrase: one term per position.
            TPPhrase pq = newPhraseQuery(); // BL: PhraseQuery -> TPPhrase
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    termAtt.fillBytesRef();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position);
                } else {
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
            }
            return pq;
        }
    }
}

From source file:org.apache.fuzzydb.queryParser.QueryParser.java

License:Open Source License

/**
 * Uses the analyzer to tokenize {@code queryText} and builds a
 * {@code TermQuery}, {@code BooleanQuery}, {@code PhraseQuery}, or
 * {@code MultiPhraseQuery} based on the token count and position increments.
 *
 * @param field     field the resulting query targets
 * @param queryText raw query text to analyze
 * @return the resulting query, or {@code null} when analysis yields no tokens
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    // Prefer the reusable token stream; fall back to a fresh one when the
    // analyzer cannot provide (or reset) a reusable stream.
    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    // Cache the tokens so the stream can be counted, rewound, and replayed.
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(TermAttribute.class)) {
            termAtt = buffer.getAttribute(TermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    // First pass: count tokens/positions and detect stacked tokens
    // (position increment 0), which require multi-phrase handling.
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }

    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        String term = null;
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, term));
    } else {
        if (severalTokensAtSamePosition) {
            if (positionCount == 1) {
                // no phrase query: all tokens occupy one position, so OR them
                BooleanQuery q = newBooleanQuery(true);
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    Query currentQuery = newTermQuery(new Term(field, term));
                    q.add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            } else {
                // phrase query: stacked tokens need a MultiPhraseQuery
                MultiPhraseQuery mpq = newMultiPhraseQuery();
                mpq.setSlop(phraseSlop);
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    // A positive increment closes the current same-position
                    // group; flush it into the query.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, term));
                }
                // Flush the final group.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // Plain phrase: one term per position.
            PhraseQuery pq = newPhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.term();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, term), position);
                } else {
                    pq.add(new Term(field, term));
                }
            }
            return pq;
        }
    }
}

From source file:org.apache.maven.index.DefaultIteratorResultSet.java

License:Apache License

/**
 * Returns a string that contains match fragment highlighted in style as user requested.
 * Only {@code MatchHighlightMode.HTML} is supported; any other mode causes the
 * token stream to be released and an {@code UnsupportedOperationException} to
 * be thrown.
 *
 * @param context indexing context supplying the analyzer
 * @param hr the highlight request (query and highlight mode)
 * @param field the indexer field being highlighted
 * @param text the stored field text to highlight
 * @return the best highlighted fragments (at most 3)
 * @throws IOException if the token stream cannot be processed
 */
protected List<String> highlightField(IndexingContext context, MatchHighlightRequest hr, IndexerField field,
        String text) throws IOException {
    // exception with classnames: class names are stored slash-separated with a
    // trailing newline-dot pattern; normalize them to dotted form for display
    if (MAVEN.CLASSNAMES.equals(field.getOntology())) {
        text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n");
    }

    Analyzer analyzer = context.getAnalyzer();
    TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text));

    CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream);

    Formatter formatter = null;

    if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) {
        formatter = new SimpleHTMLFormatter();
    } else {
        // Unsupported mode: release the stream before bailing out.
        tokenStream.reset();
        tokenStream.end();
        tokenStream.close();
        // FIX: message previously read "Hightlight more \"...\"" (garbled);
        // it now correctly names the unsupported highlight mode.
        throw new UnsupportedOperationException(
                "Highlight mode \"" + hr.getHighlightMode().toString() + "\" is not supported!");
    }

    List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3);

    return bestFragments;
}

From source file:org.elasticsearch.index.search.TextQueryParser.java

License:Apache License

/**
 * Analyzes the query text for the resolved field and builds a query of the
 * requested {@code type}: BOOLEAN (term or boolean of terms), PHRASE
 * (phrase or multi-phrase when tokens are stacked), or PHRASE_PREFIX.
 * Mirrors the token-counting logic of Lucene's QueryParser#getFieldQuery.
 *
 * @param type the kind of query to build
 * @return the resulting query, wrapped for smart-name field handling
 */
public Query parse(Type type) {
    // Resolve the concrete index field (and its mapper) from the given name.
    FieldMapper mapper = null;
    String field = fieldName;
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null) {
        if (smartNameFieldMappers.hasMapper()) {
            mapper = smartNameFieldMappers.mapper();
            if (mapper != null) {
                field = mapper.names().indexName();
            }
        }
    }

    // Mappers that handle query strings themselves short-circuit the parsing.
    if (mapper != null && mapper.useFieldQueryWithQueryString()) {
        return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext);
    }

    // Pick the analyzer: explicit request > mapper's search analyzer >
    // the mapper service default.
    Analyzer analyzer = null;
    if (this.analyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer);
        if (analyzer == null) {
            throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]");
        }
    }

    // Logic similar to QueryParser#getFieldQuery

    // Prefer the reusable token stream; fall back to a fresh one on failure.
    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new FastStringReader(text));
        source.reset();
    } catch (IOException e) {
        source = analyzer.tokenStream(field, new FastStringReader(text));
    }
    // Cache tokens so the stream can be counted, rewound, and replayed.
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    CharTermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(CharTermAttribute.class)) {
            termAtt = buffer.getAttribute(CharTermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    // First pass: count tokens/positions and detect stacked tokens
    // (position increment 0), which require multi-phrase handling.
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }

    // Template term used to mint per-token terms for the resolved field.
    Term termFactory = new Term(field);
    if (numTokens == 0) {
        return MatchNoDocsQuery.INSTANCE;
    } else if (type == Type.BOOLEAN) {
        if (numTokens == 1) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query q = newTermQuery(mapper, termFactory.createTerm(term));
            return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
        }
        // Multiple tokens: combine per-token term queries with the configured
        // occur clause; coord is disabled when all tokens share one position.
        BooleanQuery q = new BooleanQuery(positionCount == 1);
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
            q.add(currentQuery, occur);
        }
        return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
    } else if (type == Type.PHRASE) {
        if (severalTokensAtSamePosition) {
            // Stacked tokens: group same-position terms in a MultiPhraseQuery.
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.setSlop(phraseSlop);
            List<Term> multiTerms = new ArrayList<Term>();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                // A positive increment closes the current same-position
                // group; flush it into the query.
                if (positionIncrement > 0 && multiTerms.size() > 0) {
                    if (enablePositionIncrements) {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                    } else {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                    }
                    multiTerms.clear();
                }
                position += positionIncrement;
                multiTerms.add(termFactory.createTerm(term));
            }
            // Flush the final group.
            if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
            } else {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
            }
            return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
        } else {
            // Plain phrase: one term per position.
            PhraseQuery pq = new PhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(termFactory.createTerm(term), position);
                } else {
                    pq.add(termFactory.createTerm(term));
                }
            }
            return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
        }
    } else if (type == Type.PHRASE_PREFIX) {
        // Phrase-prefix: same grouping as multi-phrase, with expansion cap.
        MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
        mpq.setSlop(phraseSlop);
        mpq.setMaxExpansions(maxExpansions);
        List<Term> multiTerms = new ArrayList<Term>();
        int position = -1;
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
                if (posIncrAtt != null) {
                    positionIncrement = posIncrAtt.getPositionIncrement();
                }
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            if (positionIncrement > 0 && multiTerms.size() > 0) {
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                }
                multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(termFactory.createTerm(term));
        }
        // Flush the final group.
        if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
        } else {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
        }
        return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
    }

    throw new ElasticSearchIllegalStateException("No type found for [" + type + "]");
}

From source file:org.janusgraph.diskstorage.solr.SolrIndex.java

License:Apache License

@SuppressWarnings("unchecked")
private List<String> customTokenize(String tokenizerClass, String value) {
    // Tokenizes `value` with a reflectively-instantiated Lucene Tokenizer and
    // returns the resulting terms as UTF-8 strings.
    CachingTokenFilter tokenStream = null;
    try {
        // Load the tokenizer class by name and invoke its no-arg constructor.
        final Tokenizer tokenizer = ((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader()
                .loadClass(tokenizerClass).getConstructor()).newInstance();
        tokenizer.setReader(new StringReader(value));
        tokenStream = new CachingTokenFilter(tokenizer);
        final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
        tokenStream.reset();
        final List<String> tokens = new ArrayList<>();
        while (tokenStream.incrementToken()) {
            tokens.add(bytesAtt.getBytesRef().utf8ToString());
        }
        return tokens;
    } catch (ReflectiveOperationException | IOException e) {
        // Both a bad tokenizer class and a tokenization failure surface as an
        // illegal argument to the caller, preserving the cause.
        throw new IllegalArgumentException(e.getMessage(), e);
    } finally {
        IOUtils.closeQuietly(tokenStream);
    }
}

From source file:org.sindice.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java

License:Apache License

@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    // Re-analyzes the text of plain textable nodes with the analyzer bound to
    // the node's datatype tag, expanding multi-token results into either an OR
    // group (query expansion at a single position) or a tokenized phrase.
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {

        this.positionIncrementsEnabled = false;
        final Boolean positionIncrementsEnabled = this.getQueryConfigHandler()
                .get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
        if (positionIncrementsEnabled != null) {
            this.positionIncrementsEnabled = positionIncrementsEnabled;
        }

        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();
        final String datatype = (String) fieldNode.getTag(DatatypeQueryNode.DATATYPE_TAGID);

        // Nodes without a datatype tag are left untouched.
        if (datatype == null) {
            return node;
        }

        final Analyzer analyzer = this.getQueryConfigHandler().get(KeywordConfigurationKeys.DATATYPES_ANALYZERS)
                .get(datatype);
        if (analyzer == null) {
            throw new QueryNodeException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX,
                    "No analyzer associated with " + datatype));
        }

        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        final TokenStream source;
        try {
            source = analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }

        // First pass: count tokens and detect stacked tokens (increment == 0),
        // which indicate analyzer-side query expansion (e.g. synonyms).
        try {
            while (buffer.incrementToken()) {
                numTokens++;
                final int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
            }
        } catch (final IOException e) {
            // ignore: tokens are buffered in memory
        }

        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            if (nbTwigs != 0) { // Twig special case
                return new WildcardNodeQueryNode();
            }
            return new NoTokenFoundQueryNode();
        } else if (numTokens == 1) {
            // Single token: rewrite the node's text in place.
            String term = null;
            try {
                final boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (final IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            fieldNode.setText(term);
            return fieldNode;
        } else {
            // Second pass: build one FieldQueryNode per token.
            final LinkedList<QueryNode> children = new LinkedList<QueryNode>();

            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                // NOTE(review): the increment is fixed at 1 here instead of being
                // read from posIncrAtt as in the counting pass — confirm intended.
                final int positionIncrement = 1;

                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                if (this.positionIncrementsEnabled) {
                    position += positionIncrement;
                    newFieldNode.setPositionIncrement(position);
                } else {
                    newFieldNode.setPositionIncrement(i);
                }

                // BUGFIX: previously a fresh FieldQueryNode was added here,
                // discarding the position increment configured just above.
                children.add(newFieldNode);
            }

            if (node.getParent() instanceof TokenizedPhraseQueryNode) {
                throw new QueryNodeException(new MessageImpl("Cannot build a MultiPhraseQuery"));
            }
            // If multiple terms at one single position, this must be a query
            // expansion. Perform a OR between the terms.
            if (severalTokensAtSamePosition && positionCount == 1) {
                return new GroupQueryNode(new OrQueryNode(children));
            }
            // if several tokens at same position && position count > 1, then
            // results can be unexpected
            else {
                final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
                for (int i = 0; i < children.size(); i++) {
                    pq.add(children.get(i));
                }
                return pq;
            }
        }
    } else if (node instanceof TwigQueryNode) {
        nbTwigs--;
        assert nbTwigs >= 0;
    }
    return node;
}

From source file:org.sindice.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java

License:Apache License

@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    // Re-tokenizes plain textable nodes and, when the analyzer yields more than
    // one token, replaces the node with a tokenized phrase. Wildcard, fuzzy,
    // regexp and range-bound nodes must keep their raw text.
    final boolean analyzable = node instanceof TextableQueryNode
            && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode)
            && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode);
    if (!analyzable) {
        return node;
    }

    final FieldQueryNode fieldNode = (FieldQueryNode) node;
    final String field = fieldNode.getFieldAsString();
    final String text = fieldNode.getTextAsString();

    final TokenStream tokenStream;
    try {
        tokenStream = this.analyzer.tokenStream(field, new StringReader(text));
        tokenStream.reset();
    } catch (final IOException e1) {
        throw new RuntimeException(e1);
    }
    final CachingTokenFilter cached = new CachingTokenFilter(tokenStream);

    // First pass: count the tokens produced by the analyzer.
    int tokenCount = 0;
    try {
        while (cached.incrementToken()) {
            tokenCount++;
        }
    } catch (final IOException e) {
        // ignore: tokens are buffered in memory
    }

    try {
        // rewind the cached stream for the second pass
        cached.reset();
        // all tokens are buffered, so the original stream can be closed
        tokenStream.close();
    } catch (final IOException e) {
        // ignore
    }

    if (!cached.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
    }
    final CharTermAttribute termAtt = cached.getAttribute(CharTermAttribute.class);

    if (tokenCount == 0) {
        return new NoTokenFoundQueryNode();
    }
    if (tokenCount == 1) {
        // A single token needs no phrase wrapping; keep the node as-is.
        return node;
    }

    // Second pass: materialize one FieldQueryNode per token into a phrase,
    // with consecutive position increments.
    final TokenizedPhraseQueryNode phrase = new TokenizedPhraseQueryNode();
    for (int i = 0; i < tokenCount; i++) {
        String term = null;
        try {
            final boolean hasNext = cached.incrementToken();
            assert hasNext == true;
            term = termAtt.toString();
        } catch (final IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        final FieldQueryNode termNode = new FieldQueryNode(field, term, -1, -1);
        termNode.setPositionIncrement(i);
        phrase.add(termNode);
    }
    return phrase;
}