Example usage for org.apache.lucene.analysis CachingTokenFilter CachingTokenFilter

List of usage examples for org.apache.lucene.analysis CachingTokenFilter CachingTokenFilter

Introduction

On this page you can find example usage for org.apache.lucene.analysis CachingTokenFilter CachingTokenFilter.

Prototype

public CachingTokenFilter(TokenStream input) 

Source Link

Document

Create a new CachingTokenFilter around input.

Usage

From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.processors.SKOSQueryNodeProcessor.java

License:Apache License

/**
 * Re-analyzes the text of an eligible textable query node and rebuilds it as a
 * term node, a grouped boolean node, a multi-phrase node, or a tokenized phrase
 * node, depending on how many tokens and positions the analyzer produces.
 * Wildcard, fuzzy, regexp, and range-bounded nodes are passed through untouched.
 *
 * NOTE(review): the token stream is consumed in two passes — first to count
 * tokens/positions, then (after reset) to read terms — so statement order here
 * is significant. Do not reorder the buffer operations.
 */
@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {

    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {

        FieldQueryNode fieldNode = ((FieldQueryNode) node);
        String text = fieldNode.getTextAsString();
        String field = fieldNode.getFieldAsString();

        TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, text);
            source.reset();
        } catch (IOException e1) {
            throw new RuntimeException(e1);
        }
        CachingTokenFilter buffer = new CachingTokenFilter(source);

        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }

        try {

            // First pass: consume the whole stream (caching it) to count tokens
            // and distinct positions; a zero increment means stacked tokens
            // (e.g. synonyms) at the same position.
            while (buffer.incrementToken()) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;

                } else {
                    severalTokensAtSamePosition = true;
                }

            }

        } catch (IOException e) {
            // ignore
        }

        try {
            // rewind the buffer stream
            buffer.reset();

            // close original stream - all tokens buffered
            source.close();
        } catch (IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }

        CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();

        } else if (numTokens == 1) {
            // Single token: keep the node, just replace its text with the
            // analyzed form of the term.
            String term = null;
            try {
                boolean hasNext;
                hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();

            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }

            fieldNode.setText(term);

            return fieldNode;

        } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
            if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
                // no phrase query:
                LinkedList<QueryNode> children = new LinkedList<QueryNode>();

                // Second pass: replay each cached token into a child node;
                // SKOS-typed tokens get a boost derived from their SKOS type.
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();

                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (buffer.hasAttribute(SKOSTypeAttribute.class) && boosts != null) {

                        SKOSTypeAttribute skosAttr = buffer.getAttribute(SKOSTypeAttribute.class);
                        children.add(new BoostQueryNode(new FieldQueryNode(field, term, -1, -1),
                                getBoost(skosAttr.getSkosType())));

                    } else {

                        children.add(new FieldQueryNode(field, term, -1, -1));

                    }

                }
                return new GroupQueryNode(new StandardBooleanQueryNode(children, positionCount == 1));
            } else {
                // phrase query:
                MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

                // Terms at the same position are grouped, then flushed into the
                // multi-phrase node whenever the position advances.
                List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
                int position = -1;
                int i = 0;
                int termGroupCount = 0;
                for (; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }

                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (positionIncrement > 0 && multiTerms.size() > 0) {

                        for (FieldQueryNode termNode : multiTerms) {

                            if (this.positionIncrementsEnabled) {
                                termNode.setPositionIncrement(position);
                            } else {
                                termNode.setPositionIncrement(termGroupCount);
                            }

                            mpq.add(termNode);

                        }

                        // Only increment once for each "group" of
                        // terms that were in the same position:
                        termGroupCount++;

                        multiTerms.clear();

                    }

                    position += positionIncrement;
                    multiTerms.add(new FieldQueryNode(field, term, -1, -1));

                }

                // Flush the final group of same-position terms.
                for (FieldQueryNode termNode : multiTerms) {

                    if (this.positionIncrementsEnabled) {
                        termNode.setPositionIncrement(position);

                    } else {
                        termNode.setPositionIncrement(termGroupCount);
                    }

                    mpq.add(termNode);

                }

                return mpq;

            }

        } else {

            // Quoted, one-token-per-position input: plain tokenized phrase.
            TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }

                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                if (this.positionIncrementsEnabled) {
                    position += positionIncrement;
                    newFieldNode.setPositionIncrement(position);

                } else {
                    newFieldNode.setPositionIncrement(i);
                }

                pq.add(newFieldNode);

            }

            return pq;

        }

    }

    return node;

}

From source file:biospectra.classify.Classifier.java

License:Apache License

/**
 * Analyzes {@code queryText} for {@code field} and, when it yields more than one
 * token, builds a {@link BooleanQuery} whose clauses are generated by the
 * strategy selected through {@code queryGenerationAlgorithm}. Returns
 * {@code null} when the text produces no term attribute or at most one token.
 */
protected BooleanQuery createQueryClauses(KmerQueryAnalyzer analyzer, String field, String queryText,
        QueryGenerationAlgorithm queryGenerationAlgorithm) {
    try (TokenStream source = analyzer.tokenStream(field, queryText);
            CachingTokenFilter stream = new CachingTokenFilter(source)) {

        // Bail out early when the analyzer exposes no term attribute at all.
        if (stream.getAttribute(TermToBytesRefAttribute.class) == null) {
            return null;
        }

        // Pass 1: consume (and cache) the stream just to count the tokens.
        int tokenCount = 0;
        stream.reset();
        while (stream.incrementToken()) {
            tokenCount++;
        }

        // Zero tokens or a single term never produce a boolean query here.
        if (tokenCount <= 1) {
            return null;
        }

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.setDisableCoord(false);

        TermToBytesRefAttribute termAttr = stream.getAttribute(TermToBytesRefAttribute.class);
        OffsetAttribute offsetAttr = stream.getAttribute(OffsetAttribute.class);

        // Pass 2: replay the cached tokens and let the chosen strategy emit clauses.
        stream.reset();

        if (queryGenerationAlgorithm.equals(QueryGenerationAlgorithm.NAIVE_KMER)) {
            createNaiveKmerQueryClauses(builder, field, stream, termAttr, offsetAttr);
        } else if (queryGenerationAlgorithm.equals(QueryGenerationAlgorithm.CHAIN_PROXIMITY)) {
            createChainProximityQueryClauses(builder, field, stream, termAttr, offsetAttr);
        } else if (queryGenerationAlgorithm.equals(QueryGenerationAlgorithm.PAIRED_PROXIMITY)) {
            createPairedProximityQueryClauses(builder, field, stream, termAttr, offsetAttr);
        }

        return builder.build();
    } catch (IOException e) {
        throw new RuntimeException("Error analyzing query text", e);
    }
}

From source file:com.bewsia.script.safe.lucene.SEntity.java

License:Open Source License

/**
 * Highlights the best fragments of {@code text} for the given {@code query}.
 *
 * @param query           the query providing the terms to highlight
 * @param text            the raw text to analyze and highlight
 * @param field           the field name used to pick the analyzer chain
 * @param fragmentSize    target size of each fragment in characters
 * @param maxNumFragments maximum number of fragments to return
 * @param separator       string inserted between fragments
 * @return the highlighted fragments joined by {@code separator}, or the
 *         original {@code text} when no fragment was produced
 * @throws Exception if analysis or highlighting fails
 */
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    CachingTokenFilter tokenStream = new CachingTokenFilter(
            analyzer.tokenStream(field, new StringReader(text)));
    try {
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
        tokenStream.reset();
        String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
        return rv.length() == 0 ? text : rv;
    } finally {
        // FIX: the original leaked both the token stream and the analyzer;
        // release them even when highlighting throws.
        tokenStream.close();
        analyzer.close();
    }
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

/**
 * Highlights every stored value of {@code fieldName} on {@code doc} and appends
 * the best-scoring snippets to {@code docSummaries}; when no scoring fragment is
 * produced, falls back to the configured alternate field.
 *
 * @param query     the (possibly rewritten) user query driving the highlighter
 * @param req       the current Solr request (parameters, searcher, schema)
 * @param docSummaries output list the snippets are appended to, keyed by field
 * @param docId     the Lucene document id (kept for interface compatibility)
 * @param doc       the stored document whose field values are highlighted
 * @param fieldName the field to highlight
 * @throws IOException if analysis or index access fails
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
        listFields.add(field.stringValue());
    }

    String[] docTexts = listFields.toArray(new String[listFields.size()]);

    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
    if (docTexts.length == 0)
        return;

    TokenStream tokenStream;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<TextFragment>();

    // The TermOffsets optimization is currently disabled (the code that created
    // tots from term vectors was commented out), so tots stays null and we
    // always fall back to the analyzer below. The variable and branch are kept
    // so the optimization can be re-enabled without restructuring the loop.
    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // fall back to analyzer
            tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            // The phrase highlighter needs a cached stream because it reads the
            // tokens once during construction and once during highlighting.
            if (maxCharsToAnalyze < 0) {
                tokenStream = new CachingTokenFilter(tokenStream);
            } else {
                tokenStream = new CachingTokenFilter(
                        new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            tokenStream.reset();
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j],
                    mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                    frags.add(bestTextFragments[k]);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }
    // sort such that the fragments with the highest score come first
    Collections.sort(frags, new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
            return Math.round(arg1.getScore() - arg0.getScore());
        }
    });

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    String[] summaries = null;
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
            }
            if (fragTexts.size() >= numFragments)
                break;
        }
        // FIX: the original `(String[]) fragTexts.toArray()` always threw
        // ClassCastException, because List.toArray() returns Object[].
        summaries = fragTexts.toArray(new String[0]);
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    }
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:com.sindicetech.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java

License:Open Source License

/**
 * Analyzes the node's text with the given analyzer and wraps the resulting
 * stream in a {@link CachingTokenFilter} so it can be replayed.
 *
 * @throws RuntimeException if the underlying token stream cannot be opened
 */
private CachingTokenFilter getBuffer(Analyzer analyzer, FieldQueryNode fieldNode) {
    final String queryText = fieldNode.getTextAsString();
    final String fieldName = fieldNode.getFieldAsString();

    TokenStream stream;
    try {
        stream = analyzer.tokenStream(fieldName, new StringReader(queryText));
        stream.reset();
    } catch (final IOException cause) {
        throw new RuntimeException(cause);
    }
    return new CachingTokenFilter(stream);
}

From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java

License:Open Source License

/**
 * Turns a multi-token textable node into a {@link TokenizedPhraseQueryNode}
 * carrying the node's datatype; wildcard, fuzzy, regexp, protected, and
 * range-bounded nodes — as well as zero- or one-token text — are left alone
 * (one-token text keeps the original node unchanged).
 */
@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    final boolean analyzable = node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode);
    if (!analyzable) {
        return node;
    }

    final FieldQueryNode textNode = (FieldQueryNode) node;
    final String queryText = textNode.getTextAsString();
    final String fieldName = textNode.getFieldAsString();

    final TokenStream stream;
    try {
        stream = this.analyzer.tokenStream(fieldName, new StringReader(queryText));
        stream.reset();
    } catch (final IOException cause) {
        throw new RuntimeException(cause);
    }
    final CachingTokenFilter cached = new CachingTokenFilter(stream);

    // Pass 1: consume (and cache) the stream just to count the tokens.
    int tokenCount = 0;
    try {
        while (cached.incrementToken()) {
            tokenCount++;
        }
    } catch (final IOException ignored) {
        // ignore: counting is best effort
    }

    try {
        // Rewind the cache for the second pass; the original stream can be
        // closed since every token is now buffered.
        cached.reset();
        stream.close();
    } catch (final IOException ignored) {
        // ignore
    }

    if (!cached.hasAttribute(CharTermAttribute.class)) {
        return new NoTokenFoundQueryNode();
    }
    final CharTermAttribute charTerm = cached.getAttribute(CharTermAttribute.class);

    if (tokenCount == 0) {
        return new NoTokenFoundQueryNode();
    }
    if (tokenCount != 1) {
        // Pass 2: replay the cached tokens into a phrase node, tagging the
        // phrase and every term with the node's datatype.
        final String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
        final TokenizedPhraseQueryNode phrase = new TokenizedPhraseQueryNode();
        phrase.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);

        for (int pos = 0; pos < tokenCount; pos++) {
            String term = null;
            try {
                final boolean hasNext = cached.incrementToken();
                assert hasNext == true;
                term = charTerm.toString();
            } catch (final IOException ignored) {
                // safe to ignore, because we know the number of tokens
            }

            final FieldQueryNode termNode = new FieldQueryNode(fieldName, term, -1, -1);
            termNode.setPositionIncrement(pos);
            termNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
            phrase.add(termNode);
        }
        return phrase;
    }
    // Exactly one token: keep the node as-is.
    return node;
}

From source file:de.powerstaff.business.service.impl.ProfileSearchServiceImpl.java

License:Open Source License

/**
 * Runs {@code aContent} through the analyzer for the profile content field and
 * returns up to five highlighted fragments joined by a non-breaking ellipsis.
 *
 * @throws IOException                  if analysis fails
 * @throws InvalidTokenOffsetsException if the highlighter sees bad offsets
 */
protected String getHighlightedSearchResult(Analyzer aAnalyzer, Highlighter aHighlighter, String aContent,
        Query aQuery) throws IOException, InvalidTokenOffsetsException {

    final CachingTokenFilter cachedStream = new CachingTokenFilter(
            aAnalyzer.tokenStream(ProfileIndexerService.CONTENT, new StringReader(aContent)));
    return aHighlighter.getBestFragments(cachedStream, aContent, 5, "&nbsp;...&nbsp;");
}

From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java

License:Apache License

/**
 * Returns (building and caching on first use) an {@link IndexReader} over a
 * one-document {@link MemoryIndex} containing the current token stream indexed
 * under {@code field}.
 *
 * NOTE(review): this mutates the shared {@code tokenStream} and
 * {@code cachedTokenStream} fields — the stream is wrapped in a
 * CachingTokenFilter once so it can be replayed for subsequent fields.
 */
private IndexReader getReaderForField(String field) throws IOException {
    if (wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        cachedTokenStream = true;
    }
    IndexReader reader = readers.get(field);
    if (reader == null) {
        // First request for this field: index the (cached) stream in memory,
        // then rewind it so later callers can consume it again.
        MemoryIndex indexer = new MemoryIndex();
        indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        tokenStream.reset();
        IndexSearcher searcher = indexer.createSearcher();
        reader = searcher.getIndexReader();
        readers.put(field, reader);
    }

    return reader;
}

From source file:org.allenai.blacklab.queryParser.lucene.QueryParserBase.java

License:Apache License

/**
 * Analyzes {@code queryText} with the given analyzer and builds a term,
 * boolean, phrase, or multi-phrase {@link TextPattern} depending on the number
 * of tokens produced and whether several tokens share a position.
 *
 * NOTE(review): the shared {@code bytes} ref is filled in place by
 * {@code termAtt.fillBytesRef()} on each token, so the deep copies and the
 * exact consumption order below are load-bearing — do not reorder.
 *
 * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
 */
protected TextPattern newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted)
        throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source;
    try {
        source = analyzer.tokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
        p.initCause(e);
        throw p;
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermToBytesRefAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    buffer.reset();

    if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
        termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
    }
    if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    // First pass: count tokens and positions; a zero position increment marks
    // stacked tokens (e.g. synonyms) at the same position.
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
        p.initCause(e);
        throw p;
    }

    // Shared BytesRef that fillBytesRef() refills on every token below.
    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();

    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        // Single token: plain term query.
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            termAtt.fillBytesRef();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
    } else {
        if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
            if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
                // no phrase query:
                TextPatternBoolean q = newBooleanQuery(positionCount == 1); // BL: BooleanQuery -> TextPatternBoolean

                BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR
                        ? BooleanClause.Occur.MUST
                        : BooleanClause.Occur.SHOULD;

                for (int i = 0; i < numTokens; i++) {
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    TextPattern currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
                    q.add(currentQuery, occur);
                }
                return q;
            } else {
                // phrase query:
                TPMultiPhrase mpq = newMultiPhraseQuery(); // BL: MultiPhraseQuery -> TPMultiPhrase
                mpq.setSlop(phraseSlop);
                // Terms sharing a position are grouped, then flushed whenever
                // the position advances.
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
                // Flush the final same-position group.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // Quoted, one-token-per-position input: plain phrase query.
            TPPhrase pq = newPhraseQuery(); // BL: PhraseQuery -> TPPhrase
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    termAtt.fillBytesRef();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position);
                } else {
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
            }
            return pq;
        }
    }
}

From source file:org.apache.fuzzydb.queryParser.QueryParser.java

License:Open Source License

/**
 * Analyzes {@code queryText} with the configured analyzer and builds a term,
 * boolean, phrase, or multi-phrase {@link Query} based on the number of tokens
 * produced and whether several tokens share a position.
 *
 * NOTE(review): the token buffer is consumed in two passes (count, then read),
 * so the buffer/reset call order below is load-bearing — do not reorder.
 *
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source;
    try {
        // Prefer the reusable stream; fall back to a fresh one on failure.
        source = analyzer.reusableTokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;

    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(TermAttribute.class)) {
            termAtt = buffer.getAttribute(TermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }

    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    // First pass: count tokens and positions; a zero position increment marks
    // stacked tokens (e.g. synonyms) at the same position.
    boolean hasMoreTokens = false;
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();

        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }

    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        // Single token: plain term query.
        String term = null;
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, term));
    } else {
        if (severalTokensAtSamePosition) {
            if (positionCount == 1) {
                // no phrase query:
                BooleanQuery q = newBooleanQuery(true);
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    Query currentQuery = newTermQuery(new Term(field, term));
                    q.add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            } else {
                // phrase query:
                MultiPhraseQuery mpq = newMultiPhraseQuery();
                mpq.setSlop(phraseSlop);
                // Terms sharing a position are grouped, then flushed whenever
                // the position advances.
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }

                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, term));
                }
                // Flush the final same-position group.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // One token per position: plain phrase query.
            PhraseQuery pq = newPhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;

                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.term();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, term), position);
                } else {
                    pq.add(new Term(field, term));
                }
            }
            return pq;
        }
    }
}