Example usage for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES

List of usage examples for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES.

Prototype

String[] TOKEN_TYPES

To view the source code for org.apache.lucene.analysis.standard StandardTokenizer TOKEN_TYPES, click the Source Link below.

Click Source Link

Document

String token types that correspond to token type int constants

Usage

From source file:com.piza.search.AutoPhraseTokenizer.java

License:Apache License

/**
 * Advances the stream to the next token produced by the scanner.
 * <p>
 * Tokens longer than {@code maxTokenLength} are skipped, but each skipped
 * token still contributes to the position increment of the next emitted
 * token so downstream position-sensitive consumers stay aligned.
 *
 * @return {@code true} if a token was emitted, {@code false} at end of input
 * @throws IOException if the underlying scanner fails to read
 */
@Override
public final boolean incrementToken() throws IOException {
    clearAttributes();
    skippedPositions = 0;

    while (true) {
        int tokenType = scanner.getNextToken();

        // End of input: nothing more to emit.
        if (tokenType == StandardTokenizerImpl.YYEOF) {
            return false;
        }

        if (scanner.yylength() <= maxTokenLength) {
            posIncrAtt.setPositionIncrement(skippedPositions + 1);
            scanner.getText(termAtt);
            final int start = scanner.yychar();
            offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
            typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
            // Feed the accepted term to the phrase processor before returning.
            this.phraseProcess.addTerm(termAtt.toString(), offsetAtt.startOffset(), offsetAtt.endOffset());
            return true;
        } else {
            // When we skip a too-long term, we still increment the
            // position increment.
            skippedPositions++;
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser.java

License:Apache License

/**
 * {@inheritDoc}/*from www . j a  va 2s.c o m*/
 */
/**
 * {@inheritDoc}
 * <p>
 * Only creates a prefix (wildcard) query when the analyzed term is a single
 * token. Multi-token CJ input falls back to a plain field query, since a
 * trailing wildcard is not meaningful there.
 *
 * @throws ParseException if tokenizing the term fails; the underlying
 *         {@link IOException} is attached as the cause
 */
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    // only create a prefix query when the term is a single word / token
    Analyzer a = getAnalyzer();
    TokenStream ts = a.tokenStream(field, new StringReader(termStr));
    int count = 0;
    boolean isCJ = false;
    try {
        TypeAttribute t = ts.addAttribute(TypeAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            count++;
            // NOTE(review): only the last token's type is kept; for a pure CJ
            // term every token is CJ, so this matches the intended check —
            // confirm mixed CJ/non-CJ input is acceptable here.
            isCJ = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.CJ].equals(t.type());
        }
        ts.end();
    } catch (IOException e) {
        // Preserve the underlying cause instead of discarding it with only
        // its message (ParseException has no cause-taking constructor).
        ParseException pe = new ParseException(e.getMessage());
        pe.initCause(e);
        throw pe;
    } finally {
        try {
            ts.close();
        } catch (IOException ignored) {
            // best-effort close; nothing useful can be done on failure
        }
    }
    if (count > 1 && isCJ) {
        return getFieldQuery(field, termStr);
    } else {
        return getWildcardQuery(field, termStr + "*");
    }
}

From source file:org.sindice.siren.analysis.TestTupleTokenizer.java

License:Apache License

/**
 * Verifies that a quoted literal with a language tag is split into the
 * literal's tokens plus a LANGUAGE token for the tag.
 */
@Test
public void testLanguage() throws Exception {
    // Use the shared TOKEN_TYPES constants instead of hard-coded strings
    // ("<ALPHANUM>", "<LANGUAGE>") so the expectations cannot drift from
    // the tokenizer implementations.
    this.assertTokenizesTo(_t, "\"test\"@en", new String[] { "test", "en" },
            new String[] { StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
                    TupleTokenizer.TOKEN_TYPES[TupleTokenizer.LANGUAGE] });
    this.assertTokenizesTo(_t, "\"toto@titi.fr \"@fr", new String[] { "toto", "titi.fr", "fr" },
            new String[] { StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
                    StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM],
                    TupleTokenizer.TOKEN_TYPES[TupleTokenizer.LANGUAGE] });
}