Example usage for org.apache.lucene.analysis Token toString

List of usage examples for org.apache.lucene.analysis Token toString

Introduction

On this page you can find example usages of the toString() method of org.apache.lucene.analysis.Token.

Prototype

@Override
public String toString() 

Source Link

Document

Returns solely the term text as specified by the CharSequence interface.

Usage

From source file:com.globalsight.ling.lucene.analysis.cn.ChineseFilter.java

License:Apache License

/**
 * Returns the next token from the input's attribute implementations that
 * passes this filter, or {@code null} when none remain.
 *
 * <p>Only attributes that are {@code GSAttributeImpl} instances carrying a
 * non-null token are considered. A token is returned when its text is not in
 * {@code stopTable} and either
 * <ul>
 *   <li>its first character is an upper- or lower-case letter and the text is
 *       longer than one character, or</li>
 *   <li>its first character is of type {@code Character.OTHER_LETTER}.</li>
 * </ul>
 * Tokens with empty text are skipped.
 *
 * @return the next accepted token, or {@code null} if there is none
 * @throws IOException declared for the token-stream contract
 */
public final Token next() throws IOException {
    Iterator<AttributeImpl> attIt = input.getAttributeImplsIterator();
    while (attIt.hasNext()) {
        AttributeImpl att = attIt.next();
        Token token = null;
        if (att instanceof GSAttributeImpl) {
            token = ((GSAttributeImpl) att).getToken();
        }
        if (token == null) {
            continue;
        }

        String text = token.toString();

        // An empty term has no first character to classify; skip it instead
        // of letting charAt(0) throw StringIndexOutOfBoundsException.
        if (text.length() == 0) {
            continue;
        }

        // why not key off token type here assuming
        // ChineseTokenizer comes first?
        if (stopTable.get(text) == null) {
            switch (Character.getType(text.charAt(0))) {

            case Character.LOWERCASE_LETTER:
            case Character.UPPERCASE_LETTER:

                // English word/token should be larger than 1 character.
                if (text.length() > 1) {
                    return token;
                }
                break;

            case Character.OTHER_LETTER:

                // One Chinese character as one Chinese word.
                // Chinese word extraction to be added later here.

                return token;
            }
        }
    }

    return null;
}

From source file:com.globalsight.ling.lucene.analysis.GSTokenFilter.java

License:Apache License

/**
 * Clears the attributes, pulls the next token via {@code next()} and copies
 * it into the token and term attributes.
 *
 * @return {@code false} when {@code next()} yields no token, {@code true} otherwise
 * @throws IOException if {@code next()} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    final Token current = next();
    if (current == null) {
        return false;
    }

    gsAtt.setToken(current);
    termAtt.append(current.toString());

    // Refresh the byte representation when the term attribute supports it.
    if (termAtt instanceof TermToBytesRefAttribute) {
        final TermToBytesRefAttribute bytesRefAtt = (TermToBytesRefAttribute) termAtt;
        bytesRefAtt.fillBytesRef();
    }
    return true;
}

From source file:com.globalsight.ling.lucene.analysis.GSTokenizer.java

License:Apache License

/**
 * Clears the attributes, pulls the next token via {@code next()} and copies
 * its text, offsets and position increment into the stream attributes.
 *
 * @return {@code false} when {@code next()} yields no token, {@code true} otherwise
 * @throws IOException if {@code next()} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    final Token current = next();
    if (current == null) {
        return false;
    }

    gsAtt.setToken(current);
    termAtt.append(current.toString());

    final int start = current.startOffset();
    final int end = current.endOffset();
    offsetAtt.setOffset(start, end);

    posIncrAtt.setPositionIncrement(current.getPositionIncrement());
    return true;
}

From source file:com.globalsight.ling.lucene.analysis.GSTokenStream.java

License:Apache License

/**
 * Clears the attributes, pulls the next token via {@code next()}, appends its
 * text to the term attribute and then stores the token itself.
 *
 * @return {@code false} when {@code next()} yields no token, {@code true} otherwise
 * @throws IOException if {@code next()} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    final Token current = next();
    if (current == null) {
        return false;
    }

    final String termText = current.toString();
    termAtt.append(termText);
    gsAtt.setToken(current);
    return true;
}

From source file:com.globalsight.ling.lucene.analysis.GSTokenTokenizer.java

License:Apache License

/**
 * Clears the attributes, pulls the next token via {@code next()}, stores it
 * in the token attribute and appends its text to the term attribute.
 *
 * @return {@code false} when {@code next()} yields no token, {@code true} otherwise
 * @throws IOException if {@code next()} fails
 */
@Override
public boolean incrementToken() throws IOException {
    clearAttributes();

    final Token current = next();
    final boolean hasToken = current != null;
    if (hasToken) {
        gsAtt.setToken(current);
        termAtt.append(current.toString());
    }
    return hasToken;
}

From source file:com.globalsight.ling.lucene.analysis.ngram.NgramAnalyzer.java

License:Apache License

/**
 * Prints the given text followed by every token an {@code NgramTokenizer}
 * (constructed with 3) produces for it, one per line with its start and end
 * offsets.
 *
 * @param p_text the text to tokenize
 * @throws java.io.IOException if reading tokens fails
 */
static void test(String p_text) throws java.io.IOException {
    NgramAnalyzer analyzer = new NgramAnalyzer(3);
    java.io.StringReader reader = new java.io.StringReader(p_text);
    NgramTokenizer tokenizer = new NgramTokenizer(reader, 3);

    System.out.println("Text = " + p_text);

    for (Token tok = tokenizer.next(); tok != null; tok = tokenizer.next()) {
        String offsets = " (" + tok.startOffset() + ":" + tok.endOffset() + ")";
        System.out.println(tok.toString() + offsets);
    }
}

From source file:com.globalsight.ling.lucene.analysis.ngram.NgramNoPunctuationAnalyzer.java

License:Apache License

/**
 * Prints the given text followed by every token an
 * {@code NgramNoPunctuationTokenizer} (constructed with 3) produces for it,
 * one per line with its start and end offsets.
 *
 * @param p_text the text to tokenize
 * @throws java.io.IOException if reading tokens fails
 */
static void test(String p_text) throws java.io.IOException {
    Analyzer analyzer = new NgramNoPunctuationAnalyzer(3);
    java.io.StringReader reader = new java.io.StringReader(p_text);
    NgramNoPunctuationTokenizer tokenizer = new NgramNoPunctuationTokenizer(reader, 3);

    System.out.println("Text = " + p_text);

    for (Token tok = tokenizer.next(); tok != null; tok = tokenizer.next()) {
        String offsets = " (" + tok.startOffset() + ":" + tok.endOffset() + ")";
        System.out.println(tok.toString() + offsets);
    }
}

From source file:com.globalsight.ling.lucene.analysis.pl.PolishFilter.java

License:Apache License

/**
 * Fetches the next input token and stems its text.
 *
 * <p>When stemming leaves the text unchanged the input token is returned
 * as is; otherwise a new token is built with the stemmed text and the
 * input token's offsets, type and position increment.
 *
 * @return the (possibly rebuilt) token, or {@code null} at end of input
 * @throws IOException if fetching the next token fails
 */
public final Token next() throws IOException {
    final Token source = getNextToken();
    if (source == null) {
        return null;
    }

    final String original = source.toString();
    final String stemmed = stemmer.stem(original, true);
    if (stemmed.equals(original)) {
        return source;
    }

    // The text changed, so a replacement token has to be built by hand.
    final Token rebuilt = new Token(stemmed, source.startOffset(), source.endOffset(), source.type());
    rebuilt.setPositionIncrement(source.getPositionIncrement());
    return rebuilt;
}

From source file:com.globalsight.ling.lucene.analysis.ru.RussianLowerCaseFilter.java

License:Apache License

/**
 * Returns the next input token with every character of its text passed
 * through {@code RussianCharsets.toLowerCase} for this filter's charset.
 *
 * <p>The returned token is a new instance that keeps the input token's
 * offsets, type and position increment, so downstream consumers see the
 * same metadata they would have seen on the original token.
 *
 * @return the lower-cased token, or {@code null} at end of input
 * @throws java.io.IOException if fetching the next token fails
 */
public final Token next() throws java.io.IOException {
    Token t = getNextToken();

    if (t == null) {
        return null;
    }

    char[] chArray = t.toString().toCharArray();
    for (int i = 0; i < chArray.length; i++) {
        chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
    }

    // Rebuild the token around the new text, carrying over the type and
    // position increment instead of silently resetting them to defaults.
    Token newToken = new Token(new String(chArray), t.startOffset(), t.endOffset(), t.type());
    newToken.setPositionIncrement(t.getPositionIncrement());

    return newToken;
}

From source file:com.globalsight.ling.lucene.analysis.snowball.SnowballFilter.java

License:Apache License

/**
 * Returns the next input Token, after being stemmed.
 *
 * <p>The returned token is a new instance holding the stemmer's output and
 * the input token's offsets, type and position increment.
 *
 * @return the stemmed token, or {@code null} at end of input
 * @throws IOException if fetching the next token fails
 */
public final Token next() throws IOException {
    Token token = getNextToken();

    if (token == null) {
        return null;
    }

    stemmer.setCurrent(token.toString());
    stemmer.stem();

    Token stemmed = new Token(stemmer.getCurrent(), token.startOffset(), token.endOffset(), token.type());
    // Carry over the position increment so gaps recorded on the input token
    // are not reset to the default on the rebuilt one.
    stemmed.setPositionIncrement(token.getPositionIncrement());
    return stemmed;
}