Example usage for org.apache.lucene.analysis TokenStream hasAttributes

List of usage examples for org.apache.lucene.analysis TokenStream hasAttributes

Introduction

On this page you can find example usage of org.apache.lucene.analysis.TokenStream.hasAttributes().

Prototype

public final boolean hasAttributes() 

Source Link

Document

Returns true if and only if this AttributeSource has any attributes.

Usage

From source file: com.github.riccardove.easyjasub.lucene.LuceneParser.java

License:Apache License

/**
 * Consumes {@code tokenStream} and collects its tokens into a list, keeping at most
 * one token per start offset.
 *
 * <p>The stream is registered via {@code addAttributes}, reset, iterated until
 * {@code incrementToken()} returns {@code false}, then ended. It is always closed
 * before this method returns, including when an exception is thrown part-way through.
 *
 * <p>When two tokens share a start offset, the one with the smaller end offset is
 * kept: a later token that ends at or after the stored one is skipped, and a later
 * token that ends earlier replaces the stored one.
 *
 * @param tokenStream the stream to read; it is closed by this method
 * @return the collected tokens, in the order they were added
 * @throws IOException if the stream fails while being reset, advanced, ended or closed
 */
private List<LuceneToken> readTokens(TokenStream tokenStream) throws IOException {
    List<LuceneToken> tokens = new ArrayList<LuceneToken>();
    HashMap<Integer, LuceneToken> tokensByStartOffset = new HashMap<Integer, LuceneToken>();

    try {
        addAttributes(tokenStream);
        tokenStream.reset();

        while (tokenStream.incrementToken()) {
            if (!tokenStream.hasAttributes()) {
                continue;
            }

            LuceneToken token = new LuceneToken();
            readOffset(tokenStream, token);

            // Lucene may output multiple tokens for compound words
            LuceneToken tokenWithSameStartOffset = tokensByStartOffset.get(token.getStartOffset());
            if (tokenWithSameStartOffset != null) {
                if (token.getEndOffset() >= tokenWithSameStartOffset.getEndOffset()) {
                    // The stored token is no longer than this one; keep it and
                    // skip the remaining attribute reads for this token.
                    continue;
                }
                // This token ends earlier, so it supersedes the stored one.
                tokens.remove(tokenWithSameStartOffset);
            }

            readReading(tokenStream, token);
            readPartOfSpeech(tokenStream, token);
            readInflection(tokenStream, token);
            readBaseForm(tokenStream, token);

            tokensByStartOffset.put(token.getStartOffset(), token);
            tokens.add(token);
        }

        tokenStream.end();
    } finally {
        // Release the stream even if reading failed; previously an exception
        // anywhere above skipped close() and leaked the stream's resources.
        tokenStream.close();
    }

    return tokens;
}