com.zb.mmseg.analysis.TokenUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.zb.mmseg.analysis.TokenUtils.java

Source

/*
 * Copyright 2011-2016 ZuoBian.com All rights reserved. This software is the confidential and proprietary information of
 * ZuoBian.com ("Confidential Information"). You shall not disclose such Confidential Information and shall use it only
 * in accordance with the terms of the license agreement you entered into with ZuoBian.com.
 */
package com.zb.mmseg.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/**
 * Helper for reading a {@link TokenStream} one {@link Token} at a time.
 * <p>
 * Since Lucene 3.0 a {@code TokenStream} exposes the current term through attributes rather than handing out
 * {@code Token} objects; this class copies those attributes back into a {@code Token}.
 *
 * @author zxc Sep 3, 2014 2:39:35 PM
 */
public class TokenUtils {

    /**
     * Advances {@code input} by one token and copies the term text, offsets and type of that token into a
     * {@link Token}.
     * <p>
     * Each attribute is copied only if the stream actually carries it. Presence is tested with
     * {@code hasAttribute} because {@code getAttribute} throws {@link IllegalArgumentException} for a missing
     * attribute in Lucene 3.x/4.x (it returns {@code null} only in later versions), so a plain null check on its
     * result does not protect against a stream that lacks the attribute.
     *
     * @param input the stream to read from; may be {@code null}
     * @param reusableToken the token to fill; if {@code null}, a new {@link Token} is created automatically
     * @return the filled token ({@code reusableToken} itself when it was not {@code null}), or {@code null} if
     *         {@code input} is {@code null} or has no further tokens
     * @throws IOException if {@code input.incrementToken()} fails
     */
    public static Token nextToken(TokenStream input, Token reusableToken) throws IOException {
        if (input == null || !input.incrementToken()) {
            return null;
        }

        Token token = (reusableToken != null) ? reusableToken : new Token();
        // Wipe any state left over from a previous use of the token before filling it.
        token.clear();

        if (input.hasAttribute(CharTermAttribute.class)) {
            CharTermAttribute termAtt = input.getAttribute(CharTermAttribute.class);
            // Lucene 3.1+ API; Lucene 3.0 used setTermBuffer(termBuffer(), 0, termLength()).
            token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
        }

        if (input.hasAttribute(OffsetAttribute.class)) {
            OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class);
            // Lucene 4.0 API; Lucene 3.1 used separate setStartOffset / setEndOffset calls.
            token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        }

        if (input.hasAttribute(TypeAttribute.class)) {
            TypeAttribute typeAtt = input.getAttribute(TypeAttribute.class);
            token.setType(typeAtt.type());
        }

        return token;
    }
}