org.wltea.analyzer.sample.ThulacAnalzyerDemo.java Source code

Introduction

Here is the source code for org.wltea.analyzer.sample.ThulacAnalzyerDemo.java
Source

/**
 * IK Chinese word segmentation, version 5.0.1
 * IK Analyzer release 5.0.1
 * 
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Source code provided by Linliangyi (linliangyi2005@gmail.com)
 * Copyright 2012, Oolong studio
 * provided by Linliangyi and copyright 2012 by Oolong studio
 * 
 * 
 */
package org.wltea.analyzer.sample;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

import com.dumpcache.thulac4solr.lucene.ThulacAnalyzer;

/**
 * Demonstrates tokenizing text with ThulacAnalyzer.
 */
public class ThulacAnalzyerDemo {

    /**
     * Tokenizes a fixed sample sentence with {@link ThulacAnalyzer} and prints one
     * line per token ({@code start - end : term | type}), followed by the elapsed
     * wall-clock time in milliseconds.
     *
     * <p>An {@link IOException} raised while tokenizing or closing the stream is
     * reported via {@code printStackTrace()} rather than propagated.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // NOTE(review): the boolean flag presumably selects the analyzer's "smart"
        // segmentation mode -- confirm against ThulacAnalyzer's constructor.
        Analyzer analyzer = new ThulacAnalyzer(true);
        // Declared outside the try so the finally block can release it.
        TokenStream ts = null;
        try {
            long start = System.currentTimeMillis();
            // NOTE(review): the leading '?' characters in the sample text look like
            // non-ASCII characters lost to a bad encoding; the literal is kept as-is.
            ts = analyzer.tokenStream("myfield", new StringReader(
                    "?????IKAnalyer can analysis english text too"));
            // Start/end character offsets of the current token.
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            // Text of the current token.
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            // Type label of the current token.
            TypeAttribute type = ts.addAttribute(TypeAttribute.class);

            // The stream must be reset before the first incrementToken() call.
            ts.reset();
            // Advance token by token until the stream is exhausted.
            while (ts.incrementToken()) {
                System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                        + " | " + type.type());
            }
            ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
            System.out.println("wast:" + (System.currentTimeMillis() - start));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the token stream first, then the analyzer that produced it.
            if (ts != null) {
                try {
                    ts.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // The analyzer holds resources of its own and was previously never closed.
            analyzer.close();
        }

    }

}