org.wltea.analyzer.ikanalyzer.IKAnalzyerCase.java Source code

Java tutorial

Introduction

Here is the source code for org.wltea.analyzer.ikanalyzer.IKAnalzyerCase.java

Source

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.wltea.analyzer.ikanalyzer;

 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.regex.Pattern;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.wltea.analyzer.lucene.IKAnalyzer;

 /**
  * Demonstrates tokenizing text with {@link IKAnalyzer} and collecting the
  * resulting terms.
  */
 public class IKAnalzyerCase {

     /**
      * Matches a term that consists solely of ASCII digits. Compiled once so
      * the token loop does not recompile the expression for every term. Note
      * that the expression also accepts the empty string.
      */
     private static final Pattern DIGITS_ONLY = Pattern.compile("^[0-9]*$");

     /**
      * Tokenizes a sample string and prints the collected terms.
      *
      * @param args command-line arguments; not used
      */
     public static void main(String[] args) {
         ArrayList<String> topicWords = getTopicWord("?1");
         System.out.println(topicWords);
     }

     /**
      * Tokenizes {@code str} with an {@link IKAnalyzer} and returns the terms
      * that are either longer than one character or made up only of ASCII
      * digits.
      *
      * <p>Every token produced by the analyzer is also printed to standard
      * output in the form {@code start - end : term | type}.
      *
      * <p>If an {@link IOException} occurs while reading tokens, its stack
      * trace is printed and the terms collected up to that point are returned.
      *
      * @param str the text to tokenize; {@code null} yields an empty list
      * @return the retained terms in the order the analyzer produced them;
      *         never {@code null}
      */
     public static ArrayList<String> getTopicWord(String str) {
         ArrayList<String> retData = new ArrayList<String>();
         if (str == null) {
             // Nothing to tokenize; avoids the NullPointerException that
             // StringReader would throw for a null source.
             return retData;
         }

         // Build the IK analyzer; the original comment describes the flag as
         // selecting its "smart" segmentation mode.
         Analyzer analyzer = new IKAnalyzer(true);
         try {
             TokenStream ts = null;
             try {
                 // Obtain the Lucene TokenStream for the input text.
                 ts = analyzer.tokenStream("myfield", new StringReader(str));
                 // Token offsets within the input.
                 OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
                 // Token text.
                 CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                 // Token type.
                 TypeAttribute type = ts.addAttribute(TypeAttribute.class);

                 // The stream must be reset before the first incrementToken().
                 ts.reset();
                 while (ts.incrementToken()) {
                     // Convert once; the attribute is reused for every token.
                     String text = term.toString();
                     System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + text
                             + " | " + type.type());
                     if (text.length() > 1 || DIGITS_ONLY.matcher(text).matches()) {
                         retData.add(text);
                     }
                 }
                 // Perform end-of-stream operations, e.g. set the final offset.
                 ts.end();
             } catch (IOException e) {
                 // Best effort: report the failure and return what was collected.
                 e.printStackTrace();
             } finally {
                 // Release the TokenStream (and the underlying reader).
                 if (ts != null) {
                     try {
                         ts.close();
                     } catch (IOException e) {
                         e.printStackTrace();
                     }
                 }
             }
         } finally {
             // The analyzer is created per call, so it must be released here;
             // closed after the stream it produced.
             analyzer.close();
         }
         return retData;
     }

 }