// Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.wltea.analyzer.ikanalyzer;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Small demo that runs a string through {@link IKAnalyzer} via the Lucene
 * {@link TokenStream} API and collects a filtered list of the produced terms.
 */
public class IKAnalzyerCase {

    /**
     * Matches a term consisting solely of ASCII digits (the empty string also
     * matches). Compiled once instead of on every token.
     */
    private static final Pattern DIGITS_ONLY = Pattern.compile("^[0-9]*$");

    /**
     * Tokenizes a fixed sample string and prints the collected terms.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ArrayList<String> topicWords = getTopicWord("?1");
        System.out.println(topicWords);
    }

    /**
     * Tokenizes {@code str} with an {@link IKAnalyzer} and returns the terms that
     * are either longer than one character or made up only of ASCII digits.
     *
     * <p>Every token (kept or not) is also printed to standard output as
     * {@code start - end : term | type}.
     *
     * <p>An {@link IOException} raised while reading or closing the token stream
     * is reported with {@code printStackTrace()} and not rethrown; the terms
     * collected up to that point are still returned.
     *
     * @param str the text to tokenize; {@code null} yields an empty list
     * @return a new, mutable list of the retained terms in token order; never
     *         {@code null}
     */
    public static ArrayList<String> getTopicWord(String str) {
        ArrayList<String> retData = new ArrayList<String>();
        if (str == null) {
            // StringReader rejects null with a NullPointerException; nothing to tokenize.
            return retData;
        }

        // The boolean flag presumably selects IK's "smart" segmentation mode
        // (the original comment mentions "smart") - confirm against IKAnalyzer docs.
        Analyzer analyzer = new IKAnalyzer(true);
        TokenStream ts = null;
        try {
            ts = analyzer.tokenStream("myfield", new StringReader(str));
            // Attribute views that are refreshed in place on each incrementToken() call.
            OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            TypeAttribute type = ts.addAttribute(TypeAttribute.class);

            // The stream must be reset before the first incrementToken().
            ts.reset();
            while (ts.incrementToken()) {
                // Materialize the term once; the attribute buffer is reused per token.
                String text = term.toString();
                System.out.println(offset.startOffset() + " - " + offset.endOffset()
                        + " : " + text + " | " + type.type());
                if (text.length() > 1 || DIGITS_ONLY.matcher(text).matches()) {
                    retData.add(text);
                }
            }
            // Perform end-of-stream operations, e.g. set the final offset.
            ts.end();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (ts != null) {
                    ts.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // The analyzer is created per call, so release it here as well.
                analyzer.close();
            }
        }
        return retData;
    }
}