Java tutorial
/**************************************************************** * Licensed to the AOS Community (AOS) under one or more * * contributor license agreements. See the NOTICE file * * distributed with this work for additional information * * regarding copyright ownership. The AOS licenses this file * * to you under the Apache License, Version 2.0 (the * * "License"); you may not use this file except in compliance * * with the License. You may obtain a copy of the License at * * * * http://www.apache.org/licenses/LICENSE-2.0 * * * * Unless required by applicable law or agreed to in writing, * * software distributed under the License is distributed on an * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * * KIND, either express or implied. See the License for the * * specific language governing permissions and limitations * * under the License. * ****************************************************************/ package aos.lucene.analysis.i18n; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cn.ChineseAnalyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.util.Version; import java.awt.Font; import java.awt.FontMetrics; import java.awt.Frame; import java.awt.Label; import java.io.IOException; import java.io.StringReader; // From chapter 4 public class ChineseDemo { private static String[] strings = { "?" }; //A private static Analyzer[] analyzers = { new SimpleAnalyzer(), new StandardAnalyzer(Version.LUCENE_46), new ChineseAnalyzer(), //B new CJKAnalyzer(Version.LUCENE_46), new SmartChineseAnalyzer(Version.LUCENE_46) }; public static void main(String... args) throws Exception { for (String string : strings) { for (Analyzer analyzer : analyzers) { analyze(string, analyzer); } } } private static void analyze(String string, Analyzer analyzer) throws IOException { StringBuffer buffer = new StringBuffer(); TokenStream stream = analyzer.tokenStream("contents", new StringReader(string)); TermAttribute term = stream.addAttribute(TermAttribute.class); while (stream.incrementToken()) { //C buffer.append("["); buffer.append(term.term()); buffer.append("] "); } String output = buffer.toString(); Frame f = new Frame(); f.setTitle(analyzer.getClass().getSimpleName() + " : " + string); f.setResizable(true); Font font = new Font(null, Font.PLAIN, 36); int width = getWidth(f.getFontMetrics(font), output); f.setSize((width < 250) ? 250 : width + 50, 75); // NOTE: if Label doesn't render the Chinese characters // properly, try using javax.swing.JLabel instead Label label = new Label(output); //D label.setSize(width, 75); label.setAlignment(Label.CENTER); label.setFont(font); f.add(label); f.setVisible(true); } private static int getWidth(FontMetrics metrics, String s) { int size = 0; int length = s.length(); for (int i = 0; i < length; i++) { size += metrics.charWidth(s.charAt(i)); } return size; } } /* #A Analyze this text #B Test these analyzers #C Retrieve tokens #D Display analysis */