Java tutorial
/** * BeehiveZ is a business process model and instance management system. * Copyright (C) 2011 * Institute of Information System and Engineering, School of Software, Tsinghua University, * Beijing, China * * Contact: jintao05@gmail.com * * This program is a free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation with the version of 2. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package cn.edu.thss.iise.beehivez.server.index.luceneindex.analyzer; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.TermAttribute; /** * @author Tao Jin * */ public class SemicolonAnalyzer extends Analyzer { /* * (non-Javadoc) * * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, * java.io.Reader) */ @Override public TokenStream tokenStream(String fieldName, Reader reader) { return new SemicolonTokenizer(reader); } @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); if (tokenizer == null) { tokenizer = new SemicolonTokenizer(reader); setPreviousTokenStream(tokenizer); } else tokenizer.reset(reader); return tokenizer; } /** * @param args */ public static void main(String[] args) throws IOException { // text to tokenize final String text = "This is a demo of , the new TokenStream API"; SemicolonAnalyzer analyzer = new SemicolonAnalyzer(); TokenStream stream = analyzer.tokenStream("field", new StringReader(text)); // get the TermAttribute from the TokenStream TermAttribute termAtt = stream.addAttribute(TermAttribute.class); stream.reset(); // print all tokens until stream is exhausted while (stream.incrementToken()) { System.out.println(termAtt.term()); } stream.end(); stream.close(); } }