List of usage examples for org.apache.lucene.analysis.ja.JapaneseAnalyzer.close()
@Override public void close()
From source file: de.escidoc.sb.common.lucene.analyzer.EscidocJapaneseAnalyzer.java
License:Open Source License
/** * Constructs a token stream with JapaneseAnalyzer or WhitespaceTokenizer * depending if text is japanese or not. * /*from ww w .j av a 2 s.co m*/ * @param fieldName * name of the Lucene Indexfield. * @param reader * reader with field-value * * @return TokenStream tokenStream * * @sb */ @Override public TokenStream tokenStream(final String fieldName, final Reader reader) { if (log.isDebugEnabled()) { log.debug("tokenizing with EscidocJapaneseAnalyzer"); } //checkJapanese /////////////////////////////////////////////////////// boolean isJapanese = false; TokenStream whitespaceTokens = new WhitespaceTokenizer(Constants.LUCENE_VERSION, reader); Reader reader1 = null; try { StringBuffer tokenBuffer = new StringBuffer(""); CharTermAttribute termAtt = whitespaceTokens.addAttribute(CharTermAttribute.class); whitespaceTokens.reset(); while (whitespaceTokens.incrementToken()) { if (tokenBuffer.length() > 0) { tokenBuffer.append(" "); } tokenBuffer.append(termAtt.toString()); } for (int i = 0; i < tokenBuffer.length(); i++) { int hexInt = Integer.parseInt(charToHex(tokenBuffer.charAt(i)), 16); if (hexInt > 12287 && hexInt < 13328) { isJapanese = true; break; } } reader1 = new StringReader(tokenBuffer.toString()); } catch (Exception e) { log.error(e); } /////////////////////////////////////////////////////////////////////// //No Japanese, so return whitespace-tokens if (!isJapanese) { TokenStream result = new XmlWhitespaceTokenizer(reader1); result = new JunkFilter(result); result = new LowerCaseFilter(Constants.LUCENE_VERSION, result); return result; } //Get Japanese Tokens JapaneseAnalyzer analyzer = new JapaneseAnalyzer(Constants.LUCENE_VERSION); TokenStream japaneseTokens = analyzer.tokenStream("", reader1); if (analyzer != null) { try { analyzer.close(); } catch (Exception e) { } } return japaneseTokens; }