Example usage for org.apache.lucene.analysis.ja JapaneseAnalyzer close

List of usage examples for org.apache.lucene.analysis.ja JapaneseAnalyzer close

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.ja JapaneseAnalyzer close.

Prototype

@Override
public void close() 

Source Link

Document

Frees persistent resources used by this Analyzer

Usage

From source file:de.escidoc.sb.common.lucene.analyzer.EscidocJapaneseAnalyzer.java

License:Open Source License

/**
 * Constructs a token stream with JapaneseAnalyzer or WhitespaceTokenizer
 * depending if text is japanese or not.
 * /*from ww  w .j av a 2 s.co  m*/
 * @param fieldName
 *            name of the Lucene Indexfield.
 * @param reader
 *            reader with field-value
 * 
 * @return TokenStream tokenStream
 * 
 * @sb
 */
@Override
public TokenStream tokenStream(final String fieldName, final Reader reader) {
    if (log.isDebugEnabled()) {
        log.debug("tokenizing with EscidocJapaneseAnalyzer");
    }
    //checkJapanese ///////////////////////////////////////////////////////
    boolean isJapanese = false;
    TokenStream whitespaceTokens = new WhitespaceTokenizer(Constants.LUCENE_VERSION, reader);
    Reader reader1 = null;
    try {
        StringBuffer tokenBuffer = new StringBuffer("");
        CharTermAttribute termAtt = whitespaceTokens.addAttribute(CharTermAttribute.class);
        whitespaceTokens.reset();
        while (whitespaceTokens.incrementToken()) {
            if (tokenBuffer.length() > 0) {
                tokenBuffer.append(" ");
            }
            tokenBuffer.append(termAtt.toString());
        }
        for (int i = 0; i < tokenBuffer.length(); i++) {
            int hexInt = Integer.parseInt(charToHex(tokenBuffer.charAt(i)), 16);
            if (hexInt > 12287 && hexInt < 13328) {
                isJapanese = true;
                break;
            }
        }
        reader1 = new StringReader(tokenBuffer.toString());
    } catch (Exception e) {
        log.error(e);
    }
    ///////////////////////////////////////////////////////////////////////

    //No Japanese, so return whitespace-tokens
    if (!isJapanese) {
        TokenStream result = new XmlWhitespaceTokenizer(reader1);
        result = new JunkFilter(result);
        result = new LowerCaseFilter(Constants.LUCENE_VERSION, result);
        return result;
    }

    //Get Japanese Tokens
    JapaneseAnalyzer analyzer = new JapaneseAnalyzer(Constants.LUCENE_VERSION);
    TokenStream japaneseTokens = analyzer.tokenStream("", reader1);
    if (analyzer != null) {
        try {
            analyzer.close();
        } catch (Exception e) {
        }
    }
    return japaneseTokens;
}