Example usage for the org.apache.lucene.analysis.icu.segmentation.ICUTokenizer constructor ICUTokenizer()

List of usage examples for the org.apache.lucene.analysis.icu.segmentation.ICUTokenizer constructor ICUTokenizer()

Introduction

On this page you can find example usages of the no-argument constructor ICUTokenizer() of the class org.apache.lucene.analysis.icu.segmentation.ICUTokenizer.

Prototype

public ICUTokenizer() 

Source Link

Document

Construct a new ICUTokenizer that breaks text into words. Note that this prototype takes no arguments, so no Reader is passed to the constructor itself.

Usage

From source file:org.elasticsearch.index.analysis.IcuTokenizerFactory.java

License:Apache License

/**
 * Creates the tokenizer supplied by this factory.
 *
 * @return a newly constructed {@code ICUTokenizer}, built with its no-argument constructor
 */
@Override
public Tokenizer create() {
    final Tokenizer icuTokenizer = new ICUTokenizer();
    return icuTokenizer;
}

From source file:org.elasticsearch.indices.analysis.IcuIndicesAnalysis.java

License:Apache License

/**
 * Registers the pre-built ICU analysis components with the given service.
 *
 * <p>One tokenizer ({@code "icu_tokenizer"}), four token filters ({@code "icu_normalizer"},
 * {@code "icu_folding"}, {@code "icu_collation"}, {@code "icu_transform"}) and one char filter
 * ({@code "icu_normalizer"}) are put into the corresponding factory maps exposed by
 * {@code indicesAnalysisService}. Each registered factory reports the same name it is
 * registered under and builds a fresh ICU component on every {@code create} call.
 *
 * @param settings               settings forwarded to the superclass constructor
 * @param indicesAnalysisService service whose tokenizer, token filter and char filter factory
 *                               maps receive the registrations
 */
@Inject
public IcuIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);

    // --- tokenizer -------------------------------------------------------------------------
    final TokenizerFactory icuTokenizer = new TokenizerFactory() {
        @Override
        public String name() {
            return "icu_tokenizer";
        }

        @Override
        public Tokenizer create() {
            return new ICUTokenizer();
        }
    };
    indicesAnalysisService.tokenizerFactories().put("icu_tokenizer",
            new PreBuiltTokenizerFactoryFactory(icuTokenizer));

    // --- token filters ---------------------------------------------------------------------
    final TokenFilterFactory normalizerFilter = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_normalizer";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // Looked up on every call, using the "nfkc_cf" data in COMPOSE mode.
            final Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
            return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_normalizer",
            new PreBuiltTokenFilterFactoryFactory(normalizerFilter));

    final TokenFilterFactory foldingFilter = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_folding";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new ICUFoldingFilter(tokenStream);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_folding",
            new PreBuiltTokenFilterFactoryFactory(foldingFilter));

    final TokenFilterFactory collationFilter = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_collation";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // NOTE(review): no locale is passed here; presumably this resolves to the
            // default-locale collator -- confirm against the Collator documentation.
            final Collator collator = Collator.getInstance();
            return new ICUCollationKeyFilter(tokenStream, collator);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_collation",
            new PreBuiltTokenFilterFactoryFactory(collationFilter));

    final TokenFilterFactory transformFilter = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_transform";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // The pre-built variant always uses the "Null" transliterator id, forward direction.
            final Transliterator transliterator = Transliterator.getInstance("Null", Transliterator.FORWARD);
            return new ICUTransformFilter(tokenStream, transliterator);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_transform",
            new PreBuiltTokenFilterFactoryFactory(transformFilter));

    // --- char filter -----------------------------------------------------------------------
    final CharFilterFactory normalizerCharFilter = new CharFilterFactory() {
        @Override
        public String name() {
            return "icu_normalizer";
        }

        @Override
        public Reader create(Reader reader) {
            return new ICUNormalizer2CharFilter(reader);
        }
    };
    indicesAnalysisService.charFilterFactories().put("icu_normalizer",
            new PreBuiltCharFilterFactoryFactory(normalizerCharFilter));
}