Example usage for org.apache.lucene.analysis.icu ICUNormalizer2CharFilter ICUNormalizer2CharFilter

List of usage examples for org.apache.lucene.analysis.icu ICUNormalizer2CharFilter ICUNormalizer2CharFilter

Introduction

On this page you can find example usage of the ICUNormalizer2CharFilter(Reader) constructor of org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter.

Prototype

public ICUNormalizer2CharFilter(Reader in) 

Source Link

Document

Creates a new ICUNormalizer2CharFilter that combines NFKC normalization, case folding, and removal of default ignorables (NFKC_Casefold).

Usage

From source file: org.elasticsearch.indices.analysis.IcuIndicesAnalysis.java

License:Apache License

/**
 * Registers the pre-built ICU analysis components with the given service.
 *
 * <p>One tokenizer ({@code icu_tokenizer}), four token filters
 * ({@code icu_normalizer}, {@code icu_folding}, {@code icu_collation},
 * {@code icu_transform}) and one char filter ({@code icu_normalizer}) are
 * put into the corresponding factory maps exposed by
 * {@code indicesAnalysisService}. Each factory is wrapped in its
 * {@code PreBuilt*FactoryFactory} counterpart before registration.
 *
 * @param settings               settings forwarded to the superclass constructor
 * @param indicesAnalysisService service whose factory maps receive the ICU entries
 */
@Inject
public IcuIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);

    // Tokenizer: "icu_tokenizer" creates a fresh ICUTokenizer on every call.
    final TokenizerFactory icuTokenizer = new TokenizerFactory() {
        @Override
        public String name() {
            return "icu_tokenizer";
        }

        @Override
        public Tokenizer create() {
            return new ICUTokenizer();
        }
    };
    indicesAnalysisService.tokenizerFactories().put("icu_tokenizer",
            new PreBuiltTokenizerFactoryFactory(icuTokenizer));

    // Token filter: "icu_normalizer" wraps the stream with the "nfkc_cf"
    // Normalizer2 instance in COMPOSE mode.
    final TokenFilterFactory icuNormalizer = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_normalizer";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
            return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, nfkcCasefold);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_normalizer",
            new PreBuiltTokenFilterFactoryFactory(icuNormalizer));

    // Token filter: "icu_folding" wraps the stream in an ICUFoldingFilter.
    final TokenFilterFactory icuFolding = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_folding";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new ICUFoldingFilter(tokenStream);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_folding",
            new PreBuiltTokenFilterFactoryFactory(icuFolding));

    // Token filter: "icu_collation" uses the collator returned by the
    // no-argument Collator.getInstance().
    final TokenFilterFactory icuCollation = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_collation";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new ICUCollationKeyFilter(tokenStream, Collator.getInstance());
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_collation",
            new PreBuiltTokenFilterFactoryFactory(icuCollation));

    // Token filter: "icu_transform" applies the "Null" transliterator in the
    // forward direction.
    final TokenFilterFactory icuTransform = new TokenFilterFactory() {
        @Override
        public String name() {
            return "icu_transform";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            Transliterator nullTransliterator = Transliterator.getInstance("Null", Transliterator.FORWARD);
            return new ICUTransformFilter(tokenStream, nullTransliterator);
        }
    };
    indicesAnalysisService.tokenFilterFactories().put("icu_transform",
            new PreBuiltTokenFilterFactoryFactory(icuTransform));

    // Char filter: "icu_normalizer" wraps the reader using the single-argument
    // ICUNormalizer2CharFilter constructor (its default normalizer).
    final CharFilterFactory icuNormalizerCharFilter = new CharFilterFactory() {
        @Override
        public String name() {
            return "icu_normalizer";
        }

        @Override
        public Reader create(Reader reader) {
            return new ICUNormalizer2CharFilter(reader);
        }
    };
    indicesAnalysisService.charFilterFactories().put("icu_normalizer",
            new PreBuiltCharFilterFactoryFactory(icuNormalizerCharFilter));
}