Example usage for the org.apache.commons.codec.language.bm.PhoneticEngine constructor PhoneticEngine(NameType, RuleType, boolean)

List of usage examples for the org.apache.commons.codec.language.bm.PhoneticEngine constructor PhoneticEngine(NameType, RuleType, boolean)

Introduction

On this page you can find example usages of the PhoneticEngine constructor of the class org.apache.commons.codec.language.bm.PhoneticEngine.

Prototype

public PhoneticEngine(NameType nameType, RuleType ruleType, boolean concat) 

Source Link

Usage

From source file:com.example.PhoneticTokenFilterFactory.java

/**
 * Wraps the given stream in the phonetic filter selected by this factory's settings.
 *
 * <p>Selection order: a non-null {@code encoder} yields a {@code PhoneticFilter}; otherwise a
 * non-null {@code ruletype} and {@code nametype} yield a {@code BeiderMorseFilter} (restricted
 * to {@code languageset} when one is set); otherwise a positive {@code maxcodelength} yields a
 * {@code DoubleMetaphoneFilter}.
 *
 * @param tokenStream the stream to wrap
 * @return the phonetic filter built around {@code tokenStream}
 * @throws IllegalArgumentException if none of the cases above applies
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // An explicitly configured encoder takes precedence over everything else.
    if (encoder != null) {
        return new PhoneticFilter(tokenStream, encoder, !replace);
    }

    final boolean beiderMorseConfigured = ruletype != null && nametype != null;
    if (beiderMorseConfigured && languageset == null) {
        return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true));
    }
    if (beiderMorseConfigured) {
        // The language set is built before the engine, as in the unrestricted case's absence.
        final LanguageSet restrictedTo = LanguageSet.from(new HashSet<>(Arrays.asList(languageset)));
        return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true),
                restrictedTo);
    }

    if (maxcodelength > 0) {
        return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
    }
    throw new IllegalArgumentException("encoder error");
}

From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java

/**
 * Creates a new BeiderMorseFilterFactory from the given arguments.
 *
 * <p>Reads {@code nameType} (default {@code GENERIC}), {@code ruleType} (default
 * {@code APPROX}), {@code concat} (default {@code true}) and {@code languageSet}. When
 * {@code languageSet} is absent, or consists of the single entry {@code auto}, the language
 * set is left {@code null}.
 *
 * @param args the factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty once the parameters above
 *         have been read
 */
public BeiderMorseFilterFactory(Map<String, String> args) {
    super(args);

    // The engine is defined by name type, rule type and the concat flag;
    // the defaults mirror commons-codec: GENERIC + APPROX + true.
    final String nameTypeName = get(args, "nameType", NameType.GENERIC.toString());
    final NameType nameType = NameType.valueOf(nameTypeName);
    final String ruleTypeName = get(args, "ruleType", RuleType.APPROX.toString());
    final RuleType ruleType = RuleType.valueOf(ruleTypeName);
    final boolean concat = getBoolean(args, "concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // Language restriction: nothing given, or just "auto", means no explicit set.
    final Set<String> requested = getSet(args, "languageSet");
    final boolean automatic = requested == null
            || (requested.size() == 1 && requested.contains("auto"));
    if (automatic) {
        languageSet = null;
    } else {
        languageSet = LanguageSet.from(requested);
    }

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Prepares the shared {@code analyzer}: a whitespace {@code MockTokenizer} followed by a
 * {@code BeiderMorseFilter} using a GENERIC / EXACT engine with concat enabled.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            final PhoneticEngine exactEngine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
            final BeiderMorseFilter phonetic = new BeiderMorseFilter(source, exactEngine);
            return new TokenStreamComponents(source, phonetic);
        }
    };
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Restricts the output to a set of possible origin languages (italian, greek, spanish) and
 * checks the encodings produced for "Angelo".
 */
public void testLanguageSet() throws Exception {
    // Plain HashSet instead of double-brace initialization, which would create an
    // anonymous HashSet subclass just to add three elements.
    final HashSet<String> origins = new HashSet<String>();
    origins.add("italian");
    origins.add("greek");
    origins.add("spanish");
    final LanguageSet languages = LanguageSet.from(origins);

    // try-with-resources closes the analyzer even when the assertion below fails.
    try (Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer,
                    new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
        }
    }) {
        assertAnalyzesTo(analyzer, "Angelo", new String[] { "andZelo", "angelo", "anxelo" }, new int[] { 0, 0, 0, },
                new int[] { 6, 6, 6, }, new int[] { 1, 0, 0, });
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Checks that an empty input term passes through the filter as a single empty term.
 */
public void testEmptyTerm() throws IOException {
    // try-with-resources closes the analyzer even when checkOneTerm fails.
    try (Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new KeywordTokenizer();
            return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer,
                    new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
        }
    }) {
        checkOneTerm(a, "", "");
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Marks the whole input "D'Angelo" as a keyword and verifies that every one of the 12 tokens
 * emitted by the BeiderMorseFilter still reports the keyword attribute as set.
 */
public void testCustomAttribute() throws IOException {
    final Tokenizer source = new MockTokenizer(MockTokenizer.KEYWORD, false);
    source.setReader(new StringReader("D'Angelo"));
    final TokenStream marked = new PatternKeywordMarkerFilter(source, Pattern.compile(".*"));
    final TokenStream stream = new BeiderMorseFilter(marked,
            new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
    final KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);

    stream.reset();
    int emitted = 0;
    for (; stream.incrementToken(); emitted++) {
        assertTrue(keyAtt.isKeyword());
    }
    assertEquals(12, emitted);

    stream.end();
    stream.close();
}

From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java

/**
 * Initializes this factory from its configuration arguments.
 *
 * <p>Reads {@code nameType} (default {@code GENERIC}), {@code ruleType} (default
 * {@code APPROX}), {@code concat} (default {@code true}) and {@code languageSet}. The
 * {@code languageSet} value is a comma-separated list of language names; surrounding
 * whitespace around the whole value and around each entry is ignored, so
 * {@code "italian, greek"} is read as {@code italian} and {@code greek}. A missing value or
 * the value {@code auto} leaves the language set {@code null}.
 *
 * @param args the factory arguments
 * @throws IllegalArgumentException if {@code nameType} or {@code ruleType} does not name a
 *         constant of the respective enum
 */
public void init(Map<String, String> args) {
    super.init(args);

    // PhoneticEngine = NameType + RuleType + concat
    // we use commons-codec's defaults: GENERIC + APPROX + true
    String nameTypeArg = args.get("nameType");
    NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);

    String ruleTypeArg = args.get("ruleType");
    RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);

    boolean concat = getBoolean("concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // LanguageSet: defaults to automagic, otherwise a comma-separated list.
    String languageSetArg = args.get("languageSet");
    String trimmedLanguageSet = (languageSetArg == null) ? null : languageSetArg.trim();
    if (trimmedLanguageSet == null || trimmedLanguageSet.equals("auto")) {
        languageSet = null;
    } else {
        // Trim every entry: a bare split(",") would keep the blank in "italian, greek"
        // and hand the unknown language " greek" to the engine.
        HashSet<String> languages = new HashSet<String>();
        for (String language : trimmedLanguageSet.split(",")) {
            languages.add(language.trim());
        }
        languageSet = LanguageSet.from(languages);
    }
}

From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java

/**
 * Builds the phonetic token filter matching this factory's configuration.
 *
 * <p>A non-null {@code encoder} produces a {@code PhoneticFilter}. Without an encoder, a
 * non-null {@code ruletype} and {@code nametype} produce a {@code BeiderMorseFilter},
 * limited to {@code languageset} when that is set. Failing that, a positive
 * {@code maxcodelength} produces a {@code DoubleMetaphoneFilter}.
 *
 * @param tokenStream the stream to wrap
 * @return the filter wrapping {@code tokenStream}
 * @throws IllegalArgumentException if no filter can be chosen from the configuration
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (encoder != null) {
        return new PhoneticFilter(tokenStream, encoder, !replace);
    }

    if (ruletype != null && nametype != null) {
        if (languageset == null) {
            return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true));
        }
        final LanguageSet allowed = LanguageSet.from(new HashSet<>(languageset));
        return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true),
                allowed);
    }

    if (maxcodelength > 0) {
        return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
    }

    throw new IllegalArgumentException("encoder error");
}