Example usage for org.apache.commons.codec.language.bm NameType GENERIC

List of usage examples for org.apache.commons.codec.language.bm NameType GENERIC

Introduction

On this page you can find example usages of org.apache.commons.codec.language.bm NameType GENERIC.

Prototype

NameType GENERIC

To view the source code for org.apache.commons.codec.language.bm NameType GENERIC, click the Source Link.

Usage

From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseTokenFilter.java

/**
 * Manual smoke run: builds one Beider-Morse engine (GENERIC names, APPROX rules,
 * third argument {@code true}, fourth argument {@code 20}) and prints the encodings
 * of "test" and "sample", ten rounds in a row.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true, 20);
    final String[] words = { "test", "sample" };
    int round = 0;
    while (round < 10) {
        // Same output order as before: "test" first, then "sample", in every round.
        for (String word : words) {
            System.out.println(engine.encode(word));
        }
        round++;
    }
}

From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseCache.java

/**
 * Returns the engine stored in {@code encoders} for the given key, creating and
 * storing a new GENERIC engine when none is present yet.
 *
 * @param encoderKey supplies the rule type ({@code type}) and the {@code maxPhonemes}
 *                   value used when a new engine has to be built
 * @return the stored engine, or the newly created one
 */
public PhoneticEngine getEncoder(EncoderKey encoderKey) {
    // First attempt: plain lookup while holding only the "r" lock
    // (presumably the read side of a read/write lock pair).
    encodersLock.r.lock();
    try {
        PhoneticEngine cached = encoders.get(encoderKey);
        if (cached != null) {
            return cached;
        }
    } finally {
        encodersLock.r.unlock();
    }

    // Second attempt under the "w" lock: look again before creating, because another
    // caller may have stored an engine between the two lock acquisitions.
    encodersLock.w.lock();
    try {
        PhoneticEngine existing = encoders.get(encoderKey);
        if (existing != null) {
            return existing;
        }
        PhoneticEngine created = new PhoneticEngine(NameType.GENERIC, encoderKey.type, true,
                encoderKey.maxPhonemes);
        encoders.put(encoderKey, created);
        return created;
    } finally {
        encodersLock.w.unlock();
    }
}

From source file:com.example.PhoneticTokenFilterFactory.java

/**
 * Builds the factory from index settings and selects the phonetic encoder.
 *
 * <p>Settings read here:
 * <ul>
 *   <li>{@code replace} - boolean, defaults to {@code true}</li>
 *   <li>{@code encoder} - encoder name, matched case-insensitively, defaults to {@code metaphone}</li>
 *   <li>{@code max_code_len} - only for double metaphone, defaults to {@code 4}</li>
 *   <li>{@code languageset}, {@code rule_type} (default {@code approx}) and
 *       {@code name_type} (default {@code generic}) - only for Beider-Morse</li>
 * </ul>
 *
 * <p>For the double metaphone and Beider-Morse choices no encoder instance is created
 * here ({@code encoder} stays {@code null}); only their parameters are recorded.
 *
 * @throws IllegalArgumentException if the encoder name, the Beider-Morse rule type or
 *         the Beider-Morse name type is not one of the recognised values
 */
@Inject
public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // The encoder was observed to be null at the last step of SimplePhoneticAnalysisTests,
    // so metaphone is used as the default.
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        // Plain "caverphone" is treated the same as "caverphone2".
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsArray("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        } else {
            // Reject unknown values up front, mirroring the rule_type handling above,
            // instead of silently leaving nametype null.
            throw new IllegalArgumentException(
                    "No matching name type [" + nameType + "] for beider morse encoder");
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java

/**
 * Creates a new BeiderMorseFilterFactory.
 *
 * <p>Recognised arguments: {@code nameType} (default GENERIC), {@code ruleType}
 * (default APPROX), {@code concat} (default {@code true}) and {@code languageSet}.
 * A missing {@code languageSet}, or one whose only entry is {@code "auto"}, leaves
 * the language set {@code null}.
 *
 * @param args factory arguments; anything still present in the map after the
 *             arguments above have been read is reported as an unknown parameter
 * @throws IllegalArgumentException if {@code args} is not empty at the end, or if
 *         {@code nameType} / {@code ruleType} do not name an enum constant
 */
public BeiderMorseFilterFactory(Map<String, String> args) {
    super(args);

    // A PhoneticEngine is defined by NameType + RuleType + concat; the fallbacks are
    // commons-codec's defaults: GENERIC + APPROX + true.
    final String nameTypeName = get(args, "nameType", NameType.GENERIC.toString());
    final NameType nameType = NameType.valueOf(nameTypeName);
    final String ruleTypeName = get(args, "ruleType", RuleType.APPROX.toString());
    final RuleType ruleType = RuleType.valueOf(ruleTypeName);
    final boolean concat = getBoolean(args, "concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // Language set: automatic when absent or exactly {"auto"}, otherwise the listed languages.
    final Set<String> requested = getSet(args, "languageSet");
    final boolean automatic = requested == null || (requested.size() == 1 && requested.contains("auto"));
    languageSet = automatic ? null : LanguageSet.from(requested);

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Installs the analyzer used by the tests: a whitespace {@link MockTokenizer} feeding a
 * {@link BeiderMorseFilter} configured with a GENERIC / EXACT engine.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
            final BeiderMorseFilter filtered = new BeiderMorseFilter(source, engine);
            return new TokenStreamComponents(source, filtered);
        }
    };
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/** Restricts the output to a set of possible origin languages (italian, greek, spanish). */
public void testLanguageSet() throws Exception {
    final HashSet<String> origins = new HashSet<String>();
    origins.add("italian");
    origins.add("greek");
    origins.add("spanish");
    final LanguageSet languages = LanguageSet.from(origins);

    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
            final BeiderMorseFilter filtered = new BeiderMorseFilter(source, engine, languages);
            return new TokenStreamComponents(source, filtered);
        }
    };

    // Expected tokens for "Angelo", followed by the three int arrays passed to
    // assertAnalyzesTo in the same order as before.
    final String[] expectedTerms = { "andZelo", "angelo", "anxelo" };
    final int[] expectedFirst = { 0, 0, 0, };
    final int[] expectedSecond = { 6, 6, 6, };
    final int[] expectedThird = { 1, 0, 0, };
    assertAnalyzesTo(analyzer, "Angelo", expectedTerms, expectedFirst, expectedSecond, expectedThird);
    analyzer.close();
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/** Checks that an empty input term comes out of the filter as an empty term. */
public void testEmptyTerm() throws IOException {
    Analyzer emptyTermAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new KeywordTokenizer();
            final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
            final BeiderMorseFilter filtered = new BeiderMorseFilter(source, engine);
            return new TokenStreamComponents(source, filtered);
        }
    };
    checkOneTerm(emptyTermAnalyzer, "", "");
    emptyTermAnalyzer.close();
}

From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java

/**
 * Runs "D'Angelo" through a keyword-marking filter followed by the Beider-Morse filter
 * and checks that every emitted token is flagged as a keyword and that exactly
 * 12 tokens are produced.
 */
public void testCustomAttribute() throws IOException {
    // Keep the tokenizer in its own typed local so the reader can be set without a cast.
    final Tokenizer source = new MockTokenizer(MockTokenizer.KEYWORD, false);
    source.setReader(new StringReader("D'Angelo"));

    TokenStream stream = new PatternKeywordMarkerFilter(source, Pattern.compile(".*"));
    final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
    stream = new BeiderMorseFilter(stream, engine);

    final KeywordAttribute keywordFlag = stream.addAttribute(KeywordAttribute.class);
    stream.reset();
    int tokenCount = 0;
    for (; stream.incrementToken(); tokenCount++) {
        assertTrue(keywordFlag.isKeyword());
    }
    assertEquals(12, tokenCount);
    stream.end();
    stream.close();
}

From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java

/**
 * Configures the factory from its arguments.
 *
 * <p>Recognised arguments: {@code nameType} (default GENERIC), {@code ruleType}
 * (default APPROX), {@code concat} (default {@code true}) and {@code languageSet},
 * a comma-separated list of language names. A missing {@code languageSet} or the
 * value {@code "auto"} leaves the language set {@code null}. Whitespace around the
 * value and around each list entry is ignored, so {@code "italian, greek"} is read
 * as the two names {@code italian} and {@code greek}.
 *
 * @param args factory arguments
 * @throws IllegalArgumentException if {@code nameType} or {@code ruleType} does not
 *         name an enum constant
 */
@Override
public void init(Map<String, String> args) {
    super.init(args);

    // PhoneticEngine = NameType + RuleType + concat
    // we use common-codec's defaults: GENERIC + APPROX + true
    String nameTypeArg = args.get("nameType");
    NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg);

    String ruleTypeArg = args.get("ruleType");
    RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);

    boolean concat = getBoolean("concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // LanguageSet: defaults to automagic, otherwise a comma-separated list.
    String languageSetArg = args.get("languageSet");
    if (languageSetArg == null || languageSetArg.trim().equals("auto")) {
        languageSet = null;
    } else {
        // Trim every entry: without this, "italian, greek" would produce the name
        // " greek" (with a leading space) instead of "greek".
        HashSet<String> languages = new HashSet<String>();
        for (String language : languageSetArg.split(",")) {
            languages.add(language.trim());
        }
        languageSet = LanguageSet.from(languages);
    }
}

From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java

/**
 * Builds the factory from index settings and selects the phonetic encoder.
 *
 * <p>Settings read here:
 * <ul>
 *   <li>{@code replace} - boolean, defaults to {@code true}</li>
 *   <li>{@code encoder} - encoder name, matched case-insensitively, defaults to {@code metaphone}</li>
 *   <li>{@code max_code_len} - only for double metaphone, defaults to {@code 4}</li>
 *   <li>{@code languageset}, {@code rule_type} (default {@code approx}) and
 *       {@code name_type} (default {@code generic}) - only for Beider-Morse</li>
 * </ul>
 *
 * <p>For the double metaphone and Beider-Morse choices no encoder instance is created
 * here ({@code encoder} stays {@code null}); only their parameters are recorded.
 *
 * @throws IllegalArgumentException if the encoder name, the Beider-Morse rule type or
 *         the Beider-Morse name type is not one of the recognised values
 */
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
        Settings settings) {
    super(indexSettings, name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // The encoder was observed to be null at the last step of SimplePhoneticAnalysisTests,
    // so metaphone is used as the default.
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        // Plain "caverphone" is treated the same as "caverphone2".
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsList("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        } else {
            // Reject unknown values up front, mirroring the rule_type handling above,
            // instead of silently leaving nametype null.
            throw new IllegalArgumentException(
                    "No matching name type [" + nameType + "] for beider morse encoder");
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}