Example usage for org.apache.commons.codec.language.bm RuleType APPROX

List of usage examples for org.apache.commons.codec.language.bm RuleType APPROX

Introduction

On this page you can find example usages of org.apache.commons.codec.language.bm RuleType APPROX.

Prototype

RuleType APPROX

To view the source code for org.apache.commons.codec.language.bm RuleType APPROX, click the Source Link.

Usage

From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseTokenFilter.java

/**
 * Small demo: creates a Beider-Morse {@code PhoneticEngine} configured with
 * {@code NameType.GENERIC} and {@code RuleType.APPROX} and prints the encodings of
 * the words "test" and "sample" ten times each.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Third argument is the concat flag; the fourth (20) is presumably the
    // max-phonemes limit of the commons-codec constructor - confirm against its Javadoc.
    final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true, 20);
    int round = 0;
    while (round < 10) {
        System.out.println(engine.encode("test"));
        System.out.println(engine.encode("sample"));
        round++;
    }
}

From source file:com.example.PhoneticTokenFilterFactory.java

/**
 * Builds the phonetic token filter factory from the filter settings.
 *
 * <p>Settings read here:
 * <ul>
 *   <li>{@code replace} (default {@code true})</li>
 *   <li>{@code encoder} (default {@code "metaphone"}) - selects the encoder implementation</li>
 *   <li>{@code max_code_len} (default {@code 4}) - only for the double metaphone encoder</li>
 *   <li>{@code languageset}, {@code rule_type} (default {@code "approx"}) and
 *       {@code name_type} (default {@code "generic"}) - only for the Beider-Morse encoder</li>
 * </ul>
 *
 * @param index                the index this filter belongs to
 * @param indexSettingsService source of the index-level settings passed to the superclass
 * @param name                 the filter name
 * @param settings             the filter settings described above
 * @throws IllegalArgumentException if the encoder name, the Beider-Morse rule type or the
 *                                  Beider-Morse name type is not recognised
 */
@Inject
public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // Default to metaphone: the encoder setting was observed to be missing in the
    // last step of SimplePhoneticAnalysisTests.
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        // Plain "caverphone" is an alias for the 2.0 variant.
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        // No Encoder instance for double metaphone; only the code length is recorded here.
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        // No Encoder instance for Beider-Morse; its configuration is recorded in the fields below.
        this.encoder = null;
        this.languageset = settings.getAsArray("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        } else {
            // Reject unknown name types up front, mirroring the rule_type handling,
            // instead of silently leaving nametype null.
            throw new IllegalArgumentException(
                    "No matching name type [" + nameType + "] for beider morse encoder");
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java

/**
 * Creates a new BeiderMorseFilterFactory.
 *
 * <p>Reads {@code nameType}, {@code ruleType}, {@code concat} and {@code languageSet}
 * from {@code args}; any entries still left in {@code args} afterwards are rejected.
 *
 * @param args the factory arguments
 * @throws IllegalArgumentException if unknown parameters remain in {@code args}
 */
public BeiderMorseFilterFactory(Map<String, String> args) {
    super(args);

    // A PhoneticEngine is defined by NameType + RuleType + concat; missing arguments
    // fall back to the commons-codec defaults GENERIC + APPROX + true.
    final String nameTypeArg = get(args, "nameType", NameType.GENERIC.toString());
    final NameType nameType = NameType.valueOf(nameTypeArg);
    final String ruleTypeArg = get(args, "ruleType", RuleType.APPROX.toString());
    final RuleType ruleType = RuleType.valueOf(ruleTypeArg);
    final boolean concat = getBoolean(args, "concat", true);
    engine = new PhoneticEngine(nameType, ruleType, concat);

    // Language set: absent, or the single value "auto", means automatic detection (null);
    // anything else is taken as an explicit set of languages.
    final Set<String> requested = getSet(args, "languageSet");
    final boolean autoDetect = requested == null
            || (requested.size() == 1 && requested.contains("auto"));
    if (autoDetect) {
        languageSet = null;
    } else {
        languageSet = LanguageSet.from(requested);
    }

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}

From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java

/**
 * Initialises the factory from its arguments: builds the {@code PhoneticEngine} and
 * resolves the optional language set.
 *
 * @param args the factory arguments ({@code nameType}, {@code ruleType} and
 *             {@code languageSet} are read directly from this map)
 */
public void init(Map<String, String> args) {
    super.init(args);

    // A PhoneticEngine is defined by NameType + RuleType + concat; missing arguments
    // fall back to the commons-codec defaults GENERIC + APPROX + true.
    final String requestedNameType = args.get("nameType");
    final NameType nameType;
    if (requestedNameType != null) {
        nameType = NameType.valueOf(requestedNameType);
    } else {
        nameType = NameType.GENERIC;
    }

    final String requestedRuleType = args.get("ruleType");
    final RuleType ruleType;
    if (requestedRuleType != null) {
        ruleType = RuleType.valueOf(requestedRuleType);
    } else {
        ruleType = RuleType.APPROX;
    }

    engine = new PhoneticEngine(nameType, ruleType, getBoolean("concat", true));

    // Language set: absent or "auto" means automatic detection (null);
    // otherwise the value is split on commas into an explicit set.
    final String requestedLanguages = args.get("languageSet");
    if (requestedLanguages != null && !requestedLanguages.equals("auto")) {
        final String[] names = requestedLanguages.split(",");
        languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(names)));
    } else {
        languageSet = null;
    }
}

From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java

/**
 * Builds the phonetic token filter factory from the filter settings.
 *
 * <p>Settings read here:
 * <ul>
 *   <li>{@code replace} (default {@code true})</li>
 *   <li>{@code encoder} (default {@code "metaphone"}) - selects the encoder implementation</li>
 *   <li>{@code max_code_len} (default {@code 4}) - only for the double metaphone encoder</li>
 *   <li>{@code languageset}, {@code rule_type} (default {@code "approx"}) and
 *       {@code name_type} (default {@code "generic"}) - only for the Beider-Morse encoder</li>
 * </ul>
 *
 * @param indexSettings the index settings passed to the superclass
 * @param environment   the node environment (not used in this constructor)
 * @param name          the filter name
 * @param settings      the filter settings described above
 * @throws IllegalArgumentException if the encoder name, the Beider-Morse rule type or the
 *                                  Beider-Morse name type is not recognised
 */
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
        Settings settings) {
    super(indexSettings, name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // Default to metaphone: the encoder setting was observed to be missing in the
    // last step of SimplePhoneticAnalysisTests.
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        // Plain "caverphone" is an alias for the 2.0 variant.
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        // No Encoder instance for double metaphone; only the code length is recorded here.
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        // No Encoder instance for Beider-Morse; its configuration is recorded in the fields below.
        this.encoder = null;
        this.languageset = settings.getAsList("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        } else {
            // Reject unknown name types up front, mirroring the rule_type handling,
            // instead of silently leaving nametype null.
            throw new IllegalArgumentException(
                    "No matching name type [" + nameType + "] for beider morse encoder");
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:org.mitre.opensextant.phonetic.Phoneticizer.java

/**
 * Populates the {@code algorithms} map with one instance of each encoder,
 * keyed by the algorithm's display name.
 */
public Phoneticizer() {

    // --- encoders from Apache Commons ---

    BeiderMorseEncoder exactBeiderMorse = new BeiderMorseEncoder();
    exactBeiderMorse.setRuleType(RuleType.EXACT);
    exactBeiderMorse.setConcat(false);
    algorithms.put("Beider-Morse-Exact", exactBeiderMorse);

    BeiderMorseEncoder approxBeiderMorse = new BeiderMorseEncoder();
    approxBeiderMorse.setRuleType(RuleType.APPROX);
    approxBeiderMorse.setConcat(false);
    algorithms.put("Beider-Morse-Approximate", approxBeiderMorse);

    // The generic Caverphone encoder is not registered; only the 1.0 and 2.0 variants are.
    algorithms.put("CaverPhone_1.0", new Caverphone1());
    algorithms.put("CaverPhone_2.0", new Caverphone2());
    algorithms.put("Cologne_Phonetic", new ColognePhonetic());

    DoubleMetaphone longDoubleMetaphone = new DoubleMetaphone();
    longDoubleMetaphone.setMaxCodeLen(10);
    algorithms.put("Double_Metaphone", longDoubleMetaphone);

    algorithms.put("Metaphone", new Metaphone());
    algorithms.put("Refined_Soundex", new RefinedSoundex());
    algorithms.put("Soundex", new Soundex());

    // --- home-brewed encoders ---

    algorithms.put("Nothing", new NullEncoder());
    algorithms.put("Case_Insensitive", new CaseEncoder());
    algorithms.put("Diacritic_Insensitive", new DiacriticEncoder());
    algorithms.put("Puncuation_Insensitive", new PunctEncoder());
    algorithms.put("Simple_Phonetic0", new SimplePhonetic0Encoder());
    algorithms.put("Simple_Phonetic1", new SimplePhonetic1Encoder());
    algorithms.put("Simple_Phonetic2", new SimplePhonetic2Encoder());

    // QCodec, QuotedPrintableCodec and URLCodec are deliberately not registered:
    // they are not really language encodings.

}

From source file:org.opensextant.phonetic.Phoneticizer.java

/**
 * Populates the {@code algorithms} map with one instance of each encoder,
 * keyed by the algorithm's display name.
 */
public Phoneticizer() {
    // --- encoders from Apache Commons ---
    BeiderMorseEncoder exactBeiderMorse = new BeiderMorseEncoder();
    exactBeiderMorse.setRuleType(RuleType.EXACT);
    exactBeiderMorse.setConcat(false);
    algorithms.put("Beider-Morse-Exact", exactBeiderMorse);

    BeiderMorseEncoder approxBeiderMorse = new BeiderMorseEncoder();
    approxBeiderMorse.setRuleType(RuleType.APPROX);
    approxBeiderMorse.setConcat(false);
    algorithms.put("Beider-Morse-Approximate", approxBeiderMorse);

    // The generic Caverphone encoder is not registered; only the 1.0 and 2.0 variants are.
    algorithms.put("CaverPhone_1.0", new Caverphone1());
    algorithms.put("CaverPhone_2.0", new Caverphone2());
    algorithms.put("Cologne_Phonetic", new ColognePhonetic());

    DoubleMetaphone longDoubleMetaphone = new DoubleMetaphone();
    longDoubleMetaphone.setMaxCodeLen(10);
    algorithms.put("Double_Metaphone", longDoubleMetaphone);

    algorithms.put("Metaphone", new Metaphone());
    algorithms.put("Refined_Soundex", new RefinedSoundex());
    algorithms.put("Soundex", new Soundex());

    // --- home-brewed encoders ---
    algorithms.put("Nothing", new NullEncoder());
    algorithms.put("Case_Insensitive", new CaseEncoder());
    algorithms.put("Diacritic_Insensitive", new DiacriticEncoder());
    algorithms.put("Puncuation_Insensitive", new PunctEncoder());
    algorithms.put("Simple_Phonetic0", new SimplePhonetic0Encoder());
    algorithms.put("Simple_Phonetic0Solr", new SimplePhonetic0SolrEncoder());
    algorithms.put("Simple_Phonetic0SolrPlus", new SimplePhonetic0SolrPlusEncoder());
    algorithms.put("Simple_Phonetic1", new SimplePhonetic1Encoder());
    algorithms.put("Simple_Phonetic2", new SimplePhonetic2Encoder());

    // QCodec, QuotedPrintableCodec and URLCodec are deliberately not registered:
    // they are not really language encodings.
}