Example usage for org.apache.commons.codec.language Caverphone1 Caverphone1

List of usage examples for org.apache.commons.codec.language Caverphone1 Caverphone1

Introduction

In this page you can find the example usage for org.apache.commons.codec.language Caverphone1 Caverphone1.

Prototype

Caverphone1

Source Link

Usage

From source file:com.vangent.hieos.empi.transform.Caverphone1TransformFunction.java

/**
 * //w  w  w .jav  a  2s.c  o m
 * @param obj
 * @return
 */
public Object transform(Object obj) {
    Caverphone1 encoder = new Caverphone1();
    return encoder.encode((String) obj);
}

From source file:com.example.PhoneticTokenFilterFactory.java

@Inject
public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name,
        @Assisted Settings settings) {/*from  w  w  w  .  j a va2  s . c  o m*/
    super(index, indexSettingsService.getSettings(), name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsArray("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:com.jaeksoft.searchlib.analysis.filter.PhoneticFilter.java

@Override
public TokenStream create(TokenStream tokenStream) {
    if (BEIDER_MORSE.equals(codec))
        return new BeiderMorseTokenFilter(tokenStream, new EncoderKey(ruleType, maxPhonemes));
    if (COLOGNE_PHONETIC.equals(codec))
        return new EncoderTokenFilter(tokenStream, new ColognePhonetic());
    if (SOUNDEX.equals(codec))
        return new EncoderTokenFilter(tokenStream, new Soundex());
    if (REFINED_SOUNDEX.equals(codec))
        return new EncoderTokenFilter(tokenStream, new RefinedSoundex());
    if (METAPHONE.equals(codec))
        return new EncoderTokenFilter(tokenStream, new Metaphone());
    if (CAVERPHONE1.equals(codec))
        return new EncoderTokenFilter(tokenStream, new Caverphone1());
    if (CAVERPHONE2.equals(codec))
        return new EncoderTokenFilter(tokenStream, new Caverphone2());
    return null;//from w w  w  .  j a v a  2 s.c o  m
}

From source file:org.elasticsearch.index.analysis.phonetic.PhoneticTokenFilterFactory.java

@Inject
public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name,
        @Assisted Settings settings) {// w  w w .j ava 2  s . co m
    super(index, indexSettings, name, settings);
    this.replace = settings.getAsBoolean("replace", true);
    String encoder = settings.get("encoder");
    if (encoder == null) {
        throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter");
    }
    if ("metaphone".equalsIgnoreCase(encoder)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encoder)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encoder)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encoder)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encoder)) {
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encoder) || "refinedSoundex".equalsIgnoreCase(encoder)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encoder)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encoder) || "doubleMetaphone".equalsIgnoreCase(encoder)) {
        DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
        doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen()));
        this.encoder = doubleMetaphone;
    } else {
        throw new ElasticSearchIllegalArgumentException(
                "unknown encoder [" + encoder + "] for phonetic token filter");
    }
}

From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java

public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
        Settings settings) {//from   w ww  .j  av  a2 s . c om
    super(indexSettings, name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsList("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:org.mitre.opensextant.phonetic.Phoneticizer.java

public Phoneticizer() {

    // populate the algorithms Map with an instance of each encoder

    // first the ones from Apache Commons
    BeiderMorseEncoder bmExact = new BeiderMorseEncoder();
    bmExact.setRuleType(RuleType.EXACT);
    bmExact.setConcat(false);//from w  w  w  .  j  a  v  a2 s  .c  o m
    BeiderMorseEncoder bmApprox = new BeiderMorseEncoder();
    bmApprox.setRuleType(RuleType.APPROX);
    bmApprox.setConcat(false);
    // StringEncoder caver = new Caverphone();
    StringEncoder caver1 = new Caverphone1();
    StringEncoder caver2 = new Caverphone2();
    StringEncoder colgne = new ColognePhonetic();
    DoubleMetaphone doubleMeta = new DoubleMetaphone();
    doubleMeta.setMaxCodeLen(10);
    StringEncoder meta = new Metaphone();
    StringEncoder refinedSound = new RefinedSoundex();
    StringEncoder sound = new Soundex();

    // now, the home-brewed ones
    StringEncoder noop = new NullEncoder();
    StringEncoder caser = new CaseEncoder();
    StringEncoder diaRemover = new DiacriticEncoder();
    StringEncoder punctRemover = new PunctEncoder();
    StringEncoder simple0 = new SimplePhonetic0Encoder();
    StringEncoder simple1 = new SimplePhonetic1Encoder();
    StringEncoder simple2 = new SimplePhonetic2Encoder();

    // not really language encodings
    // StringEncoder qcode = new QCodec();
    // StringEncoder qpcode = new QuotedPrintableCodec();
    // StringEncoder urlcode = new URLCodec();

    algorithms.put("Beider-Morse-Exact", bmExact);
    algorithms.put("Beider-Morse-Approximate", bmApprox);
    // algorithms.put("CaverPhone", caver);
    algorithms.put("CaverPhone_1.0", caver1);
    algorithms.put("CaverPhone_2.0", caver2);
    algorithms.put("Cologne_Phonetic", colgne);
    algorithms.put("Double_Metaphone", doubleMeta);
    algorithms.put("Metaphone", meta);
    algorithms.put("Refined_Soundex", refinedSound);
    algorithms.put("Soundex", sound);

    algorithms.put("Nothing", noop);
    algorithms.put("Case_Insensitive", caser);
    algorithms.put("Diacritic_Insensitive", diaRemover);
    algorithms.put("Puncuation_Insensitive", punctRemover);
    algorithms.put("Simple_Phonetic0", simple0);
    algorithms.put("Simple_Phonetic1", simple1);
    algorithms.put("Simple_Phonetic2", simple2);

    // not really language encodings
    // algorithms.put("Q Code", qcode);
    // algorithms.put("Q Printable", qpcode);
    // algorithms.put("URL Code", urlcode);

}

From source file:org.opensextant.phonetic.Phoneticizer.java

public Phoneticizer() {
    // populate the algorithms Map with an instance of each encoder
    // first the ones from Apache Commons
    BeiderMorseEncoder bmExact = new BeiderMorseEncoder();
    bmExact.setRuleType(RuleType.EXACT);
    bmExact.setConcat(false);/* w w  w . j  a v  a2 s  .co m*/
    BeiderMorseEncoder bmApprox = new BeiderMorseEncoder();
    bmApprox.setRuleType(RuleType.APPROX);
    bmApprox.setConcat(false);
    // StringEncoder caver = new Caverphone();
    StringEncoder caver1 = new Caverphone1();
    StringEncoder caver2 = new Caverphone2();
    StringEncoder colgne = new ColognePhonetic();
    DoubleMetaphone doubleMeta = new DoubleMetaphone();
    doubleMeta.setMaxCodeLen(10);
    StringEncoder meta = new Metaphone();
    StringEncoder refinedSound = new RefinedSoundex();
    StringEncoder sound = new Soundex();
    // now, the home-brewed ones
    StringEncoder noop = new NullEncoder();
    StringEncoder caser = new CaseEncoder();
    StringEncoder diaRemover = new DiacriticEncoder();
    StringEncoder punctRemover = new PunctEncoder();
    StringEncoder simple0 = new SimplePhonetic0Encoder();
    StringEncoder simple0Solr = new SimplePhonetic0SolrEncoder();
    StringEncoder simple0SolrPlus = new SimplePhonetic0SolrPlusEncoder();
    StringEncoder simple1 = new SimplePhonetic1Encoder();
    StringEncoder simple2 = new SimplePhonetic2Encoder();
    // not really language encodings
    // StringEncoder qcode = new QCodec();
    // StringEncoder qpcode = new QuotedPrintableCodec();
    // StringEncoder urlcode = new URLCodec();
    algorithms.put("Beider-Morse-Exact", bmExact);
    algorithms.put("Beider-Morse-Approximate", bmApprox);
    // algorithms.put("CaverPhone", caver);
    algorithms.put("CaverPhone_1.0", caver1);
    algorithms.put("CaverPhone_2.0", caver2);
    algorithms.put("Cologne_Phonetic", colgne);
    algorithms.put("Double_Metaphone", doubleMeta);
    algorithms.put("Metaphone", meta);
    algorithms.put("Refined_Soundex", refinedSound);
    algorithms.put("Soundex", sound);
    algorithms.put("Nothing", noop);
    algorithms.put("Case_Insensitive", caser);
    algorithms.put("Diacritic_Insensitive", diaRemover);
    algorithms.put("Puncuation_Insensitive", punctRemover);
    algorithms.put("Simple_Phonetic0", simple0);
    algorithms.put("Simple_Phonetic0Solr", simple0Solr);
    algorithms.put("Simple_Phonetic0SolrPlus", simple0SolrPlus);
    algorithms.put("Simple_Phonetic1", simple1);
    algorithms.put("Simple_Phonetic2", simple2);
    // not really language encodings
    // algorithms.put("Q Code", qcode);
    // algorithms.put("Q Printable", qpcode);
    // algorithms.put("URL Code", urlcode);
}