Example usage for org.apache.commons.codec.language DaitchMokotoffSoundex DaitchMokotoffSoundex

List of usage examples for org.apache.commons.codec.language DaitchMokotoffSoundex DaitchMokotoffSoundex

Introduction

In this page you can find the example usage for org.apache.commons.codec.language DaitchMokotoffSoundex DaitchMokotoffSoundex.

Prototype

public DaitchMokotoffSoundex() 

Source Link

Document

Creates a new instance with ASCII-folding enabled.

Usage

From source file:com.github.stagirs.lingvo.morph.MorphPredictor.java

public static Morph get(String word) {
    MorphStateMachine.State state = MorphStateMachine.begin();
    int finish = word.length() - 1;
    for (; finish >= word.length() - 2; finish--) {
        MorphStateMachine.State curState = MorphStateMachine.getState(state, word.charAt(finish));
        if (curState == null) {
            break;
        }/*from   w  w  w.  java  2s.  c  om*/
        state = curState;
    }
    MorphIterator iterator = new MorphIterator(state);
    Morph minMorph = null;
    int minDistanceLev = Integer.MAX_VALUE;
    int minDistanceSoundex = Integer.MAX_VALUE;
    DaitchMokotoffSoundex soundex = new DaitchMokotoffSoundex();
    String translitWord = toTranslit(word);
    byte[] encoded = getBytes(soundex, translitWord);
    while (iterator.hasNext()) {
        Morph morph = iterator.next();
        int distanceLev = StringUtils.getLevenshteinDistance(word, morph.getRaw());
        if (minDistanceLev < distanceLev) {
            continue;
        }
        int distanceSoundex = getDistance(soundex, morph.getRaw(), encoded);
        if (minDistanceLev == distanceLev && minDistanceSoundex < distanceSoundex) {
            continue;
        }
        minMorph = morph;
        minDistanceLev = distanceLev;
        minDistanceSoundex = distanceSoundex;
    }
    if (minMorph == null) {
        return null;
    }
    minMorph.setWord(word);
    return minMorph;
}

From source file:com.example.PhoneticTokenFilterFactory.java

@Inject
public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name,
        @Assisted Settings settings) {//w  w  w  .java  2s .c o m
    super(index, indexSettingsService.getSettings(), name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsArray("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}

From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java

public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
        Settings settings) {/*from  ww  w  . j a  v a 2  s . c  o  m*/
    super(indexSettings, name, settings);
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);
    // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default
    String encodername = settings.get("encoder", "metaphone");
    if ("metaphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encodername)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("caverphone".equalsIgnoreCase(encodername)) {
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encodername)
            || "refinedSoundex".equalsIgnoreCase(encodername)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encodername)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encodername)
            || "doubleMetaphone".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername)
            || "beidermorse".equalsIgnoreCase(encodername)) {
        this.encoder = null;
        this.languageset = settings.getAsList("languageset");
        String ruleType = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleType)) {
            ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleType + "] for beider morse encoder");
        }
        String nameType = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) {
            nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) {
            nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encodername)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encodername)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter");
    }
}