List of usage examples for org.apache.commons.codec.language.bm.RuleType.APPROX
RuleType APPROX
To view the source code for org.apache.commons.codec.language.bm.RuleType.APPROX, click the Source Link.
From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseTokenFilter.java
public static void main(String[] args) { PhoneticEngine encoder = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true, 20); for (int i = 0; i < 10; i++) { System.out.println(encoder.encode("test")); System.out.println(encoder.encode("sample")); }// w w w .ja v a 2s. com }
From source file:com.example.PhoneticTokenFilterFactory.java
@Inject public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {/* ww w . ja v a 2 s.c o m*/ super(index, indexSettingsService.getSettings(), name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsArray("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException( "No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = 
settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }
From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java
/** Creates a new BeiderMorseFilterFactory */ public BeiderMorseFilterFactory(Map<String, String> args) { super(args);// w ww. j a va 2 s . c o m // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true NameType nameType = NameType.valueOf(get(args, "nameType", NameType.GENERIC.toString())); RuleType ruleType = RuleType.valueOf(get(args, "ruleType", RuleType.APPROX.toString())); boolean concat = getBoolean(args, "concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. Set<String> langs = getSet(args, "languageSet"); languageSet = (null == langs || (1 == langs.size() && langs.contains("auto"))) ? null : LanguageSet.from(langs); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java
public void init(Map<String, String> args) { super.init(args); // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true String nameTypeArg = args.get("nameType"); NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg); String ruleTypeArg = args.get("ruleType"); RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg); boolean concat = getBoolean("concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. String languageSetArg = args.get("languageSet"); if (languageSetArg == null || languageSetArg.equals("auto")) { languageSet = null;/*w w w . j a v a 2 s .com*/ } else { languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(",")))); } }
From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {/*from w w w.j a v a 2s .co m*/ super(indexSettings, name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsList("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException( "No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = settings.get("name_type", "generic"); if 
("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }
From source file:org.mitre.opensextant.phonetic.Phoneticizer.java
public Phoneticizer() { // populate the algorithms Map with an instance of each encoder // first the ones from Apache Commons BeiderMorseEncoder bmExact = new BeiderMorseEncoder(); bmExact.setRuleType(RuleType.EXACT); bmExact.setConcat(false);/*from w ww .j ava2 s . c om*/ BeiderMorseEncoder bmApprox = new BeiderMorseEncoder(); bmApprox.setRuleType(RuleType.APPROX); bmApprox.setConcat(false); // StringEncoder caver = new Caverphone(); StringEncoder caver1 = new Caverphone1(); StringEncoder caver2 = new Caverphone2(); StringEncoder colgne = new ColognePhonetic(); DoubleMetaphone doubleMeta = new DoubleMetaphone(); doubleMeta.setMaxCodeLen(10); StringEncoder meta = new Metaphone(); StringEncoder refinedSound = new RefinedSoundex(); StringEncoder sound = new Soundex(); // now, the home-brewed ones StringEncoder noop = new NullEncoder(); StringEncoder caser = new CaseEncoder(); StringEncoder diaRemover = new DiacriticEncoder(); StringEncoder punctRemover = new PunctEncoder(); StringEncoder simple0 = new SimplePhonetic0Encoder(); StringEncoder simple1 = new SimplePhonetic1Encoder(); StringEncoder simple2 = new SimplePhonetic2Encoder(); // not really language encodings // StringEncoder qcode = new QCodec(); // StringEncoder qpcode = new QuotedPrintableCodec(); // StringEncoder urlcode = new URLCodec(); algorithms.put("Beider-Morse-Exact", bmExact); algorithms.put("Beider-Morse-Approximate", bmApprox); // algorithms.put("CaverPhone", caver); algorithms.put("CaverPhone_1.0", caver1); algorithms.put("CaverPhone_2.0", caver2); algorithms.put("Cologne_Phonetic", colgne); algorithms.put("Double_Metaphone", doubleMeta); algorithms.put("Metaphone", meta); algorithms.put("Refined_Soundex", refinedSound); algorithms.put("Soundex", sound); algorithms.put("Nothing", noop); algorithms.put("Case_Insensitive", caser); algorithms.put("Diacritic_Insensitive", diaRemover); algorithms.put("Puncuation_Insensitive", punctRemover); algorithms.put("Simple_Phonetic0", simple0); 
algorithms.put("Simple_Phonetic1", simple1); algorithms.put("Simple_Phonetic2", simple2); // not really language encodings // algorithms.put("Q Code", qcode); // algorithms.put("Q Printable", qpcode); // algorithms.put("URL Code", urlcode); }
From source file:org.opensextant.phonetic.Phoneticizer.java
public Phoneticizer() { // populate the algorithms Map with an instance of each encoder // first the ones from Apache Commons BeiderMorseEncoder bmExact = new BeiderMorseEncoder(); bmExact.setRuleType(RuleType.EXACT); bmExact.setConcat(false);// w w w. j a v a 2 s.c o m BeiderMorseEncoder bmApprox = new BeiderMorseEncoder(); bmApprox.setRuleType(RuleType.APPROX); bmApprox.setConcat(false); // StringEncoder caver = new Caverphone(); StringEncoder caver1 = new Caverphone1(); StringEncoder caver2 = new Caverphone2(); StringEncoder colgne = new ColognePhonetic(); DoubleMetaphone doubleMeta = new DoubleMetaphone(); doubleMeta.setMaxCodeLen(10); StringEncoder meta = new Metaphone(); StringEncoder refinedSound = new RefinedSoundex(); StringEncoder sound = new Soundex(); // now, the home-brewed ones StringEncoder noop = new NullEncoder(); StringEncoder caser = new CaseEncoder(); StringEncoder diaRemover = new DiacriticEncoder(); StringEncoder punctRemover = new PunctEncoder(); StringEncoder simple0 = new SimplePhonetic0Encoder(); StringEncoder simple0Solr = new SimplePhonetic0SolrEncoder(); StringEncoder simple0SolrPlus = new SimplePhonetic0SolrPlusEncoder(); StringEncoder simple1 = new SimplePhonetic1Encoder(); StringEncoder simple2 = new SimplePhonetic2Encoder(); // not really language encodings // StringEncoder qcode = new QCodec(); // StringEncoder qpcode = new QuotedPrintableCodec(); // StringEncoder urlcode = new URLCodec(); algorithms.put("Beider-Morse-Exact", bmExact); algorithms.put("Beider-Morse-Approximate", bmApprox); // algorithms.put("CaverPhone", caver); algorithms.put("CaverPhone_1.0", caver1); algorithms.put("CaverPhone_2.0", caver2); algorithms.put("Cologne_Phonetic", colgne); algorithms.put("Double_Metaphone", doubleMeta); algorithms.put("Metaphone", meta); algorithms.put("Refined_Soundex", refinedSound); algorithms.put("Soundex", sound); algorithms.put("Nothing", noop); algorithms.put("Case_Insensitive", caser); algorithms.put("Diacritic_Insensitive", 
diaRemover); algorithms.put("Puncuation_Insensitive", punctRemover); algorithms.put("Simple_Phonetic0", simple0); algorithms.put("Simple_Phonetic0Solr", simple0Solr); algorithms.put("Simple_Phonetic0SolrPlus", simple0SolrPlus); algorithms.put("Simple_Phonetic1", simple1); algorithms.put("Simple_Phonetic2", simple2); // not really language encodings // algorithms.put("Q Code", qcode); // algorithms.put("Q Printable", qpcode); // algorithms.put("URL Code", urlcode); }