List of usage examples for org.apache.commons.codec.language.bm NameType GENERIC
NameType GENERIC
To view the source code for org.apache.commons.codec.language.bm NameType GENERIC, click the Source Link.
From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseTokenFilter.java
public static void main(String[] args) { PhoneticEngine encoder = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true, 20); for (int i = 0; i < 10; i++) { System.out.println(encoder.encode("test")); System.out.println(encoder.encode("sample")); }//w w w . j a v a2s . c o m }
From source file:com.jaeksoft.searchlib.analysis.filter.phonetic.BeiderMorseCache.java
/**
 * Returns the {@code PhoneticEngine} stored in {@code encoders} for the given key,
 * creating and storing a new one when none is present.
 *
 * <p>The map is first consulted while holding {@code encodersLock.r}; on a miss the
 * lookup is repeated while holding {@code encodersLock.w} before a new engine is built,
 * so an engine inserted in between is reused rather than replaced.
 * NOTE(review): {@code r}/{@code w} are presumably the read and write sides of a
 * read/write lock; confirm against the lock class.
 *
 * @param encoderKey key supplying the rule type and the maximum number of phonemes
 * @return the stored or newly created engine for {@code encoderKey}
 */
public PhoneticEngine getEncoder(EncoderKey encoderKey) {
    // First attempt: lookup only, under the "r" lock.
    encodersLock.r.lock();
    try {
        PhoneticEngine cached = encoders.get(encoderKey);
        if (cached != null) {
            return cached;
        }
    } finally {
        encodersLock.r.unlock();
    }

    // Miss: look again under the "w" lock, creating the engine only if it is still absent.
    encodersLock.w.lock();
    try {
        PhoneticEngine engine = encoders.get(encoderKey);
        if (engine == null) {
            engine = new PhoneticEngine(NameType.GENERIC, encoderKey.type, true, encoderKey.maxPhonemes);
            encoders.put(encoderKey, engine);
        }
        return engine;
    } finally {
        encodersLock.w.unlock();
    }
}
From source file:com.example.PhoneticTokenFilterFactory.java
@Inject public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {//from w w w. j a va2 s.c o m super(index, indexSettingsService.getSettings(), name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsArray("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException( "No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = 
settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }
From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java
/** Creates a new BeiderMorseFilterFactory */ public BeiderMorseFilterFactory(Map<String, String> args) { super(args);/*www. java 2 s.co m*/ // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true NameType nameType = NameType.valueOf(get(args, "nameType", NameType.GENERIC.toString())); RuleType ruleType = RuleType.valueOf(get(args, "ruleType", RuleType.APPROX.toString())); boolean concat = getBoolean(args, "concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. Set<String> langs = getSet(args, "languageSet"); languageSet = (null == langs || (1 == langs.size() && langs.contains("auto"))) ? null : LanguageSet.from(langs); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
/**
 * Runs the superclass set-up, then assigns {@code analyzer} an analyzer whose components
 * are a whitespace {@code MockTokenizer} followed by a {@code BeiderMorseFilter} using a
 * {@code GENERIC}/{@code EXACT} phonetic engine.
 */
@Override
public void setUp() throws Exception {
    super.setUp();
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
            BeiderMorseFilter phonetic = new BeiderMorseFilter(source, engine);
            return new TokenStreamComponents(source, phonetic);
        }
    };
}
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
/** restrict the output to a set of possible origin languages */
public void testLanguageSet() throws Exception {
    // Plain HashSet rather than double-brace initialization, which would create an
    // anonymous HashSet subclass just to populate three entries.
    HashSet<String> languageNames = new HashSet<String>();
    languageNames.add("italian");
    languageNames.add("greek");
    languageNames.add("spanish");
    final LanguageSet languages = LanguageSet.from(languageNames);

    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer,
                    new BeiderMorseFilter(tokenizer,
                            new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
        }
    };
    try {
        assertAnalyzesTo(analyzer, "Angelo",
                new String[] { "andZelo", "angelo", "anxelo" },
                new int[] { 0, 0, 0, },
                new int[] { 6, 6, 6, },
                new int[] { 1, 0, 0, });
    } finally {
        // Close the analyzer even when the assertion above fails.
        analyzer.close();
    }
}
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
public void testEmptyTerm() throws IOException { Analyzer a = new Analyzer() { @Override// ww w . ja v a 2 s .c o m protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true))); } }; checkOneTerm(a, "", ""); a.close(); }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
/**
 * Feeds "D'Angelo" through a keyword {@code MockTokenizer}, a
 * {@code PatternKeywordMarkerFilter} matching {@code ".*"} and a {@code GENERIC}/{@code EXACT}
 * {@code BeiderMorseFilter}, then asserts that every emitted token reports
 * {@code isKeyword()} and that exactly 12 tokens are produced.
 */
public void testCustomAttribute() throws IOException {
    Tokenizer source = new MockTokenizer(MockTokenizer.KEYWORD, false);
    source.setReader(new StringReader("D'Angelo"));

    TokenStream marked = new PatternKeywordMarkerFilter(source, Pattern.compile(".*"));
    TokenStream stream = new BeiderMorseFilter(marked,
            new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
    KeywordAttribute keywordAttribute = stream.addAttribute(KeywordAttribute.class);

    stream.reset();
    int tokenCount = 0;
    while (stream.incrementToken()) {
        assertTrue(keywordAttribute.isKeyword());
        tokenCount++;
    }
    assertEquals(12, tokenCount);
    stream.end();
    stream.close();
}
From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java
public void init(Map<String, String> args) { super.init(args); // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true String nameTypeArg = args.get("nameType"); NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg); String ruleTypeArg = args.get("ruleType"); RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg); boolean concat = getBoolean("concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. String languageSetArg = args.get("languageSet"); if (languageSetArg == null || languageSetArg.equals("auto")) { languageSet = null;/*from w ww.j av a 2 s . c o m*/ } else { languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(",")))); } }
From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java
public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {// w w w .j a v a 2 s . c o m super(indexSettings, name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsList("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException( "No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = settings.get("name_type", "generic"); if 
("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }