List of usage examples for the org.apache.commons.codec.language.bm.PhoneticEngine constructor
public PhoneticEngine(NameType nameType, RuleType ruleType, boolean concat)
From source file:com.example.PhoneticTokenFilterFactory.java
/**
 * Wraps the given stream in the phonetic filter selected by this factory's configuration.
 *
 * <p>Selection order:
 * <ol>
 *   <li>{@code encoder} is set: a {@link PhoneticFilter} using that encoder;</li>
 *   <li>both {@code ruletype} and {@code nametype} are set: a {@link BeiderMorseFilter},
 *       restricted to {@code languageset} when that is set;</li>
 *   <li>{@code maxcodelength} is positive: a {@link DoubleMetaphoneFilter}.</li>
 * </ol>
 *
 * @param tokenStream the stream to wrap
 * @return the configured phonetic filter around {@code tokenStream}
 * @throws IllegalArgumentException if none of the cases above applies
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // An explicit encoder takes precedence over every other setting.
    if (encoder != null) {
        return new PhoneticFilter(tokenStream, encoder, !replace);
    }

    if (ruletype != null && nametype != null) {
        if (languageset == null) {
            return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true));
        }
        final LanguageSet restricted = LanguageSet.from(new HashSet<>(Arrays.asList(languageset)));
        return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), restricted);
    }

    if (maxcodelength > 0) {
        return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace);
    }

    throw new IllegalArgumentException("encoder error");
}
From source file:org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory.java
/** Creates a new BeiderMorseFilterFactory */ public BeiderMorseFilterFactory(Map<String, String> args) { super(args);/*w ww .jav a2s .c o m*/ // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true NameType nameType = NameType.valueOf(get(args, "nameType", NameType.GENERIC.toString())); RuleType ruleType = RuleType.valueOf(get(args, "ruleType", RuleType.APPROX.toString())); boolean concat = getBoolean(args, "concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. Set<String> langs = getSet(args, "languageSet"); languageSet = (null == langs || (1 == langs.size() && langs.contains("auto"))) ? null : LanguageSet.from(langs); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
@Override public void setUp() throws Exception { super.setUp(); analyzer = new Analyzer() { @Override//from w w w. j a va 2 s. c om protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true))); } }; }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
/** restrict the output to a set of possible origin languages */ public void testLanguageSet() throws Exception { final LanguageSet languages = LanguageSet.from(new HashSet<String>() { {// w ww . ja va 2 s. c om add("italian"); add("greek"); add("spanish"); } }); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages)); } }; assertAnalyzesTo(analyzer, "Angelo", new String[] { "andZelo", "angelo", "anxelo" }, new int[] { 0, 0, 0, }, new int[] { 6, 6, 6, }, new int[] { 1, 0, 0, }); analyzer.close(); }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
public void testEmptyTerm() throws IOException { Analyzer a = new Analyzer() { @Override// w w w . j a v a 2 s .c o m protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true))); } }; checkOneTerm(a, "", ""); a.close(); }
From source file:org.apache.lucene.analysis.phonetic.TestBeiderMorseFilter.java
public void testCustomAttribute() throws IOException { TokenStream stream = new MockTokenizer(MockTokenizer.KEYWORD, false); ((Tokenizer) stream).setReader(new StringReader("D'Angelo")); stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*")); stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)); KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class); stream.reset();//w w w. jav a 2 s . c o m int i = 0; while (stream.incrementToken()) { assertTrue(keyAtt.isKeyword()); i++; } assertEquals(12, i); stream.end(); stream.close(); }
From source file:org.apache.solr.analysis.BeiderMorseFilterFactory.java
public void init(Map<String, String> args) { super.init(args); // PhoneticEngine = NameType + RuleType + concat // we use common-codec's defaults: GENERIC + APPROX + true String nameTypeArg = args.get("nameType"); NameType nameType = (nameTypeArg == null) ? NameType.GENERIC : NameType.valueOf(nameTypeArg); String ruleTypeArg = args.get("ruleType"); RuleType ruleType = (ruleTypeArg == null) ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg); boolean concat = getBoolean("concat", true); engine = new PhoneticEngine(nameType, ruleType, concat); // LanguageSet: defaults to automagic, otherwise a comma-separated list. String languageSetArg = args.get("languageSet"); if (languageSetArg == null || languageSetArg.equals("auto")) { languageSet = null;/*from w ww . j a v a 2s . co m*/ } else { languageSet = LanguageSet.from(new HashSet<String>(Arrays.asList(languageSetArg.split(",")))); } }
From source file:org.elasticsearch.index.analysis.PhoneticTokenFilterFactory.java
@Override public TokenStream create(TokenStream tokenStream) { if (encoder == null) { if (ruletype != null && nametype != null) { if (languageset != null) { final LanguageSet languages = LanguageSet.from(new HashSet<>(languageset)); return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), languages);//from w w w.j a v a 2 s . c om } return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true)); } if (maxcodelength > 0) { return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace); } } else { return new PhoneticFilter(tokenStream, encoder, !replace); } throw new IllegalArgumentException("encoder error"); }