List of usage examples for the org.apache.commons.codec.language.RefinedSoundex constructor RefinedSoundex()
public RefinedSoundex()
From source file:com.vangent.hieos.empi.transform.RefinedSoundexTransformFunction.java
/**
 * Encodes the given value with a freshly created {@code RefinedSoundex} encoder.
 *
 * @param obj the value to encode; it is cast to {@code String} before being encoded
 * @return whatever {@code RefinedSoundex.encode(String)} returns for {@code obj}
 */
public Object transform(Object obj) {
    return new RefinedSoundex().encode((String) obj);
}
From source file:ca.sqlpower.matchmaker.munge.RefinedSoundexMungeStep.java
public Boolean doCall() throws Exception { MungeStepOutput<String> out = getOut(); MungeStepOutput<String> in = getMSOInputs().get(0); String data = in.getData();//w ww .j av a 2 s.c om if (data != null) { out.setData(new RefinedSoundex().soundex(data)); } else { out.setData(null); } return true; }
From source file:com.example.PhoneticTokenFilterFactory.java
@Inject public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {//from ww w . j ava 2 s . co m super(index, indexSettingsService.getSettings(), name, settings); this.languageset = null; this.nametype = null; this.ruletype = null; this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default String encodername = settings.get("encoder", "metaphone"); if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { this.encoder = null; this.maxcodelength = settings.getAsInt("max_code_len", 4); } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { this.encoder = null; this.languageset = settings.getAsArray("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { throw new IllegalArgumentException( "No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = 
settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { nametype = NameType.SEPHARDIC; } } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { this.encoder = new KoelnerPhonetik(); } else if ("haasephonetik".equalsIgnoreCase(encodername)) { this.encoder = new HaasePhonetik(); } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else if ("daitch_mokotoff".equalsIgnoreCase(encodername)) { this.encoder = new DaitchMokotoffSoundex(); } else { throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } }
From source file:com.jaeksoft.searchlib.analysis.filter.PhoneticFilter.java
/**
 * Wraps the given token stream in the phonetic filter selected by {@code codec}.
 *
 * @param tokenStream the stream to wrap
 * @return the wrapping filter, or {@code null} when {@code codec} equals none of the codec
 *         constants checked here
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (BEIDER_MORSE.equals(codec)) {
        // Beider-Morse is the only codec configured through rule type and phoneme limit.
        return new BeiderMorseTokenFilter(tokenStream, new EncoderKey(ruleType, maxPhonemes));
    } else if (COLOGNE_PHONETIC.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new ColognePhonetic());
    } else if (SOUNDEX.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new Soundex());
    } else if (REFINED_SOUNDEX.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new RefinedSoundex());
    } else if (METAPHONE.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new Metaphone());
    } else if (CAVERPHONE1.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new Caverphone1());
    } else if (CAVERPHONE2.equals(codec)) {
        return new EncoderTokenFilter(tokenStream, new Caverphone2());
    }
    return null;
}
From source file:at.jps.sanction.core.util.TokenTool.java
public static float compareRefinedSoundex(final String text1, final String text2, final boolean fuzzy, final int minlen, final double fuzzyValue) { final RefinedSoundex encoder = new RefinedSoundex(); // TODO: in reallife make // this//from www .j a va 2 s . c o m // go away !! return (compareCheck(encoder.encode(text1), encoder.encode(text2), fuzzy, minlen, fuzzyValue)); }
From source file:com.perceptive.epm.perkolcentral.action.ajax.EmployeeDetailsAction.java
public String executeGetAllEmployees() throws ExceptionWrapper { try {// w w w . j a v a2 s . c om Soundex sndx = new Soundex(); DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); final StringEncoderComparator comparator1 = new StringEncoderComparator(doubleMetaphone); LoggingHelpUtil.printDebug("Page " + getPage() + " Rows " + getRows() + " Sorting Order " + getSord() + " Index Row :" + getSidx()); LoggingHelpUtil.printDebug("Search :" + searchField + " " + searchOper + " " + searchString); // Calcalate until rows ware selected int to = (rows * page); // Calculate the first row to read int from = to - rows; LinkedHashMap<Long, EmployeeBO> employeeLinkedHashMap = new LinkedHashMap<Long, EmployeeBO>(); employeeLinkedHashMap = employeeBL.getAllEmployees(); ArrayList<EmployeeBO> allEmployees = new ArrayList<EmployeeBO>(employeeLinkedHashMap.values()); //Handle search if (searchOper != null && !searchOper.trim().equalsIgnoreCase("") && searchString != null && !searchString.trim().equalsIgnoreCase("")) { if (searchOper.trim().equalsIgnoreCase("eq")) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return ((EmployeeBO) o).getEmployeeName().equalsIgnoreCase(searchString.trim()); //To change body of implemented methods use File | Settings | File Templates. 
} }); } else if (searchOper.trim().equalsIgnoreCase("slk")) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (new StringEncoderComparator(new Soundex()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new Metaphone()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new RefinedSoundex()).compare( ((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0); //To change body of implemented methods use File | Settings | File Templates. } }); } else { //First check whether there is an exact match if (CollectionUtils.exists(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (((EmployeeBO) o).getEmployeeName().toLowerCase() .contains(searchString.trim().toLowerCase())); //To change body of implemented methods use File | Settings | File Templates. 
} })) { CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { return (((EmployeeBO) o).getEmployeeName().toLowerCase() .contains(searchString.trim().toLowerCase())); } }); } else { ArrayList<String> matchedEmployeeIds = employeeBL.getLuceneUtil() .getBestMatchEmployeeName(searchString.trim().toLowerCase()); allEmployees = new ArrayList<EmployeeBO>(); for (String id : matchedEmployeeIds) { allEmployees.add(employeeBL.getAllEmployees().get(Long.valueOf(id))); } } } /*{ CollectionUtils.filter(allEmployees, new Predicate() { @Override public boolean evaluate(Object o) { if (((EmployeeBO) o).getEmployeeName().toLowerCase().contains(searchString.trim().toLowerCase())) return true; else if(new StringEncoderComparator(new Soundex()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare(((EmployeeBO) o).getEmployeeName().toLowerCase(), searchString.trim().toLowerCase()) == 0) { return true; } else { for (String empNameParts : ((EmployeeBO) o).getEmployeeName().trim().split(" ")) { if (new StringEncoderComparator(new Soundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 || new StringEncoderComparator(new DoubleMetaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 // || new StringEncoderComparator(new Metaphone()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 // || new StringEncoderComparator(new RefinedSoundex()).compare(empNameParts.toLowerCase(), searchString.trim().toLowerCase()) == 0 ) { return true; } } return false; } } }); } */ } //// Handle Order By if (sidx != null && !sidx.equals("")) { Collections.sort(allEmployees, new Comparator<EmployeeBO>() { public int compare(EmployeeBO e1, EmployeeBO e2) { if (sidx.equalsIgnoreCase("employeeName")) return sord.equalsIgnoreCase("asc") ? 
e1.getEmployeeName().compareTo(e2.getEmployeeName()) : e2.getEmployeeName().compareTo(e1.getEmployeeName()); else if (sidx.equalsIgnoreCase("jobTitle")) return sord.equalsIgnoreCase("asc") ? e1.getJobTitle().compareTo(e2.getJobTitle()) : e2.getJobTitle().compareTo(e1.getJobTitle()); else if (sidx.equalsIgnoreCase("manager")) return sord.equalsIgnoreCase("asc") ? e1.getManager().compareTo(e2.getManager()) : e2.getManager().compareTo(e1.getManager()); else return sord.equalsIgnoreCase("asc") ? e1.getEmployeeName().compareTo(e2.getEmployeeName()) : e2.getEmployeeName().compareTo(e1.getEmployeeName()); } }); } // records = allEmployees.size(); total = (int) Math.ceil((double) records / (double) rows); gridModel = new ArrayList<EmployeeBO>(); to = to > records ? records : to; for (int iCounter = from; iCounter < to; iCounter++) { EmployeeBO employeeBO = allEmployees.get(iCounter); //new EmployeeBO((Employee) employeeLinkedHashMap.values().toArray()[iCounter]); gridModel.add(employeeBO); } } catch (Exception ex) { throw new ExceptionWrapper(ex); } return SUCCESS; }
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
/**
 * Runs {@code assertAlgorithm} for several encoders, each once with the boolean flag set to
 * {@code true} and once with {@code false}, against fixed inputs and expected outputs.
 */
public void testAlgorithms() throws Exception {
    final String words = "aaa bbb ccc easgasg";
    final String names = "Darda Karleen Datha Carlene";

    assertAlgorithm(new Metaphone(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, words,
            new String[] { "A", "B", "KKK", "ESKS" });

    assertAlgorithm(new DoubleMetaphone(), true, words,
            new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, words,
            new String[] { "A", "PP", "KK", "ASKS" });

    assertAlgorithm(new Soundex(), true, words,
            new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, words,
            new String[] { "A000", "B000", "C000", "E220" });

    assertAlgorithm(new RefinedSoundex(), true, words,
            new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, words,
            new String[] { "A0", "B1", "C3", "E034034" });

    assertAlgorithm(new Caverphone2(), true, names,
            new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen", "TTA1111111", "Datha",
                    "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone2(), false, names,
            new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

    assertAlgorithm(new Nysiis(), true, words,
            new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
    assertAlgorithm(new Nysiis(), false, words,
            new String[] { "A", "B", "C", "EASGAS" });
}
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
/**
 * Blasts random strings through a phonetic analyzer for each encoder.
 *
 * <p>Every encoder is exercised in two consecutive passes; each pass builds a fresh analyzer,
 * feeds it random data and closes it.
 */
public void testRandomStrings() throws IOException {
    final Encoder[] allEncoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder e : allEncoders) {
        // NOTE(review): both passes build the filter with the last argument set to false;
        // the second pass may have been meant to use true — confirm.
        for (int pass = 0; pass < 2; pass++) {
            Analyzer analyzer = new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                    return new TokenStreamComponents(source, new PhoneticFilter(source, e, false));
                }
            };
            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
            analyzer.close();
        }
    }
}
From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java
public void testEmptyTerm() throws IOException { Encoder encoders[] = new Encoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2() }; for (final Encoder e : encoders) { Analyzer a = new Analyzer() { @Override//from ww w . ja v a 2 s. co m protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new KeywordTokenizer(); return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean())); } }; checkOneTerm(a, "", ""); a.close(); } }
From source file:org.apache.solr.analysis.TestPhoneticFilter.java
/**
 * Calls {@code runner} for Double Metaphone, Metaphone, Soundex and Refined Soundex, first with
 * the boolean flag set to {@code true} for all four, then with {@code false} for all four.
 */
public void testEncodes() throws Exception {
    final boolean[] flags = { true, false };
    for (boolean flag : flags) {
        runner(new DoubleMetaphone(), flag);
        runner(new Metaphone(), flag);
        runner(new Soundex(), flag);
        runner(new RefinedSoundex(), flag);
    }
}