Example usage of the org.apache.commons.codec.language.RefinedSoundex constructor RefinedSoundex()

List of usage examples for the org.apache.commons.codec.language.RefinedSoundex constructor RefinedSoundex()

Introduction

On this page you can find usage examples for the RefinedSoundex() constructor of org.apache.commons.codec.language.RefinedSoundex.

Prototype

public RefinedSoundex() 

Source Link

Document

Creates an instance of the RefinedSoundex object using the default US English mapping.

Usage

From source file:com.vangent.hieos.empi.transform.RefinedSoundexTransformFunction.java

/**
 * Encodes the given value with a freshly constructed {@link RefinedSoundex}.
 *
 * @param obj the value to encode; it is cast to {@code String}, so a non-null
 *            value of any other type fails with a {@code ClassCastException}
 * @return whatever {@code RefinedSoundex.encode(String)} yields for the input
 */
public Object transform(Object obj) {
    final RefinedSoundex refinedSoundex = new RefinedSoundex();
    final String text = (String) obj;
    return refinedSoundex.encode(text);
}

From source file:ca.sqlpower.matchmaker.munge.RefinedSoundexMungeStep.java

/**
 * Takes the data of the first input and stores its refined-soundex code in
 * the step output; a {@code null} input value is passed on as {@code null}.
 *
 * @return always {@code true}
 * @throws Exception declared by the signature
 */
public Boolean doCall() throws Exception {
    final MungeStepOutput<String> target = getOut();
    final MungeStepOutput<String> source = getMSOInputs().get(0);
    final String value = source.getData();

    // Only build an encoder when there is something to encode.
    target.setData(value == null ? null : new RefinedSoundex().soundex(value));
    return Boolean.TRUE;
}

From source file:com.example.PhoneticTokenFilterFactory.java

/**
 * Creates the factory and selects the phonetic encoder named by the
 * {@code encoder} setting, which defaults to {@code metaphone}.
 *
 * <p>For the double-metaphone and Beider-Morse names no encoder instance is
 * stored here ({@code encoder} is set to {@code null}); only their extra
 * settings ({@code max_code_len}, or {@code languageset}, {@code rule_type}
 * and {@code name_type}) are read.
 *
 * @param index                index handed to the superclass constructor
 * @param indexSettingsService supplies the index settings handed to the superclass
 * @param name                 filter name handed to the superclass
 * @param settings             filter settings the encoder configuration is read from
 * @throws IllegalArgumentException if the encoder name, or the Beider-Morse
 *                                  {@code rule_type}, is not recognised
 */
@Inject
public PhoneticTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Neutral starting values; the encoder-specific branches override what they need.
    this.languageset = null;
    this.nametype = null;
    this.ruletype = null;
    this.maxcodelength = 0;
    this.replace = settings.getAsBoolean("replace", true);

    // Metaphone is the fallback because the encoder was seen to be null at the last
    // step of SimplePhoneticAnalysisTests when no default was applied.
    final String encoderName = settings.get("encoder", "metaphone");

    if ("metaphone".equalsIgnoreCase(encoderName)) {
        this.encoder = new Metaphone();
    } else if ("soundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new Soundex();
    } else if ("caverphone1".equalsIgnoreCase(encoderName)) {
        this.encoder = new Caverphone1();
    } else if ("caverphone2".equalsIgnoreCase(encoderName) || "caverphone".equalsIgnoreCase(encoderName)) {
        // Plain "caverphone" is an alias for the second revision.
        this.encoder = new Caverphone2();
    } else if ("refined_soundex".equalsIgnoreCase(encoderName)
            || "refinedSoundex".equalsIgnoreCase(encoderName)) {
        this.encoder = new RefinedSoundex();
    } else if ("cologne".equalsIgnoreCase(encoderName)) {
        this.encoder = new ColognePhonetic();
    } else if ("double_metaphone".equalsIgnoreCase(encoderName)
            || "doubleMetaphone".equalsIgnoreCase(encoderName)) {
        this.encoder = null;
        this.maxcodelength = settings.getAsInt("max_code_len", 4);
    } else if ("bm".equalsIgnoreCase(encoderName) || "beider_morse".equalsIgnoreCase(encoderName)
            || "beidermorse".equalsIgnoreCase(encoderName)) {
        this.encoder = null;
        this.languageset = settings.getAsArray("languageset");

        final String ruleTypeSetting = settings.get("rule_type", "approx");
        if ("approx".equalsIgnoreCase(ruleTypeSetting)) {
            this.ruletype = RuleType.APPROX;
        } else if ("exact".equalsIgnoreCase(ruleTypeSetting)) {
            this.ruletype = RuleType.EXACT;
        } else {
            throw new IllegalArgumentException(
                    "No matching rule type [" + ruleTypeSetting + "] for beider morse encoder");
        }

        // NOTE(review): unlike rule_type, an unrecognised name_type is not rejected;
        // nametype simply stays null. Confirm whether that is intended.
        final String nameTypeSetting = settings.get("name_type", "generic");
        if ("GENERIC".equalsIgnoreCase(nameTypeSetting)) {
            this.nametype = NameType.GENERIC;
        } else if ("ASHKENAZI".equalsIgnoreCase(nameTypeSetting)) {
            this.nametype = NameType.ASHKENAZI;
        } else if ("SEPHARDIC".equalsIgnoreCase(nameTypeSetting)) {
            this.nametype = NameType.SEPHARDIC;
        }
    } else if ("koelnerphonetik".equalsIgnoreCase(encoderName)) {
        this.encoder = new KoelnerPhonetik();
    } else if ("haasephonetik".equalsIgnoreCase(encoderName)) {
        this.encoder = new HaasePhonetik();
    } else if ("nysiis".equalsIgnoreCase(encoderName)) {
        this.encoder = new Nysiis();
    } else if ("daitch_mokotoff".equalsIgnoreCase(encoderName)) {
        this.encoder = new DaitchMokotoffSoundex();
    } else {
        throw new IllegalArgumentException("unknown encoder [" + encoderName + "] for phonetic token filter");
    }
}

From source file:com.jaeksoft.searchlib.analysis.filter.PhoneticFilter.java

/**
 * Wraps the token stream in the phonetic filter selected by {@code codec}.
 *
 * @param tokenStream the stream to decorate
 * @return the filtering stream, or {@code null} when {@code codec} matches
 *         none of the codec constants checked here
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream filtered;
    if (BEIDER_MORSE.equals(codec)) {
        // Beider-Morse is the only codec configured through ruleType/maxPhonemes.
        filtered = new BeiderMorseTokenFilter(tokenStream, new EncoderKey(ruleType, maxPhonemes));
    } else if (COLOGNE_PHONETIC.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new ColognePhonetic());
    } else if (SOUNDEX.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new Soundex());
    } else if (REFINED_SOUNDEX.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new RefinedSoundex());
    } else if (METAPHONE.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new Metaphone());
    } else if (CAVERPHONE1.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new Caverphone1());
    } else if (CAVERPHONE2.equals(codec)) {
        filtered = new EncoderTokenFilter(tokenStream, new Caverphone2());
    } else {
        filtered = null;
    }
    return filtered;
}

From source file:at.jps.sanction.core.util.TokenTool.java

/**
 * Compares two texts by their refined-soundex codes.
 *
 * <p>Both texts are encoded with the same {@link RefinedSoundex} instance and
 * the two codes are handed to {@code compareCheck} together with the
 * remaining arguments, which are passed through unchanged.
 *
 * @param text1      first text to encode
 * @param text2      second text to encode
 * @param fuzzy      forwarded to {@code compareCheck}
 * @param minlen     forwarded to {@code compareCheck}
 * @param fuzzyValue forwarded to {@code compareCheck}
 * @return the value returned by {@code compareCheck}
 */
public static float compareRefinedSoundex(final String text1, final String text2, final boolean fuzzy,
        final int minlen, final double fuzzyValue) {
    // TODO: in real life make this go away
    final RefinedSoundex refinedSoundex = new RefinedSoundex();

    final String firstCode = refinedSoundex.encode(text1);
    final String secondCode = refinedSoundex.encode(text2);
    return compareCheck(firstCode, secondCode, fuzzy, minlen, fuzzyValue);
}

From source file:com.perceptive.epm.perkolcentral.action.ajax.EmployeeDetailsAction.java

/**
 * Loads all employees, applies the optional search filter and sort order, and
 * fills {@code gridModel} with the rows of the requested page.
 *
 * <p>Search behaviour depends on {@code searchOper} (only applied when both it
 * and {@code searchString} are non-blank):
 * <ul>
 *   <li>{@code eq}: case-insensitive equality on the employee name;</li>
 *   <li>{@code slk}: phonetic match, i.e. Soundex, DoubleMetaphone, Metaphone
 *       or RefinedSoundex considers the lower-cased name and term equal;</li>
 *   <li>anything else: case-insensitive substring match, falling back to the
 *       Lucene best-match lookup when no name contains the term.</li>
 * </ul>
 *
 * <p>Also sets {@code records} (number of rows after filtering) and
 * {@code total} (number of pages).
 *
 * @return {@code SUCCESS}
 * @throws ExceptionWrapper wrapping any exception raised while building the page
 */
public String executeGetAllEmployees() throws ExceptionWrapper {
    try {
        LoggingHelpUtil.printDebug("Page " + getPage() + " Rows " + getRows() + " Sorting Order " + getSord()
                + " Index Row :" + getSidx());
        LoggingHelpUtil.printDebug("Search :" + searchField + " " + searchOper + " " + searchString);

        // Exclusive end index of the requested page.
        int to = (rows * page);

        // First row to read; clamped at 0 so a page number below 1 produces an
        // empty page instead of an out-of-bounds list access.
        final int from = Math.max(0, to - rows);

        final LinkedHashMap<Long, EmployeeBO> employeesById = employeeBL.getAllEmployees();
        ArrayList<EmployeeBO> allEmployees = new ArrayList<EmployeeBO>(employeesById.values());

        // Handle search
        if (searchOper != null && !searchOper.trim().equalsIgnoreCase("") && searchString != null
                && !searchString.trim().equalsIgnoreCase("")) {
            final String operator = searchOper.trim();
            final String exactTerm = searchString.trim();
            final String term = exactTerm.toLowerCase();

            if (operator.equalsIgnoreCase("eq")) {
                CollectionUtils.filter(allEmployees, new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        return ((EmployeeBO) o).getEmployeeName().equalsIgnoreCase(exactTerm);
                    }
                });
            } else if (operator.equalsIgnoreCase("slk")) {
                // Built once per request rather than once per employee; the order
                // matches the original short-circuit order.
                final StringEncoderComparator[] phoneticComparators = {
                        new StringEncoderComparator(new Soundex()),
                        new StringEncoderComparator(new DoubleMetaphone()),
                        new StringEncoderComparator(new Metaphone()),
                        new StringEncoderComparator(new RefinedSoundex()) };
                CollectionUtils.filter(allEmployees, new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        final String employeeName = ((EmployeeBO) o).getEmployeeName().toLowerCase();
                        for (StringEncoderComparator phonetic : phoneticComparators) {
                            if (phonetic.compare(employeeName, term) == 0) {
                                return true;
                            }
                        }
                        return false;
                    }
                });
            } else {
                final Predicate nameContainsTerm = new Predicate() {
                    @Override
                    public boolean evaluate(Object o) {
                        return ((EmployeeBO) o).getEmployeeName().toLowerCase().contains(term);
                    }
                };
                // First check whether any name contains the term at all.
                if (CollectionUtils.exists(allEmployees, nameContainsTerm)) {
                    CollectionUtils.filter(allEmployees, nameContainsTerm);
                } else {
                    ArrayList<String> matchedEmployeeIds = employeeBL.getLuceneUtil()
                            .getBestMatchEmployeeName(term);
                    allEmployees = new ArrayList<EmployeeBO>();
                    for (String id : matchedEmployeeIds) {
                        // Reuse the map loaded above instead of reloading it per id, and
                        // skip ids without an employee so no null entry reaches the sort
                        // or the grid.
                        EmployeeBO match = employeesById.get(Long.valueOf(id));
                        if (match != null) {
                            allEmployees.add(match);
                        }
                    }
                }
            }
        }

        // Handle order by
        if (sidx != null && !sidx.equals("")) {
            Collections.sort(allEmployees, new Comparator<EmployeeBO>() {
                @Override
                public int compare(EmployeeBO e1, EmployeeBO e2) {
                    // Descending order is obtained by swapping the operands.
                    final boolean ascending = sord.equalsIgnoreCase("asc");
                    final EmployeeBO first = ascending ? e1 : e2;
                    final EmployeeBO second = ascending ? e2 : e1;

                    if (sidx.equalsIgnoreCase("jobTitle")) {
                        return first.getJobTitle().compareTo(second.getJobTitle());
                    }
                    if (sidx.equalsIgnoreCase("manager")) {
                        return first.getManager().compareTo(second.getManager());
                    }
                    // "employeeName" and any unrecognised column sort by name.
                    return first.getEmployeeName().compareTo(second.getEmployeeName());
                }
            });
        }

        records = allEmployees.size();
        // Guard against a zero page size, which would otherwise yield a bogus page count.
        total = rows > 0 ? (int) Math.ceil((double) records / (double) rows) : 0;

        gridModel = new ArrayList<EmployeeBO>();
        to = Math.min(to, records);
        for (int iCounter = from; iCounter < to; iCounter++) {
            gridModel.add(allEmployees.get(iCounter));
        }

    } catch (Exception ex) {
        throw new ExceptionWrapper(ex);
    }
    return SUCCESS;
}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/**
 * Checks the expected output of {@code assertAlgorithm} for each encoder,
 * once per value of its boolean argument.
 *
 * <p>When that argument is {@code true} the expected tokens interleave each
 * phonetic code with the original term; when {@code false} only the codes are
 * expected.
 */
public void testAlgorithms() throws Exception {
    final String text = "aaa bbb ccc easgasg";
    final String names = "Darda Karleen Datha Carlene";

    // Metaphone
    assertAlgorithm(new Metaphone(), true, text,
            new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
    assertAlgorithm(new Metaphone(), false, text, new String[] { "A", "B", "KKK", "ESKS" });

    // DoubleMetaphone
    assertAlgorithm(new DoubleMetaphone(), true, text,
            new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
    assertAlgorithm(new DoubleMetaphone(), false, text, new String[] { "A", "PP", "KK", "ASKS" });

    // Soundex
    assertAlgorithm(new Soundex(), true, text,
            new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
    assertAlgorithm(new Soundex(), false, text, new String[] { "A000", "B000", "C000", "E220" });

    // RefinedSoundex
    assertAlgorithm(new RefinedSoundex(), true, text,
            new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
    assertAlgorithm(new RefinedSoundex(), false, text, new String[] { "A0", "B1", "C3", "E034034" });

    // Caverphone2
    assertAlgorithm(new Caverphone2(), true, names, new String[] { "TTA1111111", "Darda", "KLN1111111",
            "Karleen", "TTA1111111", "Datha", "KLN1111111", "Carlene" });
    assertAlgorithm(new Caverphone2(), false, names,
            new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

    // Nysiis
    assertAlgorithm(new Nysiis(), true, text,
            new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
    assertAlgorithm(new Nysiis(), false, text, new String[] { "A", "B", "C", "EASGAS" });
}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/** Blasts random strings through an analyzer built around each encoder. */
public void testRandomStrings() throws IOException {
    final Encoder[] allEncoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder encoder : allEncoders) {
        // Two passes per encoder, each with its own freshly built analyzer.
        // NOTE(review): both passes construct the filter with `false` as the last
        // argument; confirm whether one of them was meant to use `true`.
        for (int pass = 0; pass < 2; pass++) {
            final Analyzer analyzer = new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                    return new TokenStreamComponents(source, new PhoneticFilter(source, encoder, false));
                }
            };

            checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
            analyzer.close();
        }
    }
}

From source file:org.apache.lucene.analysis.phonetic.TestPhoneticFilter.java

/**
 * Verifies, for every encoder, that an empty term run through a keyword
 * tokenizer and a {@code PhoneticFilter} comes out as an empty term.
 */
public void testEmptyTerm() throws IOException {
    final Encoder[] allEncoders = { new Metaphone(), new DoubleMetaphone(), new Soundex(),
            new RefinedSoundex(), new Caverphone2() };

    for (final Encoder encoder : allEncoders) {
        final Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                final Tokenizer source = new KeywordTokenizer();
                // The filter's boolean argument is chosen at random on each call.
                final PhoneticFilter phonetic = new PhoneticFilter(source, encoder, random().nextBoolean());
                return new TokenStreamComponents(source, phonetic);
            }
        };
        checkOneTerm(analyzer, "", "");
        analyzer.close();
    }
}

From source file:org.apache.solr.analysis.TestPhoneticFilter.java

/**
 * Runs {@code runner} for four encoders, first with {@code true} and then
 * with {@code false} as its second argument.
 */
public void testEncodes() throws Exception {
    // presumably the inject flag of the filter under test; verify against runner
    for (final boolean flag : new boolean[] { true, false }) {
        runner(new DoubleMetaphone(), flag);
        runner(new Metaphone(), flag);
        runner(new Soundex(), flag);
        runner(new RefinedSoundex(), flag);
    }
}