Example usage for org.apache.commons.codec StringEncoder encode

List of usage examples for org.apache.commons.codec StringEncoder encode

Introduction

This page lists usage examples for the encode method of org.apache.commons.codec.StringEncoder.

Prototype

String encode(String pString) throws EncoderException;

Source Link

Document

Encodes a String and returns a String.

Usage

From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.util.NGramUtils.java

/**
 * Counts the phonetic n-grams of every sentence in the document.
 * <p>
 * The covered text of each token is passed through a phonetic encoder selected by the document
 * language ({@code Soundex} for "en", {@code ColognePhonetic} for "de"). The encoded tokens of one
 * sentence are expanded into n-grams, the parts of each n-gram are joined with
 * {@code NGRAM_GLUE}, and the joined string is counted in the returned distribution. N-grams are
 * built per sentence, so they never span a sentence boundary.
 *
 * @param jcas the document to process
 * @param minN lower n-gram size bound, passed on to {@code NGramStringListIterable}
 * @param maxN upper n-gram size bound, passed on to {@code NGramStringListIterable}
 * @return the frequency distribution of the joined phonetic n-grams
 * @throws TextClassificationException if the document language is neither "en" nor "de", or if
 *         a token cannot be encoded
 */
public static FrequencyDistribution<String> getDocumentPhoneticNgrams(JCas jcas, int minN, int maxN)
        throws TextClassificationException {
    final String language = jcas.getDocumentLanguage();

    // Choose the phonetic algorithm that matches the document language.
    final StringEncoder phoneticEncoder;
    if (language.equals("en")) {
        phoneticEncoder = new Soundex();
    } else if (language.equals("de")) {
        phoneticEncoder = new ColognePhonetic();
    } else {
        throw new TextClassificationException(
                "Language code '" + language + "' not supported by phonetic ngrams FE.");
    }

    final FrequencyDistribution<String> counts = new FrequencyDistribution<String>();
    for (Sentence sentence : select(jcas, Sentence.class)) {
        // Phonetic codes of this sentence's tokens, in token order.
        final List<String> encodedTokens = new ArrayList<String>();
        try {
            for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
                encodedTokens.add(phoneticEncoder.encode(token.getCoveredText()));
            }
        } catch (EncoderException e) {
            // Surface encoder failures as the checked exception this API declares.
            throw new TextClassificationException(e);
        }

        final String[] encoded = encodedTokens.toArray(new String[0]);
        for (List<String> ngram : new NGramStringListIterable(encoded, minN, maxN)) {
            counts.inc(StringUtils.join(ngram, NGRAM_GLUE));
        }
    }
    return counts;
}

From source file:com.github.jinahya.codec.commons.RareStringEncoderProxyTest.java

/**
 * Exercises a {@code RareStringEncoderProxy} instance through the {@code StringEncoder}
 * interface: a {@code null} argument must raise a {@code NullPointerException}, and encoding the
 * empty string must yield a value equal to the empty string.
 *
 * @throws EncoderException if the proxy fails to encode
 */
@Test
public void testAsStringEncoder() throws EncoderException {

    final StringEncoder proxy = (StringEncoder) RareStringEncoderProxy.newInstance();

    // Null input has to be rejected with a NullPointerException.
    boolean nullRejected = false;
    try {
        proxy.encode((String) null);
    } catch (final NullPointerException npe) {
        nullRejected = true; // expected
    }
    if (!nullRejected) {
        Assert.fail("passed: encode((String) null)");
    }

    // The empty string must come back as an equal string.
    final String input = "";
    final String encoded = proxy.encode(input);
    Assert.assertEquals(encoded, input);
}

From source file:net.yacy.cora.language.phonetic.Soundex.java

/**
 * Encodes the Strings and returns the number of characters in the two
 * encoded Strings that are the same./*from  ww  w. j a  v a  2 s . c om*/
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 * 
 * @param encoder
 *                  The encoder to use to encode the Strings.
 * @param s1
 *                  A String that will be encoded and compared.
 * @param s2
 *                  A String that will be encoded and compared.
 * @return The number of characters in the two Soundex encoded Strings that
 *             are the same.
 * 
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *          MS T-SQL DIFFERENCE</a>
 * 
 * @throws EncoderException
 *                  if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    return differenceEncoded(encoder.encode(s1), encoder.encode(s2));
}

From source file:org.apache.abdera2.common.text.Codec.java

/**
 * Encodes {@code value} with the encoder that corresponds to this constant: {@code QCodec} for
 * {@code Q}, {@code BCodec} for {@code B}, and {@code StarCodec} for every other constant.
 * <p>
 * This is a best-effort operation: if creating the encoder or encoding the value throws any
 * exception, the input is returned unchanged instead of propagating the failure.
 *
 * @param value   the text to encode; may be {@code null}
 * @param charset the charset name handed to the encoder's constructor
 * @return the encoded text, {@code null} if {@code value} is {@code null}, or {@code value}
 *         itself if encoding failed
 */
private String _encode(String value, String charset) {
    if (value == null) {
        return null;
    }
    try {
        // The encoder is created inside the try so constructor failures also fall back.
        final StringEncoder encoder;
        if (this == Q) {
            encoder = new QCodec(charset);
        } else if (this == B) {
            encoder = new BCodec(charset);
        } else {
            encoder = new StarCodec(charset);
        }
        return encoder.encode(value);
    } catch (Exception ignored) {
        // Deliberate fallback: hand back the original text when it cannot be encoded.
        return value;
    }
}

From source file:org.dkpro.tc.features.ngram.util.NGramUtils.java

/**
 * Counts the phonetic n-grams of the sentences covered by {@code target}.
 * <p>
 * The covered text of each token is passed through a phonetic encoder selected by the document
 * language ({@code Soundex} for "en", {@code ColognePhonetic} for "de"). The encoded tokens of one
 * sentence are expanded into n-grams, the parts of each n-gram are joined with
 * {@code NGRAM_GLUE}, and the joined string is counted in the returned distribution. N-grams are
 * built per sentence, so they never span a sentence boundary.
 *
 * @param jcas   the document to process
 * @param target the annotation whose covered sentences are processed
 * @param minN   lower n-gram size bound, passed on to {@code NGramStringListIterable}
 * @param maxN   upper n-gram size bound, passed on to {@code NGramStringListIterable}
 * @return the frequency distribution of the joined phonetic n-grams
 * @throws TextClassificationException if the document language is neither "en" nor "de", or if
 *         a token cannot be encoded
 */
public static FrequencyDistribution<String> getDocumentPhoneticNgrams(JCas jcas, Annotation target, int minN,
        int maxN) throws TextClassificationException {
    final String docLanguage = jcas.getDocumentLanguage();

    // Choose the phonetic algorithm that matches the document language.
    final StringEncoder phonetics;
    if (docLanguage.equals("en")) {
        phonetics = new Soundex();
    } else if (docLanguage.equals("de")) {
        phonetics = new ColognePhonetic();
    } else {
        throw new TextClassificationException(
                "Language code '" + docLanguage + "' not supported by phonetic ngrams FE.");
    }

    final FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();
    for (Sentence sentence : selectCovered(jcas, Sentence.class, target)) {
        // Phonetic codes of this sentence's tokens, in token order.
        final List<String> codes = new ArrayList<String>();
        try {
            for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
                codes.add(phonetics.encode(token.getCoveredText()));
            }
        } catch (EncoderException e) {
            // Surface encoder failures as the checked exception this API declares.
            throw new TextClassificationException(e);
        }

        final String[] codeArray = codes.toArray(new String[0]);
        for (List<String> ngram : new NGramStringListIterable(codeArray, minN, maxN)) {
            distribution.inc(StringUtils.join(ngram, NGRAM_GLUE));
        }
    }
    return distribution;
}