Example usage for org.apache.commons.codec StringEncoder encode

Introduction

On this page you can find example usages of the encode method of org.apache.commons.codec.StringEncoder.

Prototype

String encode(String pString) throws EncoderException;

Source Link

Document

Encodes a String and returns a String.

Usage

From source file:de.tudarmstadt.ukp.dkpro.tc.features.ngram.util.NGramUtils.java

/**
 * Counts phonetic n-grams over every sentence of the document.
 * <p>
 * The covered text of each token is run through a phonetic encoder chosen by the document
 * language ({@code Soundex} for "en", {@code ColognePhonetic} for "de"). For each sentence the
 * encoded tokens are turned into n-grams, joined with {@code NGRAM_GLUE}, and counted.
 *
 * @param jcas
 *            the CAS providing the document language and the Sentence/Token annotations
 * @param minN
 *            lower bound handed to {@code NGramStringListIterable} (presumably the smallest
 *            n-gram size)
 * @param maxN
 *            upper bound handed to {@code NGramStringListIterable} (presumably the largest
 *            n-gram size)
 * @return the frequency distribution of the glued phonetic n-grams
 * @throws TextClassificationException
 *             if the document language is neither "en" nor "de", or if encoding a token fails
 */
public static FrequencyDistribution<String> getDocumentPhoneticNgrams(JCas jcas, int minN, int maxN)
        throws TextClassificationException {
    final String languageCode = jcas.getDocumentLanguage();
    final boolean english = languageCode.equals("en");

    // Reject unsupported languages before doing any work on the document.
    if (!english && !languageCode.equals("de")) {
        throw new TextClassificationException(
                "Language code '" + languageCode + "' not supported by phonetic ngrams FE.");
    }
    final StringEncoder encoder = english ? new Soundex() : new ColognePhonetic();

    final FrequencyDistribution<String> counts = new FrequencyDistribution<String>();
    for (Sentence sentence : select(jcas, Sentence.class)) {
        final List<String> encodedTokens = new ArrayList<String>();
        try {
            for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
                encodedTokens.add(encoder.encode(token.getCoveredText()));
            }
        } catch (EncoderException e) {
            throw new TextClassificationException(e);
        }

        final String[] encoded = encodedTokens.toArray(new String[encodedTokens.size()]);
        for (List<String> ngram : new NGramStringListIterable(encoded, minN, maxN)) {
            counts.inc(StringUtils.join(ngram, NGRAM_GLUE));
        }
    }
    return counts;
}

From source file:com.github.jinahya.codec.commons.RareStringEncoderProxyTest.java

/**
 * Exercises the proxy returned by {@code RareStringEncoderProxy.newInstance()} through the
 * {@code StringEncoder} interface: encoding {@code null} must raise a
 * {@code NullPointerException}, and encoding the empty string must yield an equal string.
 *
 * @throws EncoderException
 *             if the encoder fails unexpectedly
 */
@Test
public void testAsStringEncoder() throws EncoderException {

    final StringEncoder proxy = (StringEncoder) RareStringEncoderProxy.newInstance();

    // A null input has to be rejected with a NullPointerException.
    boolean nullRejected = false;
    try {
        proxy.encode((String) null);
    } catch (final NullPointerException expectedNpe) {
        nullRejected = true;
    }
    if (!nullRejected) {
        Assert.fail("passed: encode((String) null)");
    }

    // The empty string is expected to encode to itself.
    final String expected = "";
    Assert.assertEquals(proxy.encode(expected), expected);
}

From source file:net.yacy.cora.language.phonetic.Soundex.java

/**
 * Encodes the Strings and returns the number of characters in the two
 * encoded Strings that are the same./*from  ww  w. j a  v a  2 s . c om*/
 * <ul>
 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates
 * little or no similarity, and 4 indicates strong similarity or identical
 * values.</li>
 * <li>For refined Soundex, the return value can be greater than 4.</li>
 * </ul>
 * 
 * @param encoder
 *                  The encoder to use to encode the Strings.
 * @param s1
 *                  A String that will be encoded and compared.
 * @param s2
 *                  A String that will be encoded and compared.
 * @return The number of characters in the two Soundex encoded Strings that
 *             are the same.
 * 
 * @see #differenceEncoded(String,String)
 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 *          MS T-SQL DIFFERENCE</a>
 * 
 * @throws EncoderException
 *                  if an error occurs encoding one of the strings
 */
static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException {
    return differenceEncoded(encoder.encode(s1), encoder.encode(s2));
}

From source file:org.apache.abdera2.common.text.Codec.java

/**
 * Encodes {@code value} with the encoder that corresponds to this constant: {@code QCodec}
 * for {@code Q}, {@code BCodec} for {@code B}, and {@code StarCodec} for anything else.
 *
 * @param value
 *            the text to encode; may be {@code null}
 * @param charset
 *            the charset name handed to the encoder's constructor
 * @return the encoded text, {@code null} when {@code value} is {@code null}, or
 *         {@code value} unchanged when creating the encoder or encoding throws
 */
private String _encode(String value, String charset) {
    if (value != null) {
        try {
            final StringEncoder encoder;
            switch (this) {
            case Q:
                encoder = new QCodec(charset);
                break;
            case B:
                encoder = new BCodec(charset);
                break;
            default:
                encoder = new StarCodec(charset);
                break;
            }
            return encoder.encode(value);
        } catch (Exception ignored) {
            // Best effort: any failure falls back to the unencoded input.
            return value;
        }
    }
    return null;
}

From source file:org.dkpro.tc.features.ngram.util.NGramUtils.java

/**
 * Counts phonetic n-grams over the sentences covered by {@code target}.
 * <p>
 * The covered text of each token is run through a phonetic encoder chosen by the document
 * language ({@code Soundex} for "en", {@code ColognePhonetic} for "de"). For each sentence the
 * encoded tokens are turned into n-grams, joined with {@code NGRAM_GLUE}, and counted.
 *
 * @param jcas
 *            the CAS providing the document language and the Sentence/Token annotations
 * @param target
 *            the annotation whose covered sentences are processed
 * @param minN
 *            lower bound handed to {@code NGramStringListIterable} (presumably the smallest
 *            n-gram size)
 * @param maxN
 *            upper bound handed to {@code NGramStringListIterable} (presumably the largest
 *            n-gram size)
 * @return the frequency distribution of the glued phonetic n-grams
 * @throws TextClassificationException
 *             if the document language is neither "en" nor "de", or if encoding a token fails
 */
public static FrequencyDistribution<String> getDocumentPhoneticNgrams(JCas jcas, Annotation target, int minN,
        int maxN) throws TextClassificationException {
    final String languageCode = jcas.getDocumentLanguage();
    final boolean english = languageCode.equals("en");

    // Reject unsupported languages before doing any work on the document.
    if (!english && !languageCode.equals("de")) {
        throw new TextClassificationException(
                "Language code '" + languageCode + "' not supported by phonetic ngrams FE.");
    }
    final StringEncoder encoder = english ? new Soundex() : new ColognePhonetic();

    final FrequencyDistribution<String> counts = new FrequencyDistribution<String>();
    for (Sentence sentence : selectCovered(jcas, Sentence.class, target)) {
        final List<String> encodedTokens = new ArrayList<String>();
        try {
            for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
                encodedTokens.add(encoder.encode(token.getCoveredText()));
            }
        } catch (EncoderException e) {
            throw new TextClassificationException(e);
        }

        final String[] encoded = encodedTokens.toArray(new String[encodedTokens.size()]);
        for (List<String> ngram : new NGramStringListIterable(encoded, minN, maxN)) {
            counts.inc(StringUtils.join(ngram, NGRAM_GLUE));
        }
    }
    return counts;
}