Example usage for org.apache.hadoop.io Text encode

List of usage examples for org.apache.hadoop.io Text encode

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.encode.

Prototype

public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException 

Source Link

Document

Converts the provided String to bytes using the UTF-8 encoding.

Usage

From source file:com.ibm.bi.dml.runtime.transform.DistinctValue.java

License:Open Source License

public DistinctValue(String w, long count) throws CharacterCodingException {
    ByteBuffer bb = Text.encode(w, true);
    _bytes = bb.array();/* w  w  w . ja  va2s.  c o  m*/
    _length = bb.limit();
    _count = count;
}

From source file:cosmos.mapred.AggregatingRecordReader.java

License:Apache License

/**
 * Appends the UTF-8 encoding of {@code s} to {@code t}.
 *
 * @param t the text that receives the encoded bytes
 * @param s the string to encode and append
 * @throws IOException wrapping the {@link CharacterCodingException} raised when
 *                     {@code s} cannot be encoded
 */
private void textAppend(Text t, String s) throws IOException {
    final ByteBuffer encoded;
    try {
        // replace == false: encoding problems surface as an exception.
        encoded = Text.encode(s, false);
    } catch (CharacterCodingException e) {
        throw new IOException(e);
    }
    // Only the first limit() bytes of the backing array hold encoded data.
    final int used = encoded.limit();
    t.append(encoded.array(), 0, used);
}

From source file:de.tudarmstadt.ukp.dkpro.bigdata.collocations.Gram.java

License:Apache License

/**
 * Create a gram with the specified frequency.
 *
 * The gram string is stored as UTF-8 bytes prefixed by one extra leading
 * character, which is then handed to {@code encodeType} together with the
 * requested type.
 *
 * @param ngram
 *          the gram string
 * @param frequency
 *          the gram frequency
 * @param type
 *          whether the gram is at the head of its text unit or tail or unigram
 * @throws IllegalStateException
 *           if the gram string cannot be encoded
 */
public Gram(String ngram, int frequency, Type type) {
    Preconditions.checkNotNull(ngram);
    try {
        // extra character is used for storing type which is part
        // of the sort key.
        ByteBuffer bb = Text.encode('\0' + ngram, true);
        // The backing array is kept without copying; only the first
        // limit() bytes of it are valid encoded data.
        bytes = bb.array();
        length = bb.limit();
    } catch (CharacterCodingException e) {
        // Encoding is requested with replace == true, so a failure here is unexpected.
        throw new IllegalStateException("Should not have happened ", e);
    }

    // Fill in the placeholder at offset 0 with the encoded type.
    encodeType(type, bytes, 0);
    this.frequency = frequency;
}

From source file:mvm.rya.indexing.accumulo.freetext.ColumnPrefixes.java

License:Apache License

/**
 * Returns a new {@code Text} holding a copy of {@code prefix} followed by the
 * UTF-8 encoding of {@code str}. The {@code prefix} argument itself is not modified.
 *
 * @param prefix the text to copy and extend
 * @param str    the string whose encoded bytes are appended
 * @return the newly created, combined text
 * @throws IllegalArgumentException if {@code str} cannot be encoded
 */
private static Text concat(Text prefix, String str) {
    final Text combined = new Text(prefix);

    final ByteBuffer encoded;
    try {
        // replace == false: encoding problems surface as an exception.
        encoded = Text.encode(str, false);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }

    // Only the first limit() bytes of the backing array hold encoded data.
    combined.append(encoded.array(), 0, encoded.limit());
    return combined;
}

From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java

License:Apache License

/**
 * Appends the UTF-8 bytes of the given string to the given {@link Text}.
 *
 * @param t              the text that receives the encoded bytes
 * @param s              the string to encode and append
 * @param replaceBadChar passed straight through as the {@code replace} flag of
 *                       {@code Text.encode}
 * @throws IllegalArgumentException if the string cannot be encoded
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
    final ByteBuffer encoded;
    try {
        encoded = Text.encode(s, replaceBadChar);
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
    // Only the first limit() bytes of the backing array hold encoded data.
    final int used = encoded.limit();
    t.append(encoded.array(), 0, used);
}

From source file:org.apache.accumulo.examples.wikisearch.util.TextUtil.java

License:Apache License

/**
 * Converts the given string to its UTF-8 bytes using Hadoop's {@code Text.encode}.
 * The original author notes that this is much faster than calling
 * {@link String#getBytes(String)}.
 *
 * @param string
 *          the string to convert
 * @return a freshly allocated array holding exactly the UTF-8 representation of the string
 * @throws IllegalArgumentException
 *           if the string cannot be encoded
 */
public static byte[] toUtf8(String string) {
    try {
        final ByteBuffer encoded = Text.encode(string, false);
        // Copy only the first limit() bytes, so the result is sized to the
        // encoded data rather than to the whole backing array.
        final int size = encoded.limit();
        final byte[] utf8 = new byte[size];
        System.arraycopy(encoded.array(), 0, utf8, 0, size);
        return utf8;
    } catch (CharacterCodingException cce) {
        throw new IllegalArgumentException(cce);
    }
}

From source file:org.apache.mahout.utils.nlp.collocations.llr.Gram.java

License:Apache License

/**
 * Create a gram with the specified frequency.
 *
 * The gram string is stored as UTF-8 bytes prefixed by one extra leading
 * character, which is then handed to {@code encodeType} together with the
 * requested type.
 *
 * @param ngram
 *          the gram string
 * @param frequency
 *          the gram frequency
 * @param type
 *          whether the gram is at the head of its text unit or tail or unigram
 * @throws NullPointerException
 *           if {@code ngram} is null
 * @throws IllegalStateException
 *           if the gram string cannot be encoded
 */
public Gram(String ngram, int frequency, Type type) {
    if (ngram == null) {
        throw new NullPointerException("ngram must not be null");
    }

    try {
        // extra character is used for storing type which is part
        // of the sort key.
        ByteBuffer bb = Text.encode('\0' + ngram, true);
        // The backing array is kept without copying; only the first
        // limit() bytes of it are valid encoded data.
        bytes = bb.array();
        length = bb.limit();
    } catch (CharacterCodingException e) {
        // Encoding is requested with replace == true, so a failure here is unexpected.
        throw new IllegalStateException("Should not have happened ", e);
    }

    // Fill in the placeholder at offset 0 with the encoded type.
    encodeType(type, bytes, 0);
    this.frequency = frequency;
}