Example usage for org.apache.hadoop.io Text encode

List of usage examples for org.apache.hadoop.io Text encode

Introduction

On this page you can find example usages of org.apache.hadoop.io Text encode.

Prototype


public static ByteBuffer encode(String string) throws CharacterCodingException 

Source Link

Document

Converts the provided String to bytes using the UTF-8 encoding.

Usage

From source file:com.ebay.nest.io.sede.lazy.LazyDate.java

License:Apache License

/**
 * Writes a Date to the output stream as the UTF-8 bytes of its
 * {@code toString()} text (intended to be SQL date format).
 *
 * @param out
 *          The output stream
 * @param d
 *          The Date to write
 * @throws IOException
 *           if the text cannot be encoded or the stream write fails
 */
public static void writeUTF8(OutputStream out, DateWritable d) throws IOException {
    final String dateText = d.toString();
    final ByteBuffer utf8 = Text.encode(dateText);
    // Only the first limit() bytes of the backing array carry encoded data.
    final int byteCount = utf8.limit();
    out.write(utf8.array(), 0, byteCount);
}

From source file:com.ebay.nest.io.sede.lazy.LazyUtils.java

License:Apache License

/**
 * Write out the text representation of a Primitive Object to a UTF8 byte
 * stream./* w  w  w .ja  va2s .  c  o m*/
 *
 * @param out
 *          The UTF8 byte OutputStream
 * @param o
 *          The primitive Object
 * @param needsEscape
 *          Whether a character needs escaping. This array should have size of
 *          128.
 */
public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped,
        byte escapeChar, boolean[] needsEscape) throws IOException {

    switch (oi.getPrimitiveCategory()) {
    case BOOLEAN: {
        boolean b = ((BooleanObjectInspector) oi).get(o);
        if (b) {
            out.write(trueBytes, 0, trueBytes.length);
        } else {
            out.write(falseBytes, 0, falseBytes.length);
        }
        break;
    }
    case BYTE: {
        LazyInteger.writeUTF8(out, ((ByteObjectInspector) oi).get(o));
        break;
    }
    case SHORT: {
        LazyInteger.writeUTF8(out, ((ShortObjectInspector) oi).get(o));
        break;
    }
    case INT: {
        LazyInteger.writeUTF8(out, ((IntObjectInspector) oi).get(o));
        break;
    }
    case LONG: {
        LazyLong.writeUTF8(out, ((LongObjectInspector) oi).get(o));
        break;
    }
    case FLOAT: {
        float f = ((FloatObjectInspector) oi).get(o);
        ByteBuffer b = Text.encode(String.valueOf(f));
        out.write(b.array(), 0, b.limit());
        break;
    }
    case DOUBLE: {
        double d = ((DoubleObjectInspector) oi).get(o);
        ByteBuffer b = Text.encode(String.valueOf(d));
        out.write(b.array(), 0, b.limit());
        break;
    }
    case STRING: {
        Text t = ((StringObjectInspector) oi).getPrimitiveWritableObject(o);
        writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape);
        break;
    }

    case VARCHAR: {
        HiveVarcharWritable hc = ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o);
        Text t = hc.getTextValue();
        writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape);
        break;
    }
    case BINARY: {
        BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
        byte[] toEncode = new byte[bw.getLength()];
        System.arraycopy(bw.getBytes(), 0, toEncode, 0, bw.getLength());
        byte[] toWrite = Base64.encodeBase64(toEncode);
        out.write(toWrite, 0, toWrite.length);
        break;
    }
    case DATE: {
        LazyDate.writeUTF8(out, ((DateObjectInspector) oi).getPrimitiveWritableObject(o));
        break;
    }
    case TIMESTAMP: {
        LazyTimestamp.writeUTF8(out, ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o));
        break;
    }
    case DECIMAL: {
        HiveDecimal bd = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
        ByteBuffer b = Text.encode(bd.toString());
        out.write(b.array(), 0, b.limit());
        break;
    }
    default: {
        throw new RuntimeException("Hive internal error.");
    }
    }
}

From source file:net.orpiske.tcs.wc.map.WordMapper.java

License:Apache License

/**
 * Fetches the column keyed by the encoded form of {@code name} and returns
 * its value converted to a String by {@code ByteBufferUtil.string}.
 *
 * <p>No null check is performed: if {@code columns} has no entry for the
 * encoded name, dereferencing the lookup result throws
 * {@link NullPointerException}.
 *
 * @param columns the columns to search, keyed by encoded column name
 * @param name    the column name to look up
 * @return the string form of the matching column's value
 */
private String getColumnValue(SortedMap<ByteBuffer, Column> columns, final String name)
        throws IOException, InterruptedException {
    final ByteBuffer encodedName = Text.encode(name);
    final Column match = columns.get(encodedName);
    return ByteBufferUtil.string(match.value());
}

From source file:org.apache.orc.impl.TestReaderImpl.java

License:Apache License

/**
 * Builds a byte array made of the encoded header, the encoded footer, and a
 * single trailing byte holding the footer length. That length counts the
 * trailing byte itself (encoded footer size plus one).
 *
 * @param headerStr text placed at the start of the content
 * @param footerStr text placed immediately before the trailing length byte
 * @return the backing array of the assembled buffer
 * @throws CharacterCodingException if either string cannot be encoded
 */
private byte[] composeContent(String headerStr, String footerStr) throws CharacterCodingException {
    final ByteBuffer headerBytes = Text.encode(headerStr);
    final ByteBuffer footerBytes = Text.encode(footerStr);
    // Footer section = encoded footer + the one length byte appended below.
    final int footerSize = footerBytes.remaining() + 1;
    final int totalSize = headerBytes.remaining() + footerSize;

    final ByteBuffer content = ByteBuffer.allocate(totalSize);
    content.put(headerBytes).put(footerBytes).put((byte) footerSize);
    return content.array();
}

From source file:org.mitre.ccv.mapred.io.KmerEntropyPairWritable.java

License:Open Source License

@Override
public void write(DataOutput out) throws IOException {
    out.writeByte(0); // place holder
    out.writeDouble(this.value);
    //out.writeUTF(this.key);
    /**/*from w  w w  .  j a  va 2 s  .c o m*/
     * Adapted from Text, but we do not use writeVInt becuase
     * we need to read this back in from a ByteBuffer.
     */
    ByteBuffer bytes = Text.encode(this.key);
    int length = bytes.limit();
    out.writeInt(length);
    out.write(bytes.array(), 0, length);
}

From source file:org.terrier.indexing.TwitterJSONDocument.java

License:Mozilla Public License

/**
 * Returns the number of bytes in the encoded form of {@code t} as produced
 * by {@code Text.encode}.
 *
 * @param t the string to measure
 * @return the encoded byte length, or -1 if encoding fails (with assertions
 *         enabled a failure raises an {@link AssertionError} instead)
 */
protected int byteLength(String t) {
    try {
        // Use limit(), not array().length: the buffer's backing array may be
        // larger than the encoded data, which would over-report the length.
        return Text.encode(t).limit();
    } catch (Exception e) {
        // Not expected to happen; surface the cause when assertions are on.
        assert false : e;
        return -1;
    }
}

From source file:org.terrier.structures.indexing.CompressingMetaIndexBuilder.java

License:Mozilla Public License

/**
 * {@inheritDoc}
 * <p>
 * Each value is encoded, checked against the per-key character and byte
 * limits, and written into a fixed-width slot of {@code valueLensBytes[i]}
 * bytes (short values are padded from {@code spaces}). The assembled record
 * is then run through {@code zip} and appended to {@code dataOutput}, its
 * start offset is written to {@code indexOutput}, and one forward-lookup
 * entry per forward key is written mapping the key's value to the current
 * entry number.
 *
 * @param data one value per key, in key order; {@code null} is stored as ""
 * @throws IOException if encoding or any of the underlying writes fails
 * @throws IllegalArgumentException if a value exceeds its character limit
 *         (and cropping is disabled) or its byte limit
 */
@Override
public void writeDocumentEntry(String[] data) throws IOException {
    int i = 0;
    for (String value : data) {
        if (value == null) {
            value = "";
        } else if (value.length() > valueLensChars[i]) {
            if (CROP_LONG) {
                value = value.substring(0, valueLensChars[i] - 1);
            } else {
                throw new IllegalArgumentException("Data (" + value + ") of string length " + value.length()
                        + " for key " + keyNames[i] + " exceeds max string length of " + valueLensChars[i]
                        + "(byte length of " + valueLensBytes[i]
                        + "). Crop in the Document, increase indexer.meta.forward.keylens, or set metaindex.compressed.crop.long");
            }
        }

        // The encoded data occupies only the first limit() bytes of the backing
        // array; the array itself may be longer, so its length must not be used
        // as the byte count and the whole array must not be written.
        final java.nio.ByteBuffer encoded = Text.encode(value);
        final byte[] b = encoded.array();
        final int numberOfBytesToWrite = encoded.limit();
        if (numberOfBytesToWrite > valueLensBytes[i]) {
            throw new IllegalArgumentException("Data (" + value + ") of byte length " + numberOfBytesToWrite
                    + " for key " + keyNames[i] + " exceeds max byte length of " + valueLensBytes[i]
                    + "(string length of " + valueLensChars[i]
                    + "). Crop in the Document, or increase indexer.meta.forward.keylens");
        }
        baos.write(b, 0, numberOfBytesToWrite);
        // Pad the slot so every record has the same fixed width per key.
        if (numberOfBytesToWrite < valueLensBytes[i]) {
            baos.write(spaces, 0, valueLensBytes[i] - numberOfBytesToWrite);
        }
        i++;
    }

    // Compress the assembled record and append it to the data file.
    zip.reset();
    zip.setInput(baos.toByteArray());
    zip.finish();
    baos.reset();
    // Record where this entry starts; the offset is written as a long (8 bytes).
    indexOutput.writeLong(currentOffset);
    currentIndexOffset += 8;
    int compressedEntrySize = 0;
    while (!zip.finished()) {
        final int numOfCompressedBytes = zip.deflate(compressedBuffer);
        dataOutput.write(compressedBuffer, 0, numOfCompressedBytes);
        compressedEntrySize += numOfCompressedBytes;
    }
    currentOffset += compressedEntrySize;

    // Emit value -> entry number pairs for each forward key.
    for (i = 0; i < forwardKeys.length; i++) {
        Text key = keyFactories[i].newInstance();
        key.set(data[forwardKeys[i]]);
        IntWritable value = new IntWritable();
        value.set(entryCount);
        forwardWriters[i].write(key, value);
        // A value that compares equal to or below the previous one clears the sorted flag.
        if (lastValues[i] != null && data[forwardKeys[i]].compareTo(lastValues[i]) < 1) {
            forwardKeyValuesSorted[i] = false;
        }
        lastValues[i] = data[forwardKeys[i]];
    }
    entryCount++;

    //check for low memory, and flush if necessary
    if (entryCount % DOCS_PER_CHECK == 0 && memCheck.checkMemory()) {
        flush();
        memCheck.reset();
    }
}