Example usage for org.apache.hadoop.io Text getBytes

List of usage examples for org.apache.hadoop.io Text getBytes

Introduction

This page collects usage examples for org.apache.hadoop.io.Text.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Returns the raw backing byte array; only the first getLength() bytes are valid, so always pair this call with getLength().

Usage

From source file:hivemall.fm.FFMPredictUDF.java

License:Apache License

/**
 * Scores one row with the prediction model identified by the first argument.
 *
 * <p>{@code args[0]} is the model ID, {@code args[1]} the serialized model and
 * {@code args[2]} the feature list. The deserialized model is cached together with its
 * ID, so the model bytes are only decoded again when the ID differs from the cached one.
 *
 * @param args deferred UDF arguments as described above
 * @return the shared result holder set to the predicted value, or {@code null} when the
 *         row has no features
 * @throws HiveException if the model ID or the model is NULL, or the model cannot be
 *         deserialized
 */
@Override
public Object evaluate(DeferredObject[] args) throws HiveException {
    final String modelId = _modelIdOI.getPrimitiveJavaObject(args[0].get());
    if (modelId == null) {
        throw new HiveException("modelId is not set");
    }

    // Start from the cached model and replace it only when the requested ID differs.
    FFMPredictionModel model = this._cachedModel;
    if (!modelId.equals(_cachedModeId)) {
        final Text serialized = _modelOI.getPrimitiveWritableObject(args[1].get());
        if (serialized == null) {
            throw new HiveException("Model is null for model ID: " + modelId);
        }
        try {
            // Only the first getLength() bytes of the backing array are valid.
            model = FFMPredictionModel.deserialize(serialized.getBytes(), serialized.getLength());
        } catch (ClassNotFoundException e) {
            throw new HiveException(e);
        } catch (IOException e) {
            throw new HiveException(e);
        }
        this._cachedModeId = modelId;
        this._cachedModel = model;
    }

    final int featureCount = model.getNumFeatures();
    final int fieldCount = model.getNumFields();

    // [workaround]
    // java.lang.ClassCastException: org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray
    // cannot be cast to [Ljava.lang.Object;
    Object rawFeatures = args[2].get();
    if (rawFeatures instanceof LazyBinaryArray) {
        rawFeatures = ((LazyBinaryArray) rawFeatures).getList();
    }

    final Feature[] features =
            Feature.parseFFMFeatures(rawFeatures, _featureListOI, _probes, featureCount, fieldCount);
    if (features == null || features.length == 0) {
        return null; // return NULL if there are no features
    }
    // Keep the parsed array; it is handed back to the parser on the next call.
    this._probes = features;

    _result.set(predict(features, model));
    return _result;
}

From source file:hivemall.mix.MixMessageEncoder.java

License:Open Source License

/**
 * Writes {@code obj} to {@code buf} as a type tag followed by its payload.
 *
 * <p>Handled types are {@link Integer}, {@code Text}, {@link String},
 * {@code IntWritable} and {@code LongWritable}. A {@code Text} is written as its valid
 * length followed by exactly that many bytes of its backing array.
 *
 * @param obj the value to encode; must not be {@code null}
 * @param buf the destination buffer
 * @throws IOException declared for the string-writing path
 * @throws IllegalStateException if {@code obj} is of an unsupported type
 */
private static void encodeObject(final Object obj, final ByteBuf buf) throws IOException {
    assert (obj != null);

    if (obj instanceof Integer) {
        buf.writeByte(INTEGER_TYPE);
        buf.writeInt(((Integer) obj).intValue());
        return;
    }
    if (obj instanceof Text) {
        final Text text = (Text) obj;
        final byte[] backing = text.getBytes();
        // The backing array may be longer than the content; write only the valid prefix.
        final int validLength = text.getLength();
        buf.writeByte(TEXT_TYPE);
        buf.writeInt(validLength);
        buf.writeBytes(backing, 0, validLength);
        return;
    }
    if (obj instanceof String) {
        buf.writeByte(STRING_TYPE);
        writeString((String) obj, buf);
        return;
    }
    if (obj instanceof IntWritable) {
        buf.writeByte(INT_WRITABLE_TYPE);
        buf.writeInt(((IntWritable) obj).get());
        return;
    }
    if (obj instanceof LongWritable) {
        buf.writeByte(LONG_WRITABLE_TYPE);
        buf.writeLong(((LongWritable) obj).get());
        return;
    }
    throw new IllegalStateException("Unexpected type: " + obj.getClass().getName());
}

From source file:hivemall.sketch.bloom.BloomContainsUDF.java

License:Apache License

/**
 * Tests whether {@code keyStr} may be contained in the serialized bloom filter
 * {@code bloomStr}.
 *
 * <p>The most recently deserialized filter is cached and reused as long as the same
 * serialized filter is passed again, so the filter is not decoded for every row.
 *
 * @param bloomStr the serialized bloom filter, or {@code null}
 * @param keyStr the key to look up, or {@code null}
 * @return {@code null} if either argument is {@code null}; otherwise the result of the
 *         filter's membership test
 * @throws HiveException if the bloom filter cannot be deserialized
 */
@Nullable
public Boolean evaluate(@Nullable Text bloomStr, @Nullable Text keyStr) throws HiveException {
    // Guard the arguments themselves: checking the `key` field here let a NULL keyStr
    // fall through to a NullPointerException.
    if (bloomStr == null || keyStr == null) {
        return null;
    }

    final Filter bloom;
    // The cached filter is valid only for the serialized form it was built from, so the
    // cache is keyed on bloomStr (held in prevKey), not on the lookup key.
    if (prevFilter != null && bloomStr.equals(prevKey)) {
        bloom = prevFilter;
    } else {
        try {
            bloom = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
        } catch (IOException e) {
            throw new HiveException(e);
        }
        // Keep a private copy: the caller may reuse and overwrite the Text instance,
        // which would make an aliased cache key compare equal to anything.
        this.prevKey = new Text(bloomStr);
        this.prevFilter = bloom;
    }

    // The lookup key must be refreshed on every call. Text.getBytes() exposes the whole
    // backing array, of which only the first getLength() bytes are valid, so copy
    // exactly that prefix.
    key.set(java.util.Arrays.copyOf(keyStr.getBytes(), keyStr.getLength()), 1.0d);
    return Boolean.valueOf(bloom.membershipTest(key));
}

From source file:hivemall.sketch.bloom.BloomContainsUDFTest.java

License:Apache License

/**
 * Builds a bloom filter populated with {@code size} pseudo-random keys.
 *
 * <p>Each key is the hexadecimal string form of a Gaussian sample drawn from a
 * {@link Random} seeded with {@code seed}, so the same seed and size always produce the
 * same set of keys.
 *
 * @param seed seed of the random generator
 * @param size number of keys to add
 * @return the populated filter
 */
@Nonnull
private static DynamicBloomFilter createBloomFilter(long seed, int size) {
    final DynamicBloomFilter dbf = BloomFilterUtils.newDynamicBloomFilter(30);
    final Key key = new Key();

    final Random rnd1 = new Random(seed);
    for (int i = 0; i < size; i++) {
        final Text t = new Text(Double.toHexString(rnd1.nextGaussian()));
        // Text.getBytes() returns the backing array, which can be longer than the
        // encoded string; hash only the valid getLength() prefix so the key matches
        // the string's actual bytes.
        key.set(java.util.Arrays.copyOf(t.getBytes(), t.getLength()), 1.0);
        dbf.add(key);
    }

    return dbf;
}

From source file:hivemall.sketch.bloom.BloomFilterUtils.java

License:Apache License

/**
 * Deserializes a filter from the valid bytes of {@code in} into {@code dst}.
 *
 * <p>Delegates to the byte-array overload with offset {@code 0} and the text's
 * {@code getLength()} as the length.
 *
 * @param in the serialized filter
 * @param dst the filter instance to populate
 * @return whatever the byte-array overload returns for {@code dst}
 * @throws IOException if the delegate fails to read the data
 */
@Nonnull
public static <F extends Filter> F deserialize(@Nonnull final Text in, @Nonnull final F dst)
        throws IOException {
    final byte[] serialized = in.getBytes();
    final int validLength = in.getLength();
    return deserialize(serialized, 0, validLength, dst);
}

From source file:hivemall.tools.compress.DeflateUDF.java

License:Apache License

/**
 * Compresses the text passed as the first argument.
 *
 * <p>The codec is created on first use, and a single {@link BytesWritable} is reused as
 * the return value across calls.
 *
 * @param arguments deferred UDF arguments; {@code arguments[0]} is the text to compress
 * @return the compressed bytes, or {@code null} if the input is NULL
 * @throws HiveException if compression fails
 */
@Override
public BytesWritable evaluate(DeferredObject[] arguments) throws HiveException {
    if (codec == null) {
        this.codec = new DeflateCodec(true, false);
    }

    final Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }

    final Text input = stringOI.getPrimitiveWritableObject(arg0);
    final byte[] deflated;
    try {
        // Only the first getLength() bytes of the backing array hold the input.
        deflated = codec.compress(input.getBytes(), 0, input.getLength(), compressionLevel);
    } catch (IOException e) {
        throw new HiveException("Failed to compress", e);
    }

    if (result != null) {
        result.set(deflated, 0, deflated.length);
        return result;
    }
    this.result = new BytesWritable(deflated);
    return result;
}

From source file:hivemall.tools.text.Unbase91UDF.java

License:Apache License

/**
 * Decodes the Base91 text passed as the first argument into binary.
 *
 * <p>The output buffer and the returned {@link BytesWritable} are created on first use
 * and reused on later calls; the buffer is reset at the start of each call.
 *
 * @param arguments deferred UDF arguments; {@code arguments[0]} is the encoded text
 * @return the decoded bytes, or {@code null} if the input is NULL
 * @throws HiveException if decoding fails with an I/O error
 */
@Override
public BytesWritable evaluate(DeferredObject[] arguments) throws HiveException {
    if (outputBuf != null) {
        outputBuf.reset();
    } else {
        this.outputBuf = new FastByteArrayOutputStream(4096);
    }

    final Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }

    final Text encoded = stringOI.getPrimitiveWritableObject(arg0);
    try {
        // Decode only the valid prefix of the backing array.
        Base91.decode(encoded.getBytes(), 0, encoded.getLength(), outputBuf);
    } catch (IOException e) {
        throw new HiveException(e);
    }

    if (result != null) {
        // Hand over the buffer's internal array together with the number of bytes written.
        result.set(outputBuf.getInternalArray(), 0, outputBuf.size());
    } else {
        this.result = new BytesWritable(outputBuf.toByteArray());
    }
    return result;
}

From source file:hivemall.utils.hadoop.JsonSerdeUtils.java

License:Apache License

/**
 * Parses the JSON document held in {@code t}.
 *
 * <p>A top-level object is handled by {@code parseObject}, a top-level array by
 * {@code parseArray}, and anything else by {@code parseValue}.
 *
 * @param t the JSON text
 * @param columnNames column names passed to the object parser, may be {@code null}
 * @param columnTypes column types passed to the object/array parsers, may be {@code null}
 * @return the parsed value, cast to the caller's expected type
 * @throws SerDeException if the text is not valid JSON or reading it fails
 */
@SuppressWarnings("unchecked")
@Nonnull
public static <T> T deserialize(@Nonnull final Text t, @Nullable final List<String> columnNames,
        @Nullable final List<TypeInfo> columnTypes) throws SerDeException {
    final Object result;
    try {
        // Text.getBytes() exposes the whole backing array; only the first getLength()
        // bytes belong to the current value. Bound the parser to that range so stale
        // bytes left over from a previous, longer value are never read.
        JsonParser p = new JsonFactory().createJsonParser(t.getBytes(), 0, t.getLength());
        final JsonToken token = p.nextToken();
        if (token == JsonToken.START_OBJECT) {
            result = parseObject(p, columnNames, columnTypes);
        } else if (token == JsonToken.START_ARRAY) {
            result = parseArray(p, columnTypes);
        } else {
            result = parseValue(p);
        }
    } catch (JsonParseException e) {
        throw new SerDeException(e);
    } catch (IOException e) {
        throw new SerDeException(e);
    }
    return (T) result;
}

From source file:io.aos.hdfs.TextIterator.java

License:Apache License

/**
 * Prints, in hexadecimal, each code point decoded from the UTF-8 bytes of a sample
 * {@code Text}.
 *
 * @param args unused
 */
public static void main(String... args) {
    final Text text = new Text("\u0041\u00DF\u6771\uD801\uDC00");

    // Wrap only the valid prefix of the backing array.
    final ByteBuffer utf8 = ByteBuffer.wrap(text.getBytes(), 0, text.getLength());
    while (utf8.hasRemaining()) {
        final int codePoint = Text.bytesToCodePoint(utf8);
        if (codePoint == -1) {
            break;
        }
        System.out.println(Integer.toHexString(codePoint));
    }
}

From source file:io.aos.hdfs.TextTest.java

License:Apache License

/**
 * Checks the length, backing-array size and {@code charAt} results of a {@code Text}
 * built from the string "hadoop".
 */
@Test
public void test() throws IOException {
    // vv TextTest
    final Text text = new Text("hadoop");
    final int length = text.getLength();
    assertThat(length, is(6));
    final byte[] raw = text.getBytes();
    assertThat(raw.length, is(6));

    final int third = text.charAt(2);
    assertThat(third, is((int) 'd'));
    final int beyondEnd = text.charAt(100);
    assertThat("Out of bounds", beyondEnd, is(-1));
    // ^^ TextTest
}