Example usage for org.apache.lucene.util UnicodeUtil UTF16toUTF8

List of usage examples for org.apache.lucene.util UnicodeUtil UTF16toUTF8

Introduction

On this page you can find example usages of org.apache.lucene.util UnicodeUtil UTF16toUTF8.

Prototype

public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out) 

Source Link

Document

Encode characters from this String, starting at offset for length characters.

Usage

From source file:com.splicemachine.encoding.StringEncoding.java

License:Apache License

/**
 * Serializes a String to bytes on top of Lucene's UnicodeUtil.UTF16toUTF8 conversion.
 *
 * <p>A {@code null} value yields {@code Encoding.EMPTY_BYTE_ARRAY}. An empty string yields the
 * single byte {@code 0x01}, with all bits inverted when {@code desc} is set. For any other
 * string, every UTF-8 byte is incremented by 2 and, when {@code desc} is set, has all of its
 * bits inverted.
 *
 * @param value the string to encode; may be {@code null}
 * @param desc  {@code true} to emit the bit-inverted form of every byte
 * @return a newly allocated array holding the encoded bytes (or the shared empty array for
 *         {@code null})
 */
public static byte[] toBytes(String value, boolean desc) {
    if (value == null) {
        return Encoding.EMPTY_BYTE_ARRAY;
    }
    if (value.isEmpty()) {
        final byte marker = 0x01;
        return new byte[] { desc ? (byte) (marker ^ 0xff) : marker };
    }

    // Convert to UTF-8 through Lucene.
    final BytesRef utf8 = new BytesRef();
    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), utf8);

    final byte[] encoded = new byte[utf8.length];
    // XOR with 0xff inverts every bit of the byte; XOR with 0 leaves it untouched.
    final int mask = desc ? 0xff : 0x00;
    int src = utf8.offset;
    for (int dst = 0; dst < encoded.length; dst++, src++) {
        encoded[dst] = (byte) ((utf8.bytes[src] + 2) ^ mask);
    }
    return encoded;
}

From source file:com.splicemachine.encoding.StringEncoding.java

License:Apache License

/**
 * Encodes {@code value} into {@code buffer}, starting at {@code offset}.
 *
 * <p>The string is converted to UTF-8 with Lucene's UnicodeUtil.UTF16toUTF8; every resulting
 * byte is incremented by 2 and, when {@code desc} is set, has all of its bits inverted before
 * being stored. Nothing is written for a {@code null} or empty string.
 *
 * <p>The caller must make sure {@code buffer} has room for the encoded bytes beyond
 * {@code offset}; no bounds check is performed here.
 *
 * @param value  the string to encode; may be {@code null}
 * @param desc   {@code true} to emit the bit-inverted form of every byte
 * @param buffer destination array
 * @param offset index in {@code buffer} of the first byte to write
 * @return the number of bytes written into {@code buffer} (0 for {@code null} or empty input)
 */
public static int toBytes(String value, boolean desc, byte[] buffer, int offset) {
    if (value == null || value.length() == 0)
        return 0;

    //convert to UTF-8 encoding
    BytesRef result = new BytesRef();
    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
    for (int i = 0; i < result.length; i++) {
        byte newD = (byte) (result.bytes[i + result.offset] + 2);
        if (desc)
            newD ^= 0xff; // invert every bit of the byte
        buffer[offset + i] = newD;
    }
    // Report the bytes actually written. value.length() counts UTF-16 chars, which is
    // smaller than the UTF-8 byte count as soon as the string contains a non-ASCII character.
    return result.length;
}

From source file:fi.nationallibrary.ndl.solr.schema.CompressedField.java

License:Apache License

/** Compresses the String value using the specified
 *  compressionLevel (constants are defined in
 *  java.util.zip.Deflater)./*from w  w w.  j ava2 s. c om*/
 *  @param value      the value to compress
 *  @param compressionLevel      the compression level to use. must be between Deflater.BEST_SPEED (1) and Deflater.BEST_COMPRESSION (9)
 */
public static byte[] compressString(String value, int compressionLevel) {
    UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
    UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
    return compress(result.result, 0, result.length, compressionLevel);
}

From source file:io.crate.operation.scalar.string.LowerFunction.java

License:Apache License

/**
 * Lower-cases the value of the first argument.
 *
 * <p>The value is taken as UTF-8 bytes, decoded to UTF-16 chars, lower-cased in place through
 * {@code charUtils}, and encoded back to UTF-8.
 *
 * @param args the function inputs; only {@code args[0]} is read
 * @return the lower-cased value, or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The decode buffer is sized to the input byte count.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toLowerCase(chars, 0, charCount);

    // The encode buffer is sized for the worst case per char.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}

From source file:io.crate.operation.scalar.string.UpperFunction.java

License:Apache License

/**
 * Upper-cases the value of the first argument.
 *
 * <p>The value is taken as UTF-8 bytes, decoded to UTF-16 chars, upper-cased in place through
 * {@code charUtils}, and encoded back to UTF-8.
 *
 * @param args the function inputs; only {@code args[0]} is read
 * @return the upper-cased value, or {@code null} when the input value is {@code null}
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The decode buffer is sized to the input byte count.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);
    charUtils.toUpperCase(chars, 0, charCount);

    // The encode buffer is sized for the worst case per char.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}

From source file:org.apache.blur.lucene.serializer.SerializerUtil.java

License:Apache License

/**
 * Converts {@code s} to UTF-8 and writes the resulting bytes to {@code out} via
 * {@code writeBytesRef}.
 *
 * @param s   the string to write
 * @param out the destination
 * @throws IOException if the underlying write fails
 */
public static void writeString(String s, DataOutput out) throws IOException {
    final BytesRef utf8 = new BytesRef();
    final int charCount = s.length();
    UnicodeUtil.UTF16toUTF8(s, 0, charCount, utf8);
    writeBytesRef(utf8, out);
}

From source file:org.apache.solr.request.PHPSerializedResponseWriter.java

License:Apache License

/**
 * Writes {@code val} as a PHP-serialized string: {@code s:<byteCount>:"<val>";}.
 *
 * <p>Serialized PHP strings are not escaped, but the declared size must be the number of bytes
 * rather than the number of chars. When {@code CESU8} is set the size is computed per UTF-16
 * char (1, 2 or 3 bytes each); otherwise it is the length of the UTF-8 conversion.
 *
 * @param name          unused here
 * @param val           the string to write
 * @param needsEscaping unused here
 * @throws IOException if writing fails
 */
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
    final int nBytes;
    if (!CESU8) {
        UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
        nBytes = utf8.length;
    } else {
        // Count bytes char by char: up to U+007F is 1 byte, up to U+07FF is 2, the rest 3.
        int total = 0;
        for (int idx = 0; idx < val.length(); idx++) {
            final char ch = val.charAt(idx);
            total += (ch <= '\u007f') ? 1 : (ch <= '\u07ff') ? 2 : 3;
        }
        nBytes = total;
    }

    final String size = Integer.toString(nBytes);
    writer.write("s:");
    writer.write(size);
    writer.write(":\"");
    writer.write(val);
    writer.write("\";");
}

From source file:org.apache.solr.response.PHPSerializedResponseWriter.java

License:Apache License

/**
 * Writes {@code val} as a PHP-serialized string: {@code s:<byteCount>:"<val>";}.
 *
 * <p>Serialized PHP strings are not escaped, but the declared size must be the number of bytes
 * rather than the number of chars, so the value is converted to UTF-8 first to measure it.
 *
 * @param name          unused here
 * @param val           the string to write
 * @param needsEscaping unused here
 * @throws IOException if writing fails
 */
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
    UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
    final String size = Integer.toString(utf8.length);

    writer.write("s:");
    writer.write(size);
    writer.write(":\"");
    writer.write(val);
    writer.write("\";");
}

From source file:org.apache.solr.schema.FieldType.java

License:Apache License

/**
 * Given the readable value, fills {@code result} with the term value that will match it.
 *
 * <p>The readable value is mapped through the String overload of {@code readableToIndexed}
 * and the outcome is stored in {@code result} as UTF-8.
 *
 * @param val    the readable value
 * @param result receives the UTF-8 bytes of the indexed form
 */
public void readableToIndexed(CharSequence val, BytesRef result) {
    final String indexedForm = readableToIndexed(val.toString());
    final int charCount = indexedForm.length();
    UnicodeUtil.UTF16toUTF8(indexedForm, 0, charCount, result);
}

From source file:org.apache.solr.search.ValueSourceParser.java

License:Apache License

/**
 * Parses a (field, value) argument pair from {@code fp} into a {@code TInfo}.
 *
 * <p>The field's type is looked up in the request schema, falling back to a fresh
 * {@code StrField} when none is found. For a {@code TextField} the value is run through
 * {@code getFieldQuery}; if that produces a {@code TermQuery}, its term supplies the indexed
 * field and text. For every other type the field type's own {@code readableToIndexed} fills
 * the indexed bytes.
 *
 * @param fp the parser to read the two arguments from
 * @return the populated term info
 * @throws SyntaxError if argument parsing fails
 */
private static TInfo parseTerm(FunctionQParser fp) throws SyntaxError {
    final TInfo info = new TInfo();

    info.field = fp.parseArg();
    info.indexedField = info.field;
    info.val = fp.parseArg();
    info.indexedBytes = new BytesRef();

    FieldType ft = fp.getReq().getSchema().getFieldTypeNoEx(info.field);
    if (ft == null) {
        ft = new StrField();
    }

    if (!(ft instanceof TextField)) {
        ft.readableToIndexed(info.val, info.indexedBytes);
        return info;
    }

    // Text fields need analysis on the term.
    String indexedVal = info.val;
    final Query q = ft.getFieldQuery(fp, fp.getReq().getSchema().getFieldOrNull(info.field), info.val);
    if (q instanceof TermQuery) {
        final Term term = ((TermQuery) q).getTerm();
        info.indexedField = term.field();
        indexedVal = term.text();
    }
    UnicodeUtil.UTF16toUTF8(indexedVal, 0, indexedVal.length(), info.indexedBytes);
    return info;
}