List of usage examples for org.apache.lucene.util.UnicodeUtil.UTF16toUTF8
public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out)
From source file:com.splicemachine.encoding.StringEncoding.java
License:Apache License
/** * Wraps the Lucene UnicodeUtil.UTF16toUTF8 bytes serializatiom... *//* w w w .j a v a 2 s. com*/ public static byte[] toBytes(String value, boolean desc) { if (value == null) return Encoding.EMPTY_BYTE_ARRAY; if (value.length() == 0) { if (desc) return new byte[] { (byte) (0x01 ^ 0xff) }; else return new byte[] { 0x01 }; } //convert to UTF-8 encoding BytesRef result = new BytesRef(); UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result); byte[] returnArray = new byte[result.length]; for (int i = 0; i < result.length; i++) { byte newD = (byte) (result.bytes[i + result.offset] + 2); if (desc) newD ^= 0xff; //reverse the sign bit so that data is reversed in 2's complement returnArray[i] = newD; } return returnArray; }
From source file:com.splicemachine.encoding.StringEncoding.java
License:Apache License
public static int toBytes(String value, boolean desc, byte[] buffer, int offset) { if (value == null || value.length() == 0) return 0; //convert to UTF-8 encoding BytesRef result = new BytesRef(); UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result); for (int i = 0; i < result.length; i++) { byte newD = (byte) (result.bytes[i + result.offset] + 2); if (desc) newD ^= 0xff; //reverse the sign bit so that data is reversed in 2's complement buffer[offset + i] = newD;//from w w w. j a v a 2s . c om } return value.length(); }
From source file:fi.nationallibrary.ndl.solr.schema.CompressedField.java
License:Apache License
/** Compresses the String value using the specified * compressionLevel (constants are defined in * java.util.zip.Deflater)./*from w w w. j ava2 s. c om*/ * @param value the value to compress * @param compressionLevel the compression level to use. must be between Deflater.BEST_SPEED (1) and Deflater.BEST_COMPRESSION (9) */ public static byte[] compressString(String value, int compressionLevel) { UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result(); UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result); return compress(result.result, 0, result.length, compressionLevel); }
From source file:io.crate.operation.scalar.string.LowerFunction.java
License:Apache License
/**
 * Produces the lower-cased form of the first argument.
 *
 * <p>The argument is turned into a BytesRef, decoded from UTF-8 into a char buffer,
 * passed through {@code charUtils.toLowerCase}, and re-encoded as UTF-8.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the re-encoded result, or null when the first argument's value is null
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The decode buffer is sized by the UTF-8 byte count of the input.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);

    charUtils.toLowerCase(chars, 0, charCount);

    // Allocate for the worst case so the encoder never has to grow the output.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
From source file:io.crate.operation.scalar.string.UpperFunction.java
License:Apache License
/**
 * Produces the upper-cased form of the first argument.
 *
 * <p>The argument is turned into a BytesRef, decoded from UTF-8 into a char buffer,
 * passed through {@code charUtils.toUpperCase}, and re-encoded as UTF-8.
 *
 * @param args function inputs; only {@code args[0]} is read
 * @return the re-encoded result, or null when the first argument's value is null
 */
@Override
public BytesRef evaluate(Input<Object>... args) {
    final Object raw = args[0].value();
    if (raw == null) {
        return null;
    }

    final BytesRef source = BytesRefs.toBytesRef(raw);

    // The decode buffer is sized by the UTF-8 byte count of the input.
    final char[] chars = new char[source.length];
    final int charCount = UnicodeUtil.UTF8toUTF16(source.bytes, source.offset, source.length, chars);

    charUtils.toUpperCase(chars, 0, charCount);

    // Allocate for the worst case so the encoder never has to grow the output.
    final byte[] encoded = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * charCount];
    final int byteCount = UnicodeUtil.UTF16toUTF8(chars, 0, charCount, encoded);
    return new BytesRef(encoded, 0, byteCount);
}
From source file:org.apache.blur.lucene.serializer.SerializerUtil.java
License:Apache License
/**
 * Writes a string to the given output as UTF-8.
 *
 * <p>The string is converted with UnicodeUtil.UTF16toUTF8 and the resulting
 * BytesRef is passed to {@code writeBytesRef}.
 *
 * @param s   the string to write
 * @param out the destination
 * @throws IOException declared by this method; presumably propagated from {@code writeBytesRef}
 */
public static void writeString(String s, DataOutput out) throws IOException {
    final BytesRef utf8 = new BytesRef();
    final int charCount = s.length();
    UnicodeUtil.UTF16toUTF8(s, 0, charCount, utf8);
    writeBytesRef(utf8, out);
}
From source file:org.apache.solr.request.PHPSerializedResponseWriter.java
License:Apache License
@Override public void writeStr(String name, String val, boolean needsEscaping) throws IOException { // serialized PHP strings don't need to be escaped at all, however the // string size reported needs be the number of bytes rather than chars. int nBytes;//from w ww . j a v a2s. c o m if (CESU8) { nBytes = 0; for (int i = 0; i < val.length(); i++) { char ch = val.charAt(i); if (ch <= '\u007f') { nBytes += 1; } else if (ch <= '\u07ff') { nBytes += 2; } else { nBytes += 3; } } } else { UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8); nBytes = utf8.length; } writer.write("s:"); writer.write(Integer.toString(nBytes)); writer.write(":\""); writer.write(val); writer.write("\";"); }
From source file:org.apache.solr.response.PHPSerializedResponseWriter.java
License:Apache License
@Override public void writeStr(String name, String val, boolean needsEscaping) throws IOException { // serialized PHP strings don't need to be escaped at all, however the // string size reported needs be the number of bytes rather than chars. UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8); int nBytes = utf8.length; writer.write("s:"); writer.write(Integer.toString(nBytes)); writer.write(":\""); writer.write(val); writer.write("\";"); }
From source file:org.apache.solr.schema.FieldType.java
License:Apache License
/**
 * Given the readable value, fills {@code result} with the term bytes that will match it.
 *
 * <p>The value is first mapped through the String-based {@code readableToIndexed}
 * overload, then encoded as UTF-8 into {@code result}.
 *
 * @param val    the readable value
 * @param result receives the UTF-8 encoded indexed form
 */
public void readableToIndexed(CharSequence val, BytesRef result) {
    final String readable = val.toString();
    final String indexedForm = readableToIndexed(readable);
    UnicodeUtil.UTF16toUTF8(indexedForm, 0, indexedForm.length(), result);
}
From source file:org.apache.solr.search.ValueSourceParser.java
License:Apache License
private static TInfo parseTerm(FunctionQParser fp) throws SyntaxError { TInfo tinfo = new TInfo(); tinfo.indexedField = tinfo.field = fp.parseArg(); tinfo.val = fp.parseArg(); tinfo.indexedBytes = new BytesRef(); FieldType ft = fp.getReq().getSchema().getFieldTypeNoEx(tinfo.field); if (ft == null) ft = new StrField(); if (ft instanceof TextField) { // need to do analysis on the term String indexedVal = tinfo.val; Query q = ft.getFieldQuery(fp, fp.getReq().getSchema().getFieldOrNull(tinfo.field), tinfo.val); if (q instanceof TermQuery) { Term term = ((TermQuery) q).getTerm(); tinfo.indexedField = term.field(); indexedVal = term.text();/*from www . ja v a 2 s . co m*/ } UnicodeUtil.UTF16toUTF8(indexedVal, 0, indexedVal.length(), tinfo.indexedBytes); } else { ft.readableToIndexed(tinfo.val, tinfo.indexedBytes); } return tinfo; }