List of usage examples for org.apache.lucene.util.fst.Util.toBytesRef
public static BytesRef toBytesRef(IntsRef input, BytesRefBuilder scratch)
From source file:examples.fst.FstTest.java
public static void main(String[] args) throws IOException { // Input values (keys). These must be provided to Builder in Unicode sorted order! String inputValues[] = { "cat", "dog", "dogs" }; long outputValues[] = { 5, 7, 12 }; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs); BytesRefBuilder scratchBytes = new BytesRefBuilder(); IntsRefBuilder scratchInts = new IntsRefBuilder(); for (int i = 0; i < inputValues.length; i++) { scratchBytes.copyChars(inputValues[i]); builder.add(Util.toIntsRef(scratchBytes.toBytesRef(), scratchInts), outputValues[i]); }//from www . j a v a 2 s . co m FST<Long> fst = builder.finish(); Long value = Util.get(fst, new BytesRef("dog")); System.out.println(value); // 7 // Only works because outputs are also in sorted order IntsRef key = Util.getByOutput(fst, 12); System.out.println(Util.toBytesRef(key, scratchBytes).utf8ToString()); // dogs }
From source file:org.codelibs.elasticsearch.search.suggest.completion2x.CompletionTokenStream.java
License:Apache License
@Override public boolean incrementToken() throws IOException { clearAttributes();/*from w ww .ja va2 s . c om*/ if (finiteStrings == null) { Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input); if (strings.size() > MAX_PATHS) { throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS + " finite strings are supported"); } posInc = strings.size(); finiteStrings = strings.iterator(); } if (finiteStrings.hasNext()) { posAttr.setPositionIncrement(posInc); /* * this posInc encodes the number of paths that this surface form * produced. Multi Fields have the same surface form and therefore sum up */ posInc = 0; Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8 if (charTermAttribute != null) { charTermAttribute.setLength(0); charTermAttribute.append(bytesAtt.toUTF16()); } if (payload != null) { payloadAttr.setPayload(this.payload); } return true; } return false; }
From source file:org.elasticsearch.search.suggest.completion.CompletionTokenStream.java
License:Apache License
@Override public boolean incrementToken() throws IOException { clearAttributes();// w ww . j av a2s. c om if (finiteStrings == null) { Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input); if (strings.size() > MAX_PATHS) { throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS + " finite strings are supported"); } posInc = strings.size(); finiteStrings = strings.iterator(); } if (finiteStrings.hasNext()) { posAttr.setPositionIncrement(posInc); /* * this posInc encodes the number of paths that this surface form * produced. Multi Fields have the same surface form and therefore sum up */ posInc = 0; Util.toBytesRef(finiteStrings.next(), bytesAtt.getBytesRef()); // now we have UTF-8 if (charTermAttribute != null) { charTermAttribute.setLength(0); charTermAttribute.append(bytesAtt.toUTF16()); } if (payload != null) { payloadAttr.setPayload(this.payload); } return true; } return false; }