Example usage for org.apache.lucene.util.fst Util toUTF16

List of usage examples for org.apache.lucene.util.fst Util toUTF16

Introduction

On this page you can find example usages of org.apache.lucene.util.fst.Util.toUTF16.

Prototype

public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) 

Source Link

Document

Maps each UTF-16 code unit (char) of the input to one int in an IntsRef.

Usage

From source file:elhuyar.bilakit.Dictionary.java

License:Apache License

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three fields. The second field is
 * used as the key and the third as the mapped value; the first field is not inspected here.
 *
 * @param reader source of the conversion lines; its current line number is reported in
 *     parse errors
 * @param num number of lines to consume from {@code reader}
 * @return the result of {@code builder.finish()} after all mappings have been added
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the input ends before {@code num} lines have been read, or if
 *     a line does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
    // TreeMap iterates its keys in sorted order.
    // NOTE(review): presumably the FST builder needs its inputs added in sorted order - confirm.
    Map<String, String> mappings = new TreeMap<>();

    for (int i = 0; i < num; i++) {
        String line = reader.readLine();
        if (line == null) {
            // readLine() returns null at end of stream; report a truncated input as a
            // parse error instead of failing with a NullPointerException on split().
            throw new ParseException(
                    "unexpected end of input: expected " + num + " conversion lines but found " + i,
                    reader.getLineNumber());
        }
        String[] parts = line.split("\\s+");
        if (parts.length != 3) {
            throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
        }
        // put() returns the previous value, so a non-null result means the key was already mapped.
        if (mappings.put(parts[1], parts[2]) != null) {
            throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
        }
    }

    Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    // One scratch buffer is reused for every key; toUTF16 refills it on each iteration.
    IntsRefBuilder scratchInts = new IntsRefBuilder();
    for (Map.Entry<String, String> entry : mappings.entrySet()) {
        Util.toUTF16(entry.getKey(), scratchInts);
        builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
    }

    return builder.finish();
}

From source file:stemmer.Dictionary.java

License:Apache License

/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them into an FST
 * that maps each source string to its replacement.
 *
 * <p>Each line must split on whitespace into exactly three fields. The second field is
 * used as the key and the third as the mapped value; the first field is not inspected here.
 *
 * @param reader source of the conversion lines; its current line number is reported in
 *     parse errors
 * @param num number of lines to consume from {@code reader}
 * @return the result of {@code builder.finish()} after all mappings have been added
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the input ends before {@code num} lines have been read, or if
 *     a line does not split into exactly three fields
 * @throws IllegalStateException if the same key is mapped more than once
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
    // TreeMap iterates its keys in sorted order.
    // NOTE(review): presumably the FST builder needs its inputs added in sorted order - confirm.
    Map<String, String> mappings = new TreeMap<>();

    for (int i = 0; i < num; i++) {
        String line = reader.readLine();
        if (line == null) {
            // readLine() returns null at end of stream; report a truncated input as a
            // parse error instead of failing with a NullPointerException on split().
            throw new ParseException(
                    "unexpected end of input: expected " + num + " conversion lines but found " + i,
                    reader.getLineNumber());
        }
        String[] parts = line.split("\\s+");
        if (parts.length != 3) {
            throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
        }
        // put() returns the previous value, so a non-null result means the key was already mapped.
        if (mappings.put(parts[1], parts[2]) != null) {
            throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
        }
    }

    Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
    // One scratch IntsRef is reused for every key; toUTF16 refills it on each iteration.
    IntsRef scratchInts = new IntsRef();
    for (Map.Entry<String, String> entry : mappings.entrySet()) {
        Util.toUTF16(entry.getKey(), scratchInts);
        builder.add(scratchInts, new CharsRef(entry.getValue()));
    }

    return builder.finish();
}