Example usage for org.apache.lucene.util.fst IntSequenceOutputs getSingleton

List of usage examples for org.apache.lucene.util.fst IntSequenceOutputs getSingleton

Introduction

On this page you can find example usages of org.apache.lucene.util.fst.IntSequenceOutputs.getSingleton().

Prototype

public static IntSequenceOutputs getSingleton() 

Source Link

Usage

From source file:elhuyar.bilakit.Dictionary.java

License:Apache License

/**
   * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionaries InputStream for reading the hunspell dictionary files (won't be closed).
   * @param ignoreCase stored as this dictionary's {@code ignoreCase} setting; also used as the initial value
   *                   of {@code needsInputCleaning}.
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase)
          throws IOException, ParseException {
      this.ignoreCase = ignoreCase;
      this.needsInputCleaning = ignoreCase;
      this.needsOutputCleaning = false; // set if we have an OCONV
      flagLookup.add(new BytesRef()); // no flags -> ord 0

      // The affix data is read twice below (encoding detection, then parsing), so it is
      // first spooled into a temporary file that can be reopened for each pass.
      File aff = File.createTempFile("affix", "aff", tempDir);
      OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
      InputStream aff1 = null;
      InputStream aff2 = null;
      try {
          // copy contents of affix stream to temp file
          final byte[] buffer = new byte[1024 * 8];
          int len;
          while ((len = affix.read(buffer)) > 0) {
              out.write(buffer, 0, len);
          }
          // close explicitly so buffered bytes reach the file before it is reopened for reading
          out.close();

          // pass 1: get encoding
          aff1 = new BufferedInputStream(new FileInputStream(aff));
          String encoding = getDictionaryEncoding(aff1);

          // pass 2: parse affixes
          CharsetDecoder decoder = getJavaEncoding(encoding);
          aff2 = new BufferedInputStream(new FileInputStream(aff));
          readAffixFile(aff2, decoder);

          // read dictionary entries
          IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
          Builder<IntsRef> b = new Builder<>(FST.INPUT_TYPE.BYTE4, o);
          readDictionaryFiles(dictionaries, decoder, b);
          words = b.finish();
          aliases = null; // no longer needed
          morphAliases = null; // no longer needed
      } finally {
          // release the temp-file streams (out may already be closed) and remove the temp file
          // on both the success and the failure path
          IOUtils.closeWhileHandlingException(out, aff1, aff2);
          aff.delete();
      }
  }

From source file:elhuyar.bilakit.Dictionary.java

License:Apache License

/**
   * Builds an FST from the given map: each key is converted to UTF-32 code points and used as the
   * FST input, and the key's list of integers is packed into an {@link IntsRef} used as the output.
   *
   * @param affixes sorted map from key string to the integers to store for that key
   * @return the finished FST
   * @throws IOException propagated from the FST builder
   */
private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
      final Builder<IntsRef> fstBuilder =
              new Builder<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
      final IntsRefBuilder keyScratch = new IntsRefBuilder();
      for (Map.Entry<String, List<Integer>> affix : affixes.entrySet()) {
          // the scratch builder is reused for every key
          Util.toUTF32(affix.getKey(), keyScratch);

          final List<Integer> ids = affix.getValue();
          final IntsRef packed = new IntsRef(ids.size());
          for (int id : ids) {
              packed.ints[packed.length] = id;
              packed.length++;
          }
          fstBuilder.add(keyScratch.get(), packed);
      }
      return fstBuilder.finish();
  }

From source file:hunspell_stemmer.Dictionary.java

License:Apache License

/**
   * Builds a Dictionary from hunspell affix and dictionary data supplied as InputStreams.
   * The streams are only read here; closing them remains the caller's responsibility.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionaries InputStreams for reading the hunspell dictionary files (won't be closed).
   * @param ignoreCase stored as this dictionary's {@code ignoreCase} setting; also used as the initial value
   *                   of {@code needsInputCleaning}.
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase)
          throws IOException, ParseException {
      this.ignoreCase = ignoreCase;
      this.needsInputCleaning = ignoreCase;
      this.needsOutputCleaning = false; // set if we have an OCONV
      flagLookup.add(new BytesRef()); // no flags -> ord 0

      // The affix data is consumed in two passes, so it is spooled to a temp file first.
      final Path spooledAffix = Files.createTempFile(tempDir, "affix", "aff");
      final OutputStream spoolOut = new BufferedOutputStream(Files.newOutputStream(spooledAffix));
      InputStream encodingPass = null;
      InputStream parsePass = null;
      boolean completed = false;
      try {
          // copy contents of affix stream to temp file
          final byte[] chunk = new byte[1024 * 8];
          for (int n = affix.read(chunk); n > 0; n = affix.read(chunk)) {
              spoolOut.write(chunk, 0, n);
          }
          spoolOut.close();

          // pass 1: get encoding
          encodingPass = new BufferedInputStream(Files.newInputStream(spooledAffix));
          final String encoding = getDictionaryEncoding(encodingPass);

          // pass 2: parse affixes
          final CharsetDecoder decoder = getJavaEncoding(encoding);
          parsePass = new BufferedInputStream(Files.newInputStream(spooledAffix));
          readAffixFile(parsePass, decoder);

          // read dictionary entries
          final Builder<IntsRef> wordBuilder =
                  new Builder<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
          readDictionaryFiles(dictionaries, decoder, wordBuilder);
          words = wordBuilder.finish();
          aliases = null; // no longer needed
          morphAliases = null; // no longer needed
          completed = true;
      } finally {
          IOUtils.closeWhileHandlingException(spoolOut, encodingPass, parsePass);
          if (!completed) {
              // already failing: do not let a cleanup problem replace the original exception
              IOUtils.deleteFilesIgnoringExceptions(spooledAffix);
          } else {
              // success path: a failure to delete the temp file is reported to the caller
              Files.delete(spooledAffix);
          }
      }
  }

From source file:stemmer.Dictionary.java

License:Apache License

/**
 * Builds a Dictionary from hunspell affix and dictionary data supplied as InputStreams.
 * The streams are only read here; closing them remains the caller's responsibility.
 *
 * @param affix InputStream for reading the hunspell affix file (won't be closed).
 * @param dictionaries InputStreams for reading the hunspell dictionary files (won't be closed).
 * @param ignoreCase stored as this dictionary's {@code ignoreCase} setting; also used as the initial value
 *                   of {@code needsInputCleaning}.
 * @throws IOException Can be thrown while reading from the InputStreams
 * @throws ParseException Can be thrown if the content of the files does not meet expected formats
 */
public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase)
        throws IOException, ParseException {
    this.ignoreCase = ignoreCase;
    this.needsInputCleaning = ignoreCase;
    this.needsOutputCleaning = false; // set if we have an OCONV
    flagLookup.add(new BytesRef()); // no flags -> ord 0

    // The affix data is consumed in two passes, so it is spooled to a temp file first.
    final File tempAffix = File.createTempFile("affix", "aff", tempDir);
    final OutputStream tempOut = new BufferedOutputStream(new FileOutputStream(tempAffix));
    InputStream firstPass = null;
    InputStream secondPass = null;
    try {
        // copy contents of affix stream to temp file
        final byte[] block = new byte[1024 * 8];
        int count = affix.read(block);
        while (count > 0) {
            tempOut.write(block, 0, count);
            count = affix.read(block);
        }
        tempOut.close();

        // pass 1: get encoding
        firstPass = new BufferedInputStream(new FileInputStream(tempAffix));
        final String encoding = getDictionaryEncoding(firstPass);

        // pass 2: parse affixes
        final CharsetDecoder decoder = getJavaEncoding(encoding);
        secondPass = new BufferedInputStream(new FileInputStream(tempAffix));
        readAffixFile(secondPass, decoder);

        // read dictionary entries
        final Builder<IntsRef> wordBuilder =
                new Builder<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
        readDictionaryFiles(dictionaries, decoder, wordBuilder);
        words = wordBuilder.finish();
        aliases = null; // no longer needed
    } finally {
        // runs on success and on failure: release the streams, then remove the temp file
        IOUtils.closeWhileHandlingException(tempOut, firstPass, secondPass);
        tempAffix.delete();
    }
}

From source file:stemmer.Dictionary.java

License:Apache License

/**
 * Builds an FST from the given map: each key is converted to UTF-32 code points and used as the
 * FST input, and the key's list of characters is stored, one int per character, in an
 * {@link IntsRef} used as the output.
 *
 * @param affixes sorted map from key string to the characters to store for that key
 * @return the finished FST
 * @throws IOException propagated from the FST builder
 */
private FST<IntsRef> affixFST(TreeMap<String, List<Character>> affixes) throws IOException {
    final Builder<IntsRef> fstBuilder =
            new Builder<>(FST.INPUT_TYPE.BYTE4, IntSequenceOutputs.getSingleton());
    final IntsRef keyScratch = new IntsRef();

    for (Map.Entry<String, List<Character>> affix : affixes.entrySet()) {
        // the scratch IntsRef is reused for every key
        Util.toUTF32(affix.getKey(), keyScratch);

        final List<Character> chars = affix.getValue();
        final IntsRef packed = new IntsRef(chars.size());
        for (char ch : chars) {
            // each char is widened to int on assignment
            packed.ints[packed.length] = ch;
            packed.length++;
        }
        fstBuilder.add(keyScratch, packed);
    }
    return fstBuilder.finish();
}