Example usage for opennlp.tools.postag POSDictionary serialize

List of usage examples for opennlp.tools.postag POSDictionary serialize

Introduction

On this page you can find example usages of opennlp.tools.postag POSDictionary serialize.

Prototype

public void serialize(OutputStream out) throws IOException 

Source Link

Document

Writes the POSDictionary to the given OutputStream. After the serialization is finished, the provided OutputStream remains open.

Usage

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * Convert a lemma dictionary (word lemma postag) into a
 * {@code POSTaggerDictionary}. It saves the resulting file with the name of
 * the original dictionary changing the extension to .xml.
 * /*from   www . ja va2  s  . c  om*/
 * @param lemmaDict
 *          the input file
 * @throws IOException
 *           if io problems
 */
public void convertLemmaToPOSDict(File lemmaDict) throws IOException {
    // process one file
    if (lemmaDict.isFile()) {
        List<String> inputLines = Files.readLines(lemmaDict, Charsets.UTF_8);
        File outFile = new File(Files.getNameWithoutExtension(lemmaDict.getCanonicalPath()) + ".xml");
        POSDictionary posTagDict = getPOSTaggerDict(inputLines);
        OutputStream outputStream = new FileOutputStream(outFile);
        posTagDict.serialize(outputStream);
        outputStream.close();
        System.err.println(">> Serialized Apache OpenNLP POSDictionary format to " + outFile);
    } else {
        System.out.println("Please choose a valid file as input.");
        System.exit(1);
    }
}

From source file:es.ehu.si.ixa.pipe.convert.Convert.java

/**
 * Aggregates a lemma dictionary (word lemma postag) into an existing
 * OpenNLP {@code POSDictionary} and serializes the result to disk.
 * <p>
 * The existing dictionary is loaded from {@code posTaggerDict}, the entries
 * read from {@code lemmaDict} are added to it, and the result is written to
 * a file that takes the base name of {@code lemmaDict} with the extension
 * replaced by {@code .xml}. Only the base name is kept (the directory part
 * of the input path is dropped), so the file is created relative to the
 * current working directory.
 * <p>
 * If either argument is not a regular file, a message is printed to
 * standard output and the JVM is terminated with exit status 1.
 *
 * @param lemmaDict
 *          the input file
 * @param posTaggerDict
 *          the serialized POS dictionary to which the lemma entries are added
 * @throws IOException
 *           if io problems
 */
public void addLemmaToPOSDict(File lemmaDict, File posTaggerDict) throws IOException {
    // process one file
    if (lemmaDict.isFile() && posTaggerDict.isFile()) {
        POSDictionary posDict;
        // Close the input stream as soon as the dictionary is loaded; the
        // original code never closed it.
        try (InputStream posDictInputStream = new FileInputStream(posTaggerDict)) {
            posDict = POSDictionary.create(posDictInputStream);
        }
        List<String> inputLines = Files.readLines(lemmaDict, Charsets.UTF_8);
        File outFile = new File(Files.getNameWithoutExtension(lemmaDict.getCanonicalPath()) + ".xml");
        addPOSTaggerDict(inputLines, posDict);
        // serialize() leaves the stream open; try-with-resources guarantees it
        // is closed even when serialization fails.
        try (OutputStream outputStream = new FileOutputStream(outFile)) {
            posDict.serialize(outputStream);
        }
        System.err.println(">> Serialized Apache OpenNLP POSDictionary format to " + outFile);
    } else {
        System.out.println("Please choose a valid files as input.");
        System.exit(1);
    }
}