Example usage for edu.stanford.nlp.sequences ColumnDocumentReaderAndWriter ColumnDocumentReaderAndWriter

List of usage examples for edu.stanford.nlp.sequences ColumnDocumentReaderAndWriter ColumnDocumentReaderAndWriter

Introduction

On this page you can find example usage for the edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter constructor, ColumnDocumentReaderAndWriter().

Prototype

ColumnDocumentReaderAndWriter

Source Link

Usage

From source file: org.exist.xquery.corenlp.TrainClassifier.java

License:Open Source License

/**
 * Trains a CRF classifier on the supplied documents using a fixed set of
 * feature flags, then serializes the trained model to the absolute path of
 * {@code tempOutFile}.
 *
 * @param documents   training data; each element is one document given as a
 *                    list of {@code CoreLabel}s
 * @param inputFormat not read by this method (kept so existing callers keep
 *                    compiling) -- NOTE(review): confirm whether the format
 *                    should influence the reader/writer passed to
 *                    {@code train}
 */
private void trainClassifier(Collection<List<CoreLabel>> documents, final InputDocType inputFormat) {
    final Properties props = new Properties();

    // TODO: check whether "ocrTrain" is configurable under another name; it is
    // intentionally left unset for now. "serializeTo" is also left unset because
    // the model is written explicitly by serializeClassifier(...) below.

    // Feature flags that are simply switched on.
    final String[] enabledFlags = {
        "useClassFeature",
        "useWord",
        "useNGrams",
        "noMidNGrams",
        "useDisjunctive",
        "usePrev",
        "useNext",
        "useSequences",
        "usePrevSequences",
        "useTypeSeqs",
        "useTypeSeqs2",
        "useTypeySequences"
    };
    for (final String flag : enabledFlags) {
        props.setProperty(flag, "true");
    }

    // Flags that carry a non-boolean value.
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("maxLeft", "1");
    props.setProperty("wordShape", "chris2useLC");

    // Diamond operator instead of the raw type: avoids the unchecked
    // conversion to CRFClassifier<CoreLabel>.
    final CRFClassifier<CoreLabel> classifier = new CRFClassifier<>(props);
    classifier.train(documents, new ColumnDocumentReaderAndWriter());
    classifier.serializeClassifier(tempOutFile.toAbsolutePath().toString());
}