List of usage examples for edu.stanford.nlp.ie.crf CRFClassifier train
@Override public void train(Collection<List<IN>> objectBankWrapper, DocumentReaderAndWriter<IN> readerAndWriter)
From source file: org.exist.xquery.corenlp.TrainClassifier.java
License: Open Source License
private void trainClassifier(Collection<List<CoreLabel>> documents, final InputDocType inputFormat) { final Properties props = new Properties(); // fixme! - check ocrTrain configurable under other name? //props.setProperty("ocrTrain", "true"); //props.setProperty("serializeTo", tempOutFile.toAbsolutePath().toString()); props.setProperty("useClassFeature", "true"); props.setProperty("useWord", "true"); props.setProperty("useNGrams", "true"); props.setProperty("noMidNGrams", "true"); props.setProperty("useDisjunctive", "true"); props.setProperty("maxNGramLeng", "6"); props.setProperty("usePrev", "true"); props.setProperty("useNext", "true"); props.setProperty("useSequences", "true"); props.setProperty("usePrevSequences", "true"); props.setProperty("maxLeft", "1"); props.setProperty("useTypeSeqs", "true"); props.setProperty("useTypeSeqs2", "true"); props.setProperty("useTypeySequences", "true"); props.setProperty("wordShape", "chris2useLC"); CRFClassifier<CoreLabel> classifier = new CRFClassifier(props); classifier.train(documents, new ColumnDocumentReaderAndWriter()); classifier.serializeClassifier(tempOutFile.toAbsolutePath().toString()); }