Example usage for edu.stanford.nlp.ie AbstractSequenceClassifier train

List of usage examples for edu.stanford.nlp.ie AbstractSequenceClassifier train

Introduction

On this page you can find example usage of edu.stanford.nlp.ie.AbstractSequenceClassifier.train.

Prototype

public abstract void train(Collection<List<IN>> docs, DocumentReaderAndWriter<IN> readerAndWriter);

Source Link

Document

Trains a classifier from a Collection of sequences.

Usage

From source file: lv.lumii.morphotagger.MorphoCRF.java

License:Open Source License

/**
 * @param args/*from  ww w .ja v a  2 s  .c om*/
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws ClassCastException 
 */
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
    String trainfile = "MorphoCRF/train_dev.txt";
    String testfile = "MorphoCRF/test.txt";

    boolean train = false;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-train")) {
            train = true;
        }
        if (args[i].equalsIgnoreCase("-dev")) {
            trainfile = "MorphoCRF/train.txt";
            testfile = "MorphoCRF/dev.txt";
        }
        if (args[i].equalsIgnoreCase("-production")) {
            trainfile = "MorphoCRF/all.txt";
            testfile = "MorphoCRF/test.txt";
        }
    }

    String pretrainedModel = "models/lv-morpho-model.ser.gz";
    String classifierOutput = "MorphoCRF/lv-morpho-model.ser.gz";

    //Properties props = StringUtils.propFileToProperties("/Users/pet/Documents/java/PaikensNER/MorfoCRF/lv-PP.prop");
    Properties props = new Properties();

    props.setProperty("useLVMorphoAnalyzer", "true");
    props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_PartOfSpeech);
    //props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_Case);
    props.setProperty("useLVMorphoAnalyzerPOS", "true");
    props.setProperty("useLVMorphoAnalyzerTag", "true");
    props.setProperty("useLVMorphoAnalyzerPrev", "true");
    props.setProperty("useLVMorphoAnalyzerNext", "true");
    props.setProperty("useLVMorphoAnalyzerItemIDs", "true");

    props.setProperty("saveFeatureIndexToDisk", "true");
    props.setProperty("maxLeft", "1");

    props.setProperty("useWord", "true");
    //props.setProperty("use2W", "true");
    //props.setProperty("usePrevSequences", "true");
    //props.setProperty("useClassFeature", "true");
    //props.setProperty("useTypeSeqs2", "true");
    //props.setProperty("useSequences", "true");
    props.setProperty("wordShape", "dan2useLC");
    //props.setProperty("useTypeySequences", "true");
    //props.setProperty("useDisjunctive", "true");      
    props.setProperty("noMidNGrams", "true");
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("useNGrams", "true");
    //props.setProperty("usePrev", "true");
    //props.setProperty("useNext", "true");
    //props.setProperty("useTypeSeqs", "true");

    props.setProperty("readerAndWriter", "edu.stanford.nlp.sequences.LVMorphologyReaderAndWriter");
    props.setProperty("map", "word=0,answer=1,lemma=2");

    AbstractSequenceClassifier<CoreLabel> crf = new CMMClassifier<CoreLabel>(props);
    DocumentReaderAndWriter reader = crf.makeReaderAndWriter();
    if (train) {
        ObjectBank<List<CoreLabel>> documents = crf.makeObjectBankFromFile(trainfile, reader);
        crf.train(documents, reader); //atbilstoi props datiem

        crf.serializeClassifier(classifierOutput);
    } else {
        crf = CMMClassifier.getClassifier(pretrainedModel);
    }

    testData(crf, testfile, reader);
}