Example usage of edu.stanford.nlp.ie.AbstractSequenceClassifier.train

Introduction

This page shows an example of how to use the train method of edu.stanford.nlp.ie.AbstractSequenceClassifier.

Prototype

public abstract void train(Collection<List<IN>> docs, DocumentReaderAndWriter<IN> readerAndWriter);

Source Link

Document

Trains a classifier from a Collection of sequences.

Usage

From source file: MorphoCRF.java (package lv.lumii.morphotagger)

License: Open Source License

/**
 * @param args/*from  ww w .ja v a  2 s  .c om*/
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws ClassCastException 
 */
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
    String trainfile = "MorphoCRF/train_dev.txt";
    String testfile = "MorphoCRF/test.txt";

    boolean train = false;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-train")) {
            train = true;
        }
        if (args[i].equalsIgnoreCase("-dev")) {
            trainfile = "MorphoCRF/train.txt";
            testfile = "MorphoCRF/dev.txt";
        }
        if (args[i].equalsIgnoreCase("-production")) {
            trainfile = "MorphoCRF/all.txt";
            testfile = "MorphoCRF/test.txt";
        }
    }

    String pretrainedModel = "models/lv-morpho-model.ser.gz";
    String classifierOutput = "MorphoCRF/lv-morpho-model.ser.gz";

    //Properties props = StringUtils.propFileToProperties("/Users/pet/Documents/java/PaikensNER/MorfoCRF/lv-PP.prop");
    Properties props = new Properties();

    props.setProperty("useLVMorphoAnalyzer", "true");
    props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_PartOfSpeech);
    //props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_Case);
    props.setProperty("useLVMorphoAnalyzerPOS", "true");
    props.setProperty("useLVMorphoAnalyzerTag", "true");
    props.setProperty("useLVMorphoAnalyzerPrev", "true");
    props.setProperty("useLVMorphoAnalyzerNext", "true");
    props.setProperty("useLVMorphoAnalyzerItemIDs", "true");

    props.setProperty("saveFeatureIndexToDisk", "true");
    props.setProperty("maxLeft", "1");

    props.setProperty("useWord", "true");
    //props.setProperty("use2W", "true");
    //props.setProperty("usePrevSequences", "true");
    //props.setProperty("useClassFeature", "true");
    //props.setProperty("useTypeSeqs2", "true");
    //props.setProperty("useSequences", "true");
    props.setProperty("wordShape", "dan2useLC");
    //props.setProperty("useTypeySequences", "true");
    //props.setProperty("useDisjunctive", "true");      
    props.setProperty("noMidNGrams", "true");
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("useNGrams", "true");
    //props.setProperty("usePrev", "true");
    //props.setProperty("useNext", "true");
    //props.setProperty("useTypeSeqs", "true");

    props.setProperty("readerAndWriter", "edu.stanford.nlp.sequences.LVMorphologyReaderAndWriter");
    props.setProperty("map", "word=0,answer=1,lemma=2");

    AbstractSequenceClassifier<CoreLabel> crf = new CMMClassifier<CoreLabel>(props);
    DocumentReaderAndWriter reader = crf.makeReaderAndWriter();
    if (train) {
        ObjectBank<List<CoreLabel>> documents = crf.makeObjectBankFromFile(trainfile, reader);
        crf.train(documents, reader); //atbilstoi props datiem

        crf.serializeClassifier(classifierOutput);
    } else {
        crf = CMMClassifier.getClassifier(pretrainedModel);
    }

    testData(crf, testfile, reader);
}