Example usage for the edu.stanford.nlp.ie.ner.CMMClassifier constructor

List of usage examples for the edu.stanford.nlp.ie.ner.CMMClassifier constructor

Introduction

On this page you can find example usage of the edu.stanford.nlp.ie.ner.CMMClassifier constructor.

Prototype

public CMMClassifier(SeqClassifierFlags flags) 

Source Link

Usage

From source file: lv.lumii.morphotagger.MorphoCRF.java

License: Open Source License

/**
 * @param args/*  w ww .  ja  va  2 s .  co  m*/
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws ClassCastException 
 */
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
    String trainfile = "MorphoCRF/train_dev.txt";
    String testfile = "MorphoCRF/test.txt";

    boolean train = false;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-train")) {
            train = true;
        }
        if (args[i].equalsIgnoreCase("-dev")) {
            trainfile = "MorphoCRF/train.txt";
            testfile = "MorphoCRF/dev.txt";
        }
        if (args[i].equalsIgnoreCase("-production")) {
            trainfile = "MorphoCRF/all.txt";
            testfile = "MorphoCRF/test.txt";
        }
    }

    String pretrainedModel = "models/lv-morpho-model.ser.gz";
    String classifierOutput = "MorphoCRF/lv-morpho-model.ser.gz";

    //Properties props = StringUtils.propFileToProperties("/Users/pet/Documents/java/PaikensNER/MorfoCRF/lv-PP.prop");
    Properties props = new Properties();

    props.setProperty("useLVMorphoAnalyzer", "true");
    props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_PartOfSpeech);
    //props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_Case);
    props.setProperty("useLVMorphoAnalyzerPOS", "true");
    props.setProperty("useLVMorphoAnalyzerTag", "true");
    props.setProperty("useLVMorphoAnalyzerPrev", "true");
    props.setProperty("useLVMorphoAnalyzerNext", "true");
    props.setProperty("useLVMorphoAnalyzerItemIDs", "true");

    props.setProperty("saveFeatureIndexToDisk", "true");
    props.setProperty("maxLeft", "1");

    props.setProperty("useWord", "true");
    //props.setProperty("use2W", "true");
    //props.setProperty("usePrevSequences", "true");
    //props.setProperty("useClassFeature", "true");
    //props.setProperty("useTypeSeqs2", "true");
    //props.setProperty("useSequences", "true");
    props.setProperty("wordShape", "dan2useLC");
    //props.setProperty("useTypeySequences", "true");
    //props.setProperty("useDisjunctive", "true");      
    props.setProperty("noMidNGrams", "true");
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("useNGrams", "true");
    //props.setProperty("usePrev", "true");
    //props.setProperty("useNext", "true");
    //props.setProperty("useTypeSeqs", "true");

    props.setProperty("readerAndWriter", "edu.stanford.nlp.sequences.LVMorphologyReaderAndWriter");
    props.setProperty("map", "word=0,answer=1,lemma=2");

    AbstractSequenceClassifier<CoreLabel> crf = new CMMClassifier<CoreLabel>(props);
    DocumentReaderAndWriter reader = crf.makeReaderAndWriter();
    if (train) {
        ObjectBank<List<CoreLabel>> documents = crf.makeObjectBankFromFile(trainfile, reader);
        crf.train(documents, reader); //atbilstoi props datiem

        crf.serializeClassifier(classifierOutput);
    } else {
        crf = CMMClassifier.getClassifier(pretrainedModel);
    }

    testData(crf, testfile, reader);
}