Example usage for edu.stanford.nlp.ie.AbstractSequenceClassifier#makeReaderAndWriter

List of usage examples for edu.stanford.nlp.ie.AbstractSequenceClassifier#makeReaderAndWriter

Introduction

On this page you can find example usage for edu.stanford.nlp.ie.AbstractSequenceClassifier#makeReaderAndWriter.

Prototype

public DocumentReaderAndWriter<IN> makeReaderAndWriter() 

Source Link

Document

Makes a DocumentReaderAndWriter based on the flags the CRFClassifier was constructed with.

Usage

From source file: lv.lumii.morphotagger.MorphoCRF.java

License: Open Source License

/**
 * Builds a {@code CMMClassifier} from a fixed set of feature properties, then either
 * trains it or loads a previously serialized model, and finally runs {@code testData}
 * on the selected test file.
 *
 * <p>Recognized command-line flags (matched case-insensitively; unknown arguments are
 * ignored):
 * <ul>
 *   <li>{@code -train} &ndash; train on the training file and serialize the result to
 *       {@code MorphoCRF/lv-morpho-model.ser.gz}. Without this flag the model at
 *       {@code models/lv-morpho-model.ser.gz} is loaded instead.</li>
 *   <li>{@code -dev} &ndash; use {@code MorphoCRF/train.txt} for training and
 *       {@code MorphoCRF/dev.txt} for testing.</li>
 *   <li>{@code -production} &ndash; use {@code MorphoCRF/all.txt} for training and
 *       {@code MorphoCRF/test.txt} for testing.</li>
 * </ul>
 * With neither {@code -dev} nor {@code -production}, the defaults are
 * {@code MorphoCRF/train_dev.txt} and {@code MorphoCRF/test.txt}. If both are given,
 * whichever appears later on the command line wins.
 *
 * @param args command-line flags as described above
 * @throws IOException propagated from the classifier file operations invoked here
 * @throws ClassNotFoundException propagated from the classifier calls invoked here
 *         (presumably from deserializing the pretrained model - confirm against the
 *         classifier API)
 * @throws ClassCastException propagated from the classifier calls invoked here
 *         (presumably from deserializing the pretrained model - confirm against the
 *         classifier API)
 */
public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException {
    String trainfile = "MorphoCRF/train_dev.txt";
    String testfile = "MorphoCRF/test.txt";

    boolean train = false;
    // The three flags are distinct strings, so at most one branch can match per argument.
    for (String arg : args) {
        if (arg.equalsIgnoreCase("-train")) {
            train = true;
        } else if (arg.equalsIgnoreCase("-dev")) {
            trainfile = "MorphoCRF/train.txt";
            testfile = "MorphoCRF/dev.txt";
        } else if (arg.equalsIgnoreCase("-production")) {
            trainfile = "MorphoCRF/all.txt";
            testfile = "MorphoCRF/test.txt";
        }
    }

    String pretrainedModel = "models/lv-morpho-model.ser.gz";
    String classifierOutput = "MorphoCRF/lv-morpho-model.ser.gz";

    // Alternative: load the configuration from a properties file instead of building it here.
    //Properties props = StringUtils.propFileToProperties("/Users/pet/Documents/java/PaikensNER/MorfoCRF/lv-PP.prop");
    Properties props = new Properties();

    // Features based on the LV morphological analyzer.
    props.setProperty("useLVMorphoAnalyzer", "true");
    props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_PartOfSpeech);
    //props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_Case);
    props.setProperty("useLVMorphoAnalyzerPOS", "true");
    props.setProperty("useLVMorphoAnalyzerTag", "true");
    props.setProperty("useLVMorphoAnalyzerPrev", "true");
    props.setProperty("useLVMorphoAnalyzerNext", "true");
    props.setProperty("useLVMorphoAnalyzerItemIDs", "true");

    props.setProperty("saveFeatureIndexToDisk", "true");
    props.setProperty("maxLeft", "1");

    // Word-level features; the commented-out lines are switches that are currently disabled.
    props.setProperty("useWord", "true");
    //props.setProperty("use2W", "true");
    //props.setProperty("usePrevSequences", "true");
    //props.setProperty("useClassFeature", "true");
    //props.setProperty("useTypeSeqs2", "true");
    //props.setProperty("useSequences", "true");
    props.setProperty("wordShape", "dan2useLC");
    //props.setProperty("useTypeySequences", "true");
    //props.setProperty("useDisjunctive", "true");
    props.setProperty("noMidNGrams", "true");
    props.setProperty("maxNGramLeng", "6");
    props.setProperty("useNGrams", "true");
    //props.setProperty("usePrev", "true");
    //props.setProperty("useNext", "true");
    //props.setProperty("useTypeSeqs", "true");

    // Input format: which reader/writer class to use and how input columns map to fields.
    props.setProperty("readerAndWriter", "edu.stanford.nlp.sequences.LVMorphologyReaderAndWriter");
    props.setProperty("map", "word=0,answer=1,lemma=2");

    AbstractSequenceClassifier<CoreLabel> crf = new CMMClassifier<CoreLabel>(props);
    // The reader is always created from the props-configured classifier above, and the same
    // reader is reused for testing even when the pretrained model replaces crf below.
    DocumentReaderAndWriter<CoreLabel> reader = crf.makeReaderAndWriter();
    if (train) {
        ObjectBank<List<CoreLabel>> documents = crf.makeObjectBankFromFile(trainfile, reader);
        crf.train(documents, reader); // train according to the data in props

        crf.serializeClassifier(classifierOutput);
    } else {
        crf = CMMClassifier.getClassifier(pretrainedModel);
    }

    testData(crf, testfile, reader);
}