Usage examples for the edu.stanford.nlp.ie.ner.CMMClassifier constructor
public CMMClassifier(SeqClassifierFlags flags)
From source file: lv.lumii.morphotagger.MorphoCRF.java
License: Open Source License
/** * @param args/* w ww . ja va 2 s . co m*/ * @throws IOException * @throws ClassNotFoundException * @throws ClassCastException */ public static void main(String[] args) throws IOException, ClassCastException, ClassNotFoundException { String trainfile = "MorphoCRF/train_dev.txt"; String testfile = "MorphoCRF/test.txt"; boolean train = false; for (int i = 0; i < args.length; i++) { if (args[i].equalsIgnoreCase("-train")) { train = true; } if (args[i].equalsIgnoreCase("-dev")) { trainfile = "MorphoCRF/train.txt"; testfile = "MorphoCRF/dev.txt"; } if (args[i].equalsIgnoreCase("-production")) { trainfile = "MorphoCRF/all.txt"; testfile = "MorphoCRF/test.txt"; } } String pretrainedModel = "models/lv-morpho-model.ser.gz"; String classifierOutput = "MorphoCRF/lv-morpho-model.ser.gz"; //Properties props = StringUtils.propFileToProperties("/Users/pet/Documents/java/PaikensNER/MorfoCRF/lv-PP.prop"); Properties props = new Properties(); props.setProperty("useLVMorphoAnalyzer", "true"); props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_PartOfSpeech); //props.setProperty("LVMorphoAnalyzerTag", AttributeNames.i_Case); props.setProperty("useLVMorphoAnalyzerPOS", "true"); props.setProperty("useLVMorphoAnalyzerTag", "true"); props.setProperty("useLVMorphoAnalyzerPrev", "true"); props.setProperty("useLVMorphoAnalyzerNext", "true"); props.setProperty("useLVMorphoAnalyzerItemIDs", "true"); props.setProperty("saveFeatureIndexToDisk", "true"); props.setProperty("maxLeft", "1"); props.setProperty("useWord", "true"); //props.setProperty("use2W", "true"); //props.setProperty("usePrevSequences", "true"); //props.setProperty("useClassFeature", "true"); //props.setProperty("useTypeSeqs2", "true"); //props.setProperty("useSequences", "true"); props.setProperty("wordShape", "dan2useLC"); //props.setProperty("useTypeySequences", "true"); //props.setProperty("useDisjunctive", "true"); props.setProperty("noMidNGrams", "true"); props.setProperty("maxNGramLeng", "6"); 
props.setProperty("useNGrams", "true"); //props.setProperty("usePrev", "true"); //props.setProperty("useNext", "true"); //props.setProperty("useTypeSeqs", "true"); props.setProperty("readerAndWriter", "edu.stanford.nlp.sequences.LVMorphologyReaderAndWriter"); props.setProperty("map", "word=0,answer=1,lemma=2"); AbstractSequenceClassifier<CoreLabel> crf = new CMMClassifier<CoreLabel>(props); DocumentReaderAndWriter reader = crf.makeReaderAndWriter(); if (train) { ObjectBank<List<CoreLabel>> documents = crf.makeObjectBankFromFile(trainfile, reader); crf.train(documents, reader); //atbilstoi props datiem crf.serializeClassifier(classifierOutput); } else { crf = CMMClassifier.getClassifier(pretrainedModel); } testData(crf, testfile, reader); }