Usage examples for edu.stanford.nlp.ie.crf.CRFClassifier.getClassifier
public static <INN extends CoreMap> CRFClassifier<INN> getClassifier(ObjectInputStream ois, Properties props) throws IOException, ClassCastException, ClassNotFoundException
From source file: lv.lumii.ner.analysis.ClassifierComparator.java
License:Open Source License
public static void main(String[] args) throws ClassCastException, ClassNotFoundException, IOException { Properties props = new Properties(); props = StringUtils.argsToProperties(args); inputFile = props.getProperty("inputFile", null); String propString = "" + "-loadClassifier lv-ner-model.ser.gz" //+ " -whiteList Gazetteer/DB_persons.txt,Gazetteer/DB_locations.txt,Gazetteer/DB_professions.txt,Gazetteer/Laura_partijas_lem.txt,Gazetteer/AZ_valsts_parvaldes_struktura_lem.txt,D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/stats/DB_organizations2.txt" //+ "-loadClassifier D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-1.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-2.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-3.ser.gz" //";D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-4.ser.gz;D:/LUMII/workspace/LVTagger/ner_models/26-11-2013_011138_cross/lv-ner-model-5.ser.gz" + ""; //props = StringUtils.argsToProperties(propString.split(" ")); System.err.println(props);//from ww w. ja v a2s. 
c om ClassifierComparator cc = new ClassifierComparator(props); for (Integer i = 0; i < 10; i++) { if (props.containsKey("classifier" + i.toString())) { System.err.println("classifier" + i.toString()); String[] parts = props.getProperty("classifier" + i.toString()).split("\\s*\\|\\s*"); List<AbstractSequenceClassifier<CoreLabel>> nccl = new ArrayList<>(); for (String part : parts) { System.err.println("\t" + part); String[] items = part.split("\\s*=\\s*"); System.err.println("\t" + part); if (items[0].equalsIgnoreCase("whiteList")) { System.err.println("\twhiteList" + items[1]); nccl.add(new ListNERSequenceClassifier(items[1], true, true, true)); } if (items[0].equalsIgnoreCase("loadClassifier")) { System.err.println("\tloadClassifier" + items[1]); nccl.add(CRFClassifier.getClassifier(items[1], props)); } if (items[0].equalsIgnoreCase("regexList")) { System.err.println("\tregexListt" + items[1]); nccl.add(new RegexNERSequenceClassifier(items[1], true, true)); } } cc.addClassifier(nccl); } } if (props.containsKey("whiteList")) { String whiteListString = props.getProperty("whiteList"); String whiteLists[] = whiteListString.split(";"); for (String whiteList : whiteLists) { whiteList = whiteList.trim(); cc.addClassifier(new ListNERSequenceClassifier(whiteList, true, true, true)); } } if (props.containsKey("loadClassifier")) { String loadClassifierString = props.getProperty("loadClassifier"); String loadClassifiers[] = loadClassifierString.split(";"); for (String loadClassifier : loadClassifiers) { loadClassifier = loadClassifier.trim(); cc.addClassifier(CRFClassifier.getClassifier(loadClassifier, props)); } } if (props.containsKey("regexList")) { String regexListString = props.getProperty("regexList"); String regexLists[] = regexListString.split(";"); for (String regexList : regexLists) { regexList = regexList.trim(); cc.addClassifier(new RegexNERSequenceClassifier(regexList, true, true)); } } LVCoNLLDocumentReaderAndWriter reader = new 
LVCoNLLDocumentReaderAndWriter(); //List<CoreLabel> doc = reader.readCONLL("D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/FB1A480C-5109-4D34-AFDF-FD4B9CC6E790.conll"); //inputFile = "D:/LUMII/ner/improve_9.1.2014/leta-2014-jan/export/leta-2014-jan-test.tab"; List<CoreLabel> doc = reader.readCONLL(inputFile); cc.classify(doc); reader.outputType = LVCoNLLDocumentReaderAndWriter.outputTypes.COMPARE; //reader.printAnswers(cc.document, new PrintWriter(System.out)); cc.entity_stats().preview(); }
From source file: lv.lumii.ner.NerPipe.java
License:Open Source License
@SuppressWarnings("unchecked") public NerPipe(Properties props) throws ClassCastException, ClassNotFoundException, IOException { this.props = props; initializeFromProperties();//from w w w. jav a 2 s .c o m List<AbstractSequenceClassifier<CoreLabel>> classifiers = new ArrayList<>(); if (props.containsKey("whiteListCasedLemmas")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListUncasedLemmas"), false, true, true)); if (props.containsKey("whiteListUncasedWords")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListUncasedWords"), true, false, true)); if (props.containsKey("whiteListCasedWords")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListCasedWords"), false, false, true)); if (defaultCrfClassifier != null) classifiers.add(CRFClassifier.getClassifier(defaultCrfClassifier, props)); if (props.containsKey("regexList")) classifiers.add(new RegexNERSequenceClassifier(props.getProperty("regexList"), true, true)); classifier = new NERClassifierCombiner(classifiers); defaultReaderWriter = new LVCoNLLDocumentReaderAndWriter(); defaultReaderWriter.init(classifier.flags); }
From source file: lv.pipe.NerTagger.java
License:Open Source License
@Override public void init(Properties props) { properties = props;// ww w.j a va 2 s . c o m List<AbstractSequenceClassifier<CoreLabel>> classifiers = new ArrayList<>(); if (props.containsKey("whiteListCasedLemmas")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListCasedLemmas"), false, true, true)); if (props.containsKey("whiteListUncasedWords")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListUncasedWords"), true, false, true)); if (props.containsKey("whiteListCasedWords")) classifiers.add( new ListNERSequenceClassifier(props.getProperty("whiteListCasedWords"), false, false, true)); if (props.containsKey("loadClassifier")) { Properties crfProps = new Properties(); for (@SuppressWarnings("rawtypes") Enumeration propertyNames = props.propertyNames(); propertyNames.hasMoreElements();) { Object key = propertyNames.nextElement(); if (key.equals("whiteListCasedLemmas")) continue; if (key.equals("whiteListUncasedWords")) continue; if (key.equals("whiteListCasedWords")) continue; if (key.equals("regexList")) continue; crfProps.put(key, props.get(key)); } System.err.println(crfProps); try { classifiers.add(CRFClassifier.getClassifier(crfProps.getProperty("loadClassifier"), crfProps)); } catch (ClassCastException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } if (props.containsKey("regexList")) classifiers.add(new RegexNERSequenceClassifier(props.getProperty("regexList"), true, true)); try { nerClassifier = new NERClassifierCombiner(classifiers); } catch (FileNotFoundException e) { e.printStackTrace(); } }