Example usage for the edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier constructor

List of usage examples for the edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier constructor

Introduction

On this page you can find example usages of the edu.stanford.nlp.ie.regexp.RegexNERSequenceClassifier constructor.

Prototype

public RegexNERSequenceClassifier(String mapping, boolean ignoreCase, boolean overwriteMyLabels) 

Source Link

Usage

From source file:lv.lumii.ner.analysis.ClassifierComparator.java

License:Open Source License

/**
 * Command-line entry point: configures a {@code ClassifierComparator} from the
 * arguments, classifies the document read from the {@code inputFile} property
 * and previews the resulting entity statistics.
 *
 * <p>Properties consulted:
 * <ul>
 *   <li>{@code inputFile} - path handed to the CoNLL reader;</li>
 *   <li>{@code classifier0} .. {@code classifier9} - each value is a
 *       {@code |}-separated list of {@code name=value} entries, where name is
 *       {@code whiteList}, {@code loadClassifier} or {@code regexList}
 *       (case-insensitive); all entries of one property are registered
 *       together as one list of classifiers;</li>
 *   <li>{@code whiteList}, {@code loadClassifier}, {@code regexList} - each a
 *       {@code ;}-separated list of paths, every path registered as a separate
 *       classifier.</li>
 * </ul>
 *
 * @param args command-line arguments, converted with {@code StringUtils.argsToProperties}
 * @throws ClassCastException     propagated from {@code CRFClassifier.getClassifier}
 * @throws ClassNotFoundException propagated from {@code CRFClassifier.getClassifier}
 * @throws IOException            propagated from classifier loading or document reading
 */
public static void main(String[] args) throws ClassCastException, ClassNotFoundException, IOException {
    Properties props = StringUtils.argsToProperties(args);
    inputFile = props.getProperty("inputFile", null);
    System.err.println(props);
    ClassifierComparator cc = new ClassifierComparator(props);

    for (int i = 0; i < 10; i++) {
        String groupKey = "classifier" + i;
        if (!props.containsKey(groupKey)) {
            continue;
        }
        System.err.println(groupKey);
        String[] parts = props.getProperty(groupKey).split("\\s*\\|\\s*");
        List<AbstractSequenceClassifier<CoreLabel>> nccl = new ArrayList<>();
        for (String part : parts) {
            System.err.println("\t" + part);
            // Limit 2 keeps any further '=' inside the value (e.g. in a path) intact.
            String[] items = part.split("\\s*=\\s*", 2);
            if (items.length < 2 || items[1].isEmpty()) {
                // Without a value there is nothing to load; skip instead of
                // failing with ArrayIndexOutOfBoundsException on items[1].
                System.err.println("\tignoring malformed entry (expected name=value): " + part);
                continue;
            }
            String name = items[0];
            String value = items[1];
            if (name.equalsIgnoreCase("whiteList")) {
                System.err.println("\twhiteList" + value);
                nccl.add(new ListNERSequenceClassifier(value, true, true, true));
            } else if (name.equalsIgnoreCase("loadClassifier")) {
                System.err.println("\tloadClassifier" + value);
                nccl.add(CRFClassifier.getClassifier(value, props));
            } else if (name.equalsIgnoreCase("regexList")) {
                System.err.println("\tregexList" + value);
                // ignoreCase = true, overwriteMyLabels = true
                nccl.add(new RegexNERSequenceClassifier(value, true, true));
            }
        }
        cc.addClassifier(nccl);
    }

    if (props.containsKey("whiteList")) {
        for (String whiteList : props.getProperty("whiteList").split(";")) {
            whiteList = whiteList.trim();
            if (whiteList.isEmpty()) {
                continue; // stray separator, nothing to load
            }
            cc.addClassifier(new ListNERSequenceClassifier(whiteList, true, true, true));
        }
    }

    if (props.containsKey("loadClassifier")) {
        for (String loadClassifier : props.getProperty("loadClassifier").split(";")) {
            loadClassifier = loadClassifier.trim();
            if (loadClassifier.isEmpty()) {
                continue; // stray separator, nothing to load
            }
            cc.addClassifier(CRFClassifier.getClassifier(loadClassifier, props));
        }
    }

    if (props.containsKey("regexList")) {
        for (String regexList : props.getProperty("regexList").split(";")) {
            regexList = regexList.trim();
            if (regexList.isEmpty()) {
                continue; // stray separator, nothing to load
            }
            // ignoreCase = true, overwriteMyLabels = true
            cc.addClassifier(new RegexNERSequenceClassifier(regexList, true, true));
        }
    }

    LVCoNLLDocumentReaderAndWriter reader = new LVCoNLLDocumentReaderAndWriter();
    List<CoreLabel> doc = reader.readCONLL(inputFile);

    cc.classify(doc);

    // NOTE(review): nothing is printed through the reader afterwards, so this
    // setting only matters if answer printing is re-enabled - confirm it is still needed.
    reader.outputType = LVCoNLLDocumentReaderAndWriter.outputTypes.COMPARE;

    cc.entity_stats().preview();
}

From source file:lv.lumii.ner.NerPipe.java

License:Open Source License

/**
 * Creates a NER pipe from the given properties.
 *
 * <p>After {@code initializeFromProperties()} has run, the classifiers are
 * collected in this order and combined into one {@code NERClassifierCombiner}:
 * list classifiers for {@code whiteListCasedLemmas},
 * {@code whiteListUncasedWords} and {@code whiteListCasedWords} (each only if
 * the property is present), the CRF classifier named by
 * {@code defaultCrfClassifier} (if non-null), and a regex classifier for
 * {@code regexList} (if present).
 *
 * @param props configuration; stored in {@code this.props} and also passed to the CRF classifier
 * @throws ClassCastException     propagated from {@code CRFClassifier.getClassifier}
 * @throws ClassNotFoundException propagated from {@code CRFClassifier.getClassifier}
 * @throws IOException            propagated from classifier loading
 */
@SuppressWarnings("unchecked")
public NerPipe(Properties props) throws ClassCastException, ClassNotFoundException, IOException {
    this.props = props;
    // NOTE(review): presumably this is what sets defaultCrfClassifier - confirm.
    initializeFromProperties();

    List<AbstractSequenceClassifier<CoreLabel>> classifiers = new ArrayList<>();

    if (props.containsKey("whiteListCasedLemmas")) {
        // Read the same key that was just checked. The previous code fetched
        // "whiteListUncasedLemmas" here, which could hand a null (or unrelated)
        // path to the classifier.
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListCasedLemmas"), false, true, true));
    }
    if (props.containsKey("whiteListUncasedWords")) {
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListUncasedWords"), true, false, true));
    }
    if (props.containsKey("whiteListCasedWords")) {
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListCasedWords"), false, false, true));
    }
    if (defaultCrfClassifier != null) {
        classifiers.add(CRFClassifier.getClassifier(defaultCrfClassifier, props));
    }
    if (props.containsKey("regexList")) {
        // ignoreCase = true, overwriteMyLabels = true
        classifiers.add(new RegexNERSequenceClassifier(props.getProperty("regexList"), true, true));
    }

    classifier = new NERClassifierCombiner(classifiers);
    defaultReaderWriter = new LVCoNLLDocumentReaderAndWriter();
    defaultReaderWriter.init(classifier.flags);
}

From source file:lv.pipe.NerTagger.java

License:Open Source License

/**
 * Initializes the tagger from {@code props}.
 *
 * <p>Classifiers are collected in this order and combined into one
 * {@code NERClassifierCombiner} stored in {@code nerClassifier}: list
 * classifiers for {@code whiteListCasedLemmas}, {@code whiteListUncasedWords}
 * and {@code whiteListCasedWords}, the CRF classifier named by
 * {@code loadClassifier}, and a regex classifier for {@code regexList} - each
 * only when the corresponding property is present.
 *
 * <p>Failures while loading the CRF classifier or building the combiner are
 * printed to standard error and otherwise ignored; in the latter case
 * {@code nerClassifier} is not assigned by this call.
 *
 * @param props configuration; also stored in {@code properties}
 */
@Override
public void init(Properties props) {
    properties = props;
    List<AbstractSequenceClassifier<CoreLabel>> classifiers = new ArrayList<>();
    if (props.containsKey("whiteListCasedLemmas")) {
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListCasedLemmas"), false, true, true));
    }
    if (props.containsKey("whiteListUncasedWords")) {
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListUncasedWords"), true, false, true));
    }
    if (props.containsKey("whiteListCasedWords")) {
        classifiers.add(
                new ListNERSequenceClassifier(props.getProperty("whiteListCasedWords"), false, false, true));
    }
    if (props.containsKey("loadClassifier")) {
        // The CRF classifier gets a copy of the properties without the keys
        // that configure the list and regex classifiers.
        Properties crfProps = new Properties();
        for (Enumeration<?> names = props.propertyNames(); names.hasMoreElements();) {
            Object key = names.nextElement();
            if (key.equals("whiteListCasedLemmas") || key.equals("whiteListUncasedWords")
                    || key.equals("whiteListCasedWords") || key.equals("regexList")) {
                continue;
            }
            Object value = props.get(key);
            if (value == null && key instanceof String) {
                // propertyNames() also enumerates keys that exist only in the
                // default list, which Hashtable.get() does not consult.
                value = props.getProperty((String) key);
            }
            if (value != null) {
                // Hashtable rejects null values; putting one would throw NullPointerException.
                crfProps.put(key, value);
            }
        }
        System.err.println(crfProps);
        try {
            classifiers.add(CRFClassifier.getClassifier(crfProps.getProperty("loadClassifier"), crfProps));
        } catch (ClassCastException | ClassNotFoundException | IOException e) {
            // Best effort: report and continue without the CRF classifier.
            e.printStackTrace();
        }
    }
    if (props.containsKey("regexList")) {
        // ignoreCase = true, overwriteMyLabels = true
        classifiers.add(new RegexNERSequenceClassifier(props.getProperty("regexList"), true, true));
    }
    try {
        nerClassifier = new NERClassifierCombiner(classifiers);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}