Example usage for edu.stanford.nlp.pipeline POSTaggerAnnotator POSTaggerAnnotator

List of usage examples for edu.stanford.nlp.pipeline POSTaggerAnnotator POSTaggerAnnotator

Introduction

On this page you can find example usages of the edu.stanford.nlp.pipeline.POSTaggerAnnotator constructor.

Prototype

public POSTaggerAnnotator(String annotatorName, Properties props) 

Source Link

Usage

From source file:edu.illinois.cs.cogcomp.comma.annotators.PreProcessor.java

License:Open Source License

/**
 * Sets up the annotators used for pre-processing.
 * <p>
 * If {@code CommaProperties} asks for the Curator, a Curator client is built and stored in
 * {@code annotatorService}. Otherwise the local POS, CoNLL NER and chunker annotators are
 * created together with a Stanford parse handler.
 *
 * @throws Exception declared by the constructor; propagated from the configuration and
 *         annotator construction calls below
 */
public PreProcessor() throws Exception {
    System.out.println("initializing 1 ");
    // Initialise AnnotatorServices with default configurations
    Map<String, String> nonDefaultValues = new HashMap<>();
    if (CommaProperties.getInstance().useCurator()) {
        nonDefaultValues.put(CuratorConfigurator.RESPECT_TOKENIZATION.key, Configurator.TRUE);
        nonDefaultValues.put(CuratorConfigurator.CURATOR_FORCE_UPDATE.key, Configurator.FALSE);
        ResourceManager curatorConfig = (new CuratorConfigurator()).getConfig(nonDefaultValues);
        annotatorService = CuratorFactory.buildCuratorClient(curatorConfig);
    } else {
        ResourceManager rm = new Stanford331Configurator().getDefaultConfig();
        // The configurator's declared default values are used directly here.
        String timePerSentence = Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value;
        String maxParseSentenceLength = Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value;
        boolean throwExceptionOnSentenceLengthCheck = rm
                .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);

        System.out.println("initializing 2 ");

        this.pos = new POSAnnotator();
        this.nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        this.shallowParser = new ChunkerAnnotator();

        // Use setProperty with String values only: Properties.getProperty() returns null for
        // any non-String value, so a Boolean stored via put() would be silently ignored.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        // per sentence? could be per document but no idea from stanford javadoc
        stanfordProps.setProperty("parse.maxtime", timePerSentence);

        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        this.parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength,
                throwExceptionOnSentenceLengthCheck);
    }
}

From source file:edu.illinois.cs.cogcomp.pipeline.main.PipelineFactory.java

License:Open Source License

/**
 * instantiate a set of annotators for use in an AnnotatorService object by default, will use
 * lazy initialization where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 * /*from  w  w  w.j av  a 2s.  co m*/
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator()
            .getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key,
            PipelineConfigurator.TRUE);

    Map<String, Annotator> viewGenerators = new HashMap<>();

    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)
            || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", maxParseSentenceLength);
        stanfordProps.put("parse.maxtime", timePerSentence); // per sentence? could be per
                                                             // document but no idea from
                                                             // stanford javadoc
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm
                .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);

        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength,
                    throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength,
                    throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        ResourceManager verbRm = new ResourceManager(verbProps);
        rm = Configurator.mergeProperties(rm, verbRm);
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            throw new IOException("SRL verb cannot init: " + e.getMessage());
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        ResourceManager nomRm = new ResourceManager(nomProps);
        rm = Configurator.mergeProperties(rm, nomRm);

        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
            // viewGenerators.put(ViewNames.SRL_NOM,new SrlHandler("NomSRL", "5.1.9", nomType,
            // ViewNames.SRL_NOM,
            // useLazyInitialization, rm));
        } catch (Exception e) {
            throw new IOException("SRL nom cannot init .." + e.getMessage());
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        rm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(rm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(rm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }

    return viewGenerators;
}

From source file:org.cogcomp.re.ACEMentionReader.java

License:Open Source License

/**
 * Reads the documents under {@code file}, annotates them, and collects relation examples.
 * <p>
 * For every pair of mentions found in the same sentence of the {@code MENTION_ACE} view:
 * <ul>
 * <li>if an existing relation links the pair (in either direction), that relation is added to
 * {@code relations_mono}, and both it and a newly built opposite-direction relation are added
 * to {@code relations_bi};</li>
 * <li>otherwise one {@code NOT_RELATED} relation is added to {@code relations_mono} and one
 * per direction to {@code relations_bi}.</li>
 * </ul>
 * Any exception raised while reading or annotating is caught and its stack trace printed; the
 * lists then contain whatever was collected up to that point.
 *
 * @param file path handed to the ACE reader
 * @param type stored in {@code readType}
 */
public ACEMentionReader(String file, String type) {
    readType = type;
    relations_mono = new ArrayList<>();
    relations_bi = new ArrayList<>();

    try {
        ACEReader reader = new ACEReaderWithTrueCaseFixer(file, new String[] { "bn", "nw" }, false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false);
        Gazetteers gazetteers = GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordNet = WordNetManager.getInstance();
        // Use setProperty with String values only: Properties.getProperty() returns null for
        // any non-String value, so a Boolean or a configurator property object stored via
        // put() would be silently ignored. `.value` is the property's String default.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
        stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);

        for (TextAnnotation ta : reader) {
            // This one document is skipped explicitly.
            // NOTE(review): the id uses a backslash separator, so it presumably only matches
            // ids produced on Windows -- confirm.
            if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) {
                continue;
            }
            ta.addView(pos_annotator);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);

            View entityView = ta.getView(ViewNames.MENTION_ACE);
            // Copy every token into a "RE_ANNOTATED" view, carrying over its attributes and
            // adding the WordNet-derived ones.
            View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
            for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
                Constituent c = co.cloneForNewView("RE_ANNOTATED");
                for (String s : co.getAttributeKeys()) {
                    c.addAttribute(s, co.getAttribute(s));
                }
                c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c));
                c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c));
                annotatedTokenView.addConstituent(c);
            }
            ta.addView("RE_ANNOTATED", annotatedTokenView);

            List<Relation> existRelations = entityView.getRelations();
            for (int i = 0; i < ta.getNumberOfSentences(); i++) {
                Sentence curSentence = ta.getSentence(i);
                List<Constituent> cins = entityView.getConstituentsCoveringSpan(curSentence.getStartSpan(),
                        curSentence.getEndSpan());
                // Enumerate every unordered pair (j < k) of mentions in the sentence.
                for (int j = 0; j < cins.size(); j++) {
                    for (int k = j + 1; k < cins.size(); k++) {
                        Constituent firstArg = cins.get(j);
                        Constituent secondArg = cins.get(k);
                        Constituent firstArgHead = RelationFeatureExtractor
                                .getEntityHeadForConstituent(firstArg, firstArg.getTextAnnotation(), "A");
                        Constituent secondArgHead = RelationFeatureExtractor
                                .getEntityHeadForConstituent(secondArg, secondArg.getTextAnnotation(), "A");
                        firstArg.addAttribute("GAZ",
                                ((FlatGazetteers) gazetteers).annotatePhrase(firstArgHead));
                        secondArg.addAttribute("GAZ",
                                ((FlatGazetteers) gazetteers).annotatePhrase(secondArgHead));

                        boolean found_as_source = false;
                        boolean found_as_target = false;
                        for (Relation r : existRelations) {
                            // Existing relation firstArg -> secondArg: synthesise the opposite
                            // direction as secondArg -> firstArg.
                            if (r.getSource().getStartSpan() == firstArg.getStartSpan()
                                    && r.getSource().getEndSpan() == firstArg.getEndSpan()
                                    && r.getTarget().getStartSpan() == secondArg.getStartSpan()
                                    && r.getTarget().getEndSpan() == secondArg.getEndSpan()) {
                                relations_mono.add(r);
                                found_as_source = true;
                                String opTagFine = getOppoName(r.getAttribute("RelationSubtype"));
                                String opTagCoarse = ACERelationTester.getCoarseType(opTagFine);
                                Relation opdir = new Relation(opTagCoarse, secondArg, firstArg, 1.0f);
                                opdir.addAttribute("RelationSubtype", opTagFine);
                                opdir.addAttribute("RelationType", opTagCoarse);
                                relations_bi.add(r);
                                relations_bi.add(opdir);
                                break;
                            }
                            // Existing relation secondArg -> firstArg: synthesise the opposite
                            // direction as firstArg -> secondArg.
                            if (r.getTarget().getStartSpan() == firstArg.getStartSpan()
                                    && r.getTarget().getEndSpan() == firstArg.getEndSpan()
                                    && r.getSource().getStartSpan() == secondArg.getStartSpan()
                                    && r.getSource().getEndSpan() == secondArg.getEndSpan()) {
                                relations_mono.add(r);
                                found_as_target = true;
                                String opTagFine = getOppoName(r.getAttribute("RelationSubtype"));
                                String opTagCoarse = ACERelationTester.getCoarseType(opTagFine);
                                Relation opdir = new Relation(opTagCoarse, firstArg, secondArg, 1.0f);
                                opdir.addAttribute("RelationSubtype", opTagFine);
                                opdir.addAttribute("RelationType", opTagCoarse);
                                relations_bi.add(r);
                                relations_bi.add(opdir);
                                break;
                            }
                        }
                        // No existing relation for this pair: emit negative examples.
                        if (!found_as_source && !found_as_target) {
                            Relation newRelation_1 = new Relation("NOT_RELATED", firstArg, secondArg, 1.0f);
                            newRelation_1.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation_1.addAttribute("RelationType", "NOT_RELATED");
                            relations_mono.add(newRelation_1);
                            Relation newRelation_2 = new Relation("NOT_RELATED", secondArg, firstArg, 1.0f);
                            newRelation_2.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation_2.addAttribute("RelationType", "NOT_RELATED");
                            relations_bi.add(newRelation_1);
                            relations_bi.add(newRelation_2);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.cogcomp.re.ExampleUsage.java

License:Open Source License

/**
 * Demonstrates end-to-end relation annotation of a single hard-coded sentence: builds a
 * TextAnnotation, adds POS, chunker, Stanford dependency and relation views, then prints every
 * relation found in the {@code MENTION} view.
 * <p>
 * Exceptions raised while adding views are caught and their stack trace printed.
 */
public static void AnnotatorExample() {
    String text = "He went to Chicago after his Father moved there.";

    String corpus = "story";
    String textId = "001";

    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);

    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // Use setProperty with String values only: Properties.getProperty() returns null for any
    // non-String value, so a Boolean or a configurator property object stored via put() would
    // be silently ignored. `.value` is the property's String default.
    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    stanfordProps.setProperty("parse.originalDependencies", "true");
    stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
    stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    RelationAnnotator relationAnnotator = new RelationAnnotator();

    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        relationAnnotator.addView(ta);
    } catch (Exception e) {
        e.printStackTrace();
    }

    View mentionView = ta.getView(ViewNames.MENTION);

    // Both the predicted mentions and the predicted relations are read from the mention view;
    // only the relations are printed here.
    List<Constituent> predictedMentions = mentionView.getConstituents();
    List<Relation> predictedRelations = mentionView.getRelations();

    for (Relation r : predictedRelations) {
        IOHelper.printRelation(r);
    }
}

From source file:org.cogcomp.re.ExampleUsage.java

License:Open Source License

/**
 * Demonstrates classifying a single hand-built relation with the SemEval relation classifier:
 * annotates a hard-coded sentence, builds the "RE_ANNOTATED" token view, constructs a relation
 * between two token spans and prints the predicted tag.
 * <p>
 * Exceptions raised during setup are caught and their stack trace printed. If the gazetteers
 * could not be loaded the method returns without classifying.
 */
public static void SemEvalAnnotate() {
    String text = "People have been moving back into downtown.";
    String corpus = "semeval";
    String textId = "001";

    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);

    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // Use setProperty with String values only: Properties.getProperty() returns null for any
    // non-String value, so a Boolean or a configurator property object stored via put() would
    // be silently ignored. `.value` is the property's String default.
    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    stanfordProps.setProperty("parse.originalDependencies", "true");
    stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
    stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    String modelPath = "";
    FlatGazetteers gazetteers = null;
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
        modelPath = model.getPath();
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = (FlatGazetteers) GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordnet = WordNetManager.getInstance();
        // Copy every token into a "RE_ANNOTATED" view, carrying over its attributes and adding
        // the WordNet-derived ones.
        View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
        for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
            Constituent c = co.cloneForNewView("RE_ANNOTATED");
            for (String s : co.getAttributeKeys()) {
                c.addAttribute(s, co.getAttribute(s));
            }
            c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
            c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
            annotatedTokenView.addConstituent(c);
        }
        ta.addView("RE_ANNOTATED", annotatedTokenView);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // If setup failed before the gazetteers were loaded, dereferencing them below would throw
    // a NullPointerException; the cause has already been printed above.
    if (gazetteers == null) {
        System.err.println("SemEvalAnnotate: gazetteers could not be loaded; skipping classification.");
        return;
    }

    // Token spans [0,1) and [6,7) -- presumably "People" and "downtown" under the tokenizer
    // used above.
    Constituent source = new Constituent("first", "Mention", ta, 0, 1);
    Constituent target = new Constituent("second", "Mention", ta, 6, 7);
    source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
    target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
    Relation relation = new Relation("TEST", source, target, 1.0f);

    String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
    semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
    String tag = classifier.discreteValue(relation);

    System.out.println(tag);
}

From source file:org.cogcomp.re.PredictedMentionReader.java

License:Open Source License

public PredictedMentionReader(String path) {
    relations = new ArrayList<>();
    try {//from  ww  w . j  a  v  a 2 s  . c o  m
        ACEReader aceReader = new ACEReader(path, false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");

        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false);
        Gazetteers gazetteers = GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordNet = WordNetManager.getInstance();
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        for (TextAnnotation ta : aceReader) {
            if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) {
                continue;
            }
            ta.addView(pos_annotator);
            mentionAnnotator.addView(ta);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);

            View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
            for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
                Constituent c = co.cloneForNewView("RE_ANNOTATED");
                for (String s : co.getAttributeKeys()) {
                    c.addAttribute(s, co.getAttribute(s));
                }
                c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c));
                c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c));
                annotatedTokenView.addConstituent(c);
            }
            ta.addView("RE_ANNOTATED", annotatedTokenView);

            View goldView = ta.getView(ViewNames.MENTION_ACE);
            View predictedView = ta.getView(ViewNames.MENTION);
            Map<Constituent, Constituent> consMap = new HashMap<Constituent, Constituent>();
            for (Constituent c : goldView.getConstituents()) {
                consMap.put(c, null);
                Constituent ch = RelationFeatureExtractor.getEntityHeadForConstituent(c, ta, "");
                for (Constituent pc : predictedView.getConstituents()) {
                    Constituent pch = MentionAnnotator.getHeadConstituent(pc, "");
                    if (ch.getStartSpan() == pch.getStartSpan() && ch.getEndSpan() == pch.getEndSpan()) {
                        consMap.put(c, pc);
                        break;
                    }
                }
            }
            size_of_gold_relations += goldView.getRelations().size();
            for (int i = 0; i < ta.getNumberOfSentences(); i++) {
                Sentence curSentence = ta.getSentence(i);
                List<Constituent> in_cur_sentence = predictedView
                        .getConstituentsCoveringSpan(curSentence.getStartSpan(), curSentence.getEndSpan());
                for (int j = 0; j < in_cur_sentence.size(); j++) {
                    for (int k = j + 1; k < in_cur_sentence.size(); k++) {
                        Constituent source = in_cur_sentence.get(j);
                        Constituent target = in_cur_sentence.get(k);
                        Constituent sourceHead = MentionAnnotator.getHeadConstituent(source, "");
                        Constituent targetHead = MentionAnnotator.getHeadConstituent(target, "");
                        source.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(sourceHead));
                        target.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(targetHead));

                        boolean found_tag = false;
                        for (Relation r : goldView.getRelations()) {

                            if (consMap.get(r.getSource()) == null || consMap.get(r.getTarget()) == null) {
                                continue;
                            }

                            Constituent gsh = ACEReader.getEntityHeadForConstituent(r.getSource(), ta, "A");
                            Constituent gth = ACEReader.getEntityHeadForConstituent(r.getTarget(), ta, "A");
                            Constituent psh = MentionAnnotator.getHeadConstituent(source, "B");
                            Constituent pth = MentionAnnotator.getHeadConstituent(target, "B");

                            if (gsh.getStartSpan() == psh.getStartSpan() && gsh.getEndSpan() == psh.getEndSpan()
                                    && gth.getStartSpan() == pth.getStartSpan()
                                    && gth.getEndSpan() == pth.getEndSpan()) {
                                Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), source,
                                        target, 1.0f);
                                newRelation.addAttribute("RelationType", r.getAttribute("RelationType"));
                                newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype"));
                                newRelation.addAttribute("IsGoldRelation", "True");
                                relations.add(newRelation);
                                found_tag = true;
                                break;
                            }
                            if (gsh.getStartSpan() == pth.getStartSpan() && gsh.getEndSpan() == pth.getEndSpan()
                                    && gth.getStartSpan() == psh.getStartSpan()
                                    && gth.getEndSpan() == psh.getEndSpan()) {
                                Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), target,
                                        source, 1.0f);
                                newRelation.addAttribute("RelationType", r.getAttribute("RelationType"));
                                newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype"));
                                newRelation.addAttribute("IsGoldRelation", "True");
                                relations.add(newRelation);
                                found_tag = true;
                                break;
                            }
                        }
                        if (!found_tag) {
                            Relation newRelation = new Relation("NOT_RELATED", source, target, 1.0f);
                            newRelation.addAttribute("RelationType", "NOT_RELATED");
                            newRelation.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation.addAttribute("IsGoldRelation", "False");
                            relations.add(newRelation);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.cogcomp.re.SemEvalMentionReader.java

License:Open Source License

/**
 * Creates the external tools this reader relies on and stores them in the corresponding
 * fields: POS annotator, gazetteers, WordNet manager, chunker, Stanford dependency handler and
 * mention annotator.
 * <p>
 * Any exception raised during set-up is caught and its stack trace printed; fields assigned
 * after the failing statement are left untouched.
 */
public void initExternalTools() {
    try {
        _posAnnotator = new POSAnnotator();
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        _gazetteers = (FlatGazetteers) GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        _wordnet = WordNetManager.getInstance();
        __chunker = new ChunkerAnnotator(true);
        __chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

        // Use setProperty with String values only: Properties.getProperty() returns null for
        // any non-String value, so a Boolean or a configurator property object stored via
        // put() would be silently ignored. `.value` is the property's String default.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
        stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        __stanfordDep = new StanfordDepHandler(posAnnotator, parseAnnotator);
        __mentionAnnotator = new MentionAnnotator("ACE_TYPE");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:shef.mt.tools.ParsingProcessor.java

/**
 * Sets up a Stanford CoreNLP pipeline with a whitespace tokenizer, a POS tagger and, when
 * dependency counts are required, a parser.
 * <p>
 * Failures while creating the individual annotators are reported on standard output and do not
 * abort construction.
 *
 * @param lang language name; "english", "spanish" and "chinese" select language-specific
 *        default models, any other value (including {@code null}) falls back to the English
 *        defaults. Only consulted for a model whose path argument is {@code null}.
 * @param pm path of the POS tagger model, or {@code null} to use the default for {@code lang}
 * @param dm path of the parser model, or {@code null} to use the default for {@code lang}
 * @param requirements required resources; "postags" and "depcounts" are recognised
 */
public ParsingProcessor(String lang, String pm, String dm, HashSet<String> requirements) {
    //Store required resources:
    this.requiresPOSTags = requirements.contains("postags");
    this.requiresDepCounts = requirements.contains("depcounts");

    //Create model path objects:
    String posModel = null;
    String depModel = null;

    //Setup model paths (constant-first equals keeps a null lang on the English fallback):
    if (pm == null) {
        if ("english".equals(lang)) {
            posModel = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
        } else if ("spanish".equals(lang)) {
            posModel = "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger";
        } else if ("chinese".equals(lang)) {
            posModel = "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger";
        } else {
            //Fallback: same English model as above (path previously read "edu/stanford.nlp/...").
            posModel = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
        }
    } else {
        posModel = pm;
    }
    if (dm == null) {
        if ("english".equals(lang)) {
            depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
        } else if ("spanish".equals(lang)) {
            depModel = "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz";
        } else if ("chinese".equals(lang)) {
            depModel = "edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz";
        } else {
            depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
        }
    } else {
        depModel = dm;
    }

    //Create base properties:
    Properties props = new Properties();
    if (this.requiresDepCounts) {
        props.setProperty("annotators", "tokenize, ssplit, pos, parse");
    } else {
        props.setProperty("annotators", "tokenize, ssplit, pos");
    }

    //Create base pipeline:
    pipeline = new StanfordCoreNLP(props);

    try {
        //Create pipeline object:
        tokenizer = new TokenizerAnnotator(true, TokenizerAnnotator.TokenizerType.Whitespace);

        //Add objects to the pipeline:
        pipeline.addAnnotator(tokenizer);
    } catch (Exception ex) {
        System.out.println("ERROR: Problem while creating Stanford tokenizer.");
        //Report the underlying failure so it can be diagnosed:
        System.out.println("Cause: " + ex);
    }

    try {
        //Create pipeline object:
        tagger = new POSTaggerAnnotator(posModel, false);

        //Add object to the pipeline:
        pipeline.addAnnotator(tagger);
    } catch (Exception ex) {
        System.out.println(
                "ERROR: Problem while creating Stanford POS tagger. Please review the model paths and check for library availability.");
        //Report the underlying failure so it can be diagnosed:
        System.out.println("Cause: " + ex);
    }

    //If dependency counts are required:
    if (this.requiresDepCounts) {
        try {
            //Create pipeline object:
            parser = new ParserAnnotator(depModel, false, 300, StringUtils.EMPTY_STRING_ARRAY);

            //Add object to the pipeline:
            pipeline.addAnnotator(parser);
        } catch (Exception ex) {
            System.out.println(
                    "ERROR: Problem while creating Stanford dependency parser. Please review the model paths and check for library availability.");
            //Report the underlying failure so it can be diagnosed:
            System.out.println("Cause: " + ex);
        }
    }
}