Example usage for the edu.stanford.nlp.pipeline.ParserAnnotator constructor

List of usage examples for the edu.stanford.nlp.pipeline.ParserAnnotator constructor

Introduction

On this page you can find example usages of the edu.stanford.nlp.pipeline.ParserAnnotator constructor.

Prototype

public ParserAnnotator(String annotatorName, Properties props) 

Source Link

Usage

From source file:edu.illinois.cs.cogcomp.comma.annotators.PreProcessor.java

License:Open Source License

/**
 * Sets up the annotators used for pre-processing.
 *
 * <p>When {@code CommaProperties} requests the Curator, a Curator client is built and stored in
 * {@code annotatorService}. Otherwise local POS, NER (CoNLL), chunker and Stanford parse
 * annotators are created.
 *
 * @throws Exception propagated from the configurator / annotator construction calls made here
 */
public PreProcessor() throws Exception {
    System.out.println("initializing 1 ");
    // Initialise AnnotatorServices with default configurations
    Map<String, String> nonDefaultValues = new HashMap<>();
    if (CommaProperties.getInstance().useCurator()) {
        nonDefaultValues.put(CuratorConfigurator.RESPECT_TOKENIZATION.key, Configurator.TRUE);
        nonDefaultValues.put(CuratorConfigurator.CURATOR_FORCE_UPDATE.key, Configurator.FALSE);
        ResourceManager curatorConfig = (new CuratorConfigurator()).getConfig(nonDefaultValues);
        annotatorService = CuratorFactory.buildCuratorClient(curatorConfig);
    } else {
        ResourceManager rm = new Stanford331Configurator().getDefaultConfig();
        String timePerSentence = Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value;
        String maxParseSentenceLength = Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value;
        boolean throwExceptionOnSentenceLengthCheck = rm
                .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);

        System.out.println("initializing 2 ");

        this.pos = new POSAnnotator();
        this.nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        this.shallowParser = new ChunkerAnnotator();

        // Use setProperty with String values: Properties.getProperty() treats any
        // non-String value (such as a Boolean stored via put) as if the key were absent.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", maxParseSentenceLength);
        stanfordProps.setProperty("parse.maxtime", timePerSentence);
        // per sentence? could be per
        // document but no idea from
        // stanford javadoc
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        this.parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength,
                throwExceptionOnSentenceLengthCheck);
    }
}

From source file:edu.illinois.cs.cogcomp.pipeline.main.PipelineFactory.java

License:Open Source License

/**
 * instantiate a set of annotators for use in an AnnotatorService object by default, will use
 * lazy initialization where possible -- change this behavior with the
 * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property.
 * /*from ww  w.  java2s. co  m*/
 * @param nonDefaultRm ResourceManager with all non-default values for Annotators
 * @return a Map from annotator view name to annotator
 */
private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException {
    ResourceManager rm = new PipelineConfigurator()
            .getConfig(new Stanford331Configurator().getConfig(nonDefaultRm));
    String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
    String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key,
            PipelineConfigurator.TRUE);

    Map<String, Annotator> viewGenerators = new HashMap<>();

    if (rm.getBoolean(PipelineConfigurator.USE_POS)) {
        POSAnnotator pos = new POSAnnotator();
        viewGenerators.put(pos.getViewName(), pos);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) {
        IllinoisLemmatizer lem = new IllinoisLemmatizer(rm);
        viewGenerators.put(lem.getViewName(), lem);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) {
        viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator());
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) {
        NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
        viewGenerators.put(nerConll.getViewName(), nerConll);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) {
        NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES);
        viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DEP)) {
        DepAnnotator dep = new DepAnnotator();
        viewGenerators.put(dep.getViewName(), dep);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)
            || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", maxParseSentenceLength);
        stanfordProps.put("parse.maxtime", timePerSentence); // per sentence? could be per
                                                             // document but no idea from
                                                             // stanford javadoc
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        int maxLength = Integer.parseInt(maxParseSentenceLength);
        boolean throwExceptionOnSentenceLengthCheck = rm
                .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key);

        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) {
            StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength,
                    throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(depParser.getViewName(), depParser);
        }
        if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) {
            StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength,
                    throwExceptionOnSentenceLengthCheck);
            viewGenerators.put(parser.getViewName(), parser);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) {
        Properties verbProps = new Properties();
        String verbType = SRLType.Verb.name();
        verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType);
        ResourceManager verbRm = new ResourceManager(verbProps);
        rm = Configurator.mergeProperties(rm, verbRm);
        try {
            SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            viewGenerators.put(ViewNames.SRL_VERB, verbSrl);
        } catch (Exception e) {
            throw new IOException("SRL verb cannot init: " + e.getMessage());
        }
    }
    if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) {
        Properties nomProps = new Properties();
        String nomType = SRLType.Nom.name();
        nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType);
        ResourceManager nomRm = new ResourceManager(nomProps);
        rm = Configurator.mergeProperties(rm, nomRm);

        try {
            SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(rm, useLazyInitialization);
            // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be
            // initialized yet
            viewGenerators.put(ViewNames.SRL_NOM, nomSrl);
            // viewGenerators.put(ViewNames.SRL_NOM,new SrlHandler("NomSRL", "5.1.9", nomType,
            // ViewNames.SRL_NOM,
            // useLazyInitialization, rm));
        } catch (Exception e) {
            throw new IOException("SRL nom cannot init .." + e.getMessage());
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) {
        Quantifier quantifierAnnotator = new Quantifier();
        viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) {
        for (Language lang : TransliterationAnnotator.supportedLanguages) {
            TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang);
            viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator);
        }
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) {
        PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator();
        viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) {
        CommaLabeler commaLabeler = new CommaLabeler();
        viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler);
    }

    if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) {
        VerbSenseAnnotator verbSense = new VerbSenseAnnotator();
        viewGenerators.put(ViewNames.VERB_SENSE, verbSense);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) {
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        viewGenerators.put(ViewNames.MENTION, mentionAnnotator);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) {
        viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true));
    }
    if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) {
        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        viewGenerators.put(ViewNames.TIMEX3, tca);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) {
        rm = new ESADatalessConfigurator().getConfig(nonDefaultRm);
        ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(rm);
        viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) {
        rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm);
        W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(rm);
        viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless);
    }
    if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) {
        QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator();
        viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper);
    }

    return viewGenerators;
}

From source file:org.cogcomp.re.ACEMentionReader.java

License:Open Source License

/**
 * Reads the ACE documents under {@code file} (sections "bn" and "nw") and fills
 * {@code relations_mono} and {@code relations_bi} with relation examples built from every pair
 * of mentions that share a sentence.
 *
 * <p>For each pair, a matching gold relation (in either direction) is added to both lists, and
 * its opposite-direction counterpart is additionally added to {@code relations_bi}. Pairs with
 * no gold relation are added as "NOT_RELATED" (one direction in {@code relations_mono}, both in
 * {@code relations_bi}).
 *
 * <p>Any exception raised while reading or annotating is printed and otherwise ignored, so the
 * lists may be only partially filled.
 *
 * @param file path handed to the ACE reader
 * @param type stored in {@code readType}
 */
public ACEMentionReader(String file, String type) {
    readType = type;
    relations_mono = new ArrayList<>();
    relations_bi = new ArrayList<>();

    try {
        ACEReader reader = new ACEReaderWithTrueCaseFixer(file, new String[] { "bn", "nw" }, false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false);
        Gazetteers gazetteers = GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordNet = WordNetManager.getInstance();
        // Use setProperty with String values (the configurator entries' .value): a value that
        // is not a String is invisible to Properties.getProperty(), so the setting would be
        // silently ignored.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
        stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);

        for (TextAnnotation ta : reader) {
            // This one document is explicitly excluded.
            if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) {
                continue;
            }
            ta.addView(pos_annotator);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);

            View entityView = ta.getView(ViewNames.MENTION_ACE);
            // Copy every token into an "RE_ANNOTATED" view, carrying over its attributes and
            // adding WordNet-derived ones.
            View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
            for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
                Constituent c = co.cloneForNewView("RE_ANNOTATED");
                for (String s : co.getAttributeKeys()) {
                    c.addAttribute(s, co.getAttribute(s));
                }
                c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c));
                c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c));
                annotatedTokenView.addConstituent(c);
            }
            ta.addView("RE_ANNOTATED", annotatedTokenView);

            List<Relation> existRelations = entityView.getRelations();
            for (int i = 0; i < ta.getNumberOfSentences(); i++) {
                Sentence curSentence = ta.getSentence(i);
                List<Constituent> cins = entityView.getConstituentsCoveringSpan(curSentence.getStartSpan(),
                        curSentence.getEndSpan());
                // Visit every unordered pair (j < k) of mentions in the sentence.
                for (int j = 0; j < cins.size(); j++) {
                    for (int k = j + 1; k < cins.size(); k++) {
                        Constituent firstArg = cins.get(j);
                        Constituent secondArg = cins.get(k);
                        Constituent firstArgHead = RelationFeatureExtractor
                                .getEntityHeadForConstituent(firstArg, firstArg.getTextAnnotation(), "A");
                        Constituent secondArgHead = RelationFeatureExtractor
                                .getEntityHeadForConstituent(secondArg, secondArg.getTextAnnotation(), "A");
                        firstArg.addAttribute("GAZ",
                                ((FlatGazetteers) gazetteers).annotatePhrase(firstArgHead));
                        secondArg.addAttribute("GAZ",
                                ((FlatGazetteers) gazetteers).annotatePhrase(secondArgHead));

                        boolean found_as_source = false;
                        boolean found_as_target = false;
                        for (Relation r : existRelations) {
                            // Gold relation firstArg -> secondArg.
                            if (r.getSource().getStartSpan() == firstArg.getStartSpan()
                                    && r.getSource().getEndSpan() == firstArg.getEndSpan()
                                    && r.getTarget().getStartSpan() == secondArg.getStartSpan()
                                    && r.getTarget().getEndSpan() == secondArg.getEndSpan()) {
                                relations_mono.add(r);
                                found_as_source = true;
                                String opTagFine = getOppoName(r.getAttribute("RelationSubtype"));
                                String opTagCoarse = ACERelationTester.getCoarseType(opTagFine);
                                Relation opdir = new Relation(opTagCoarse, secondArg, firstArg, 1.0f);
                                opdir.addAttribute("RelationSubtype", opTagFine);
                                opdir.addAttribute("RelationType", opTagCoarse);
                                relations_bi.add(r);
                                relations_bi.add(opdir);
                                break;
                            }
                            // Gold relation secondArg -> firstArg.
                            if (r.getTarget().getStartSpan() == firstArg.getStartSpan()
                                    && r.getTarget().getEndSpan() == firstArg.getEndSpan()
                                    && r.getSource().getStartSpan() == secondArg.getStartSpan()
                                    && r.getSource().getEndSpan() == secondArg.getEndSpan()) {
                                relations_mono.add(r);
                                found_as_target = true;
                                String opTagFine = getOppoName(r.getAttribute("RelationSubtype"));
                                String opTagCoarse = ACERelationTester.getCoarseType(opTagFine);
                                Relation opdir = new Relation(opTagCoarse, firstArg, secondArg, 1.0f);
                                opdir.addAttribute("RelationSubtype", opTagFine);
                                opdir.addAttribute("RelationType", opTagCoarse);
                                relations_bi.add(r);
                                relations_bi.add(opdir);
                                break;
                            }
                        }
                        // No gold relation between the pair: emit NOT_RELATED examples.
                        if (!found_as_source && !found_as_target) {
                            Relation newRelation_1 = new Relation("NOT_RELATED", firstArg, secondArg, 1.0f);
                            newRelation_1.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation_1.addAttribute("RelationType", "NOT_RELATED");
                            relations_mono.add(newRelation_1);
                            Relation newRelation_2 = new Relation("NOT_RELATED", secondArg, firstArg, 1.0f);
                            newRelation_2.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation_2.addAttribute("RelationType", "NOT_RELATED");
                            relations_bi.add(newRelation_1);
                            relations_bi.add(newRelation_2);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.cogcomp.re.ExampleUsage.java

License:Open Source License

/**
 * Example: annotates a short sentence with POS, chunk, Stanford dependency and relation views,
 * then prints every relation found in the {@code ViewNames.MENTION} view.
 *
 * <p>Exceptions raised while adding views are printed and otherwise ignored.
 */
public static void AnnotatorExample() {
    String text = "He went to Chicago after his Father moved there.";

    String corpus = "story";
    String textId = "001";

    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);

    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // Use setProperty with String values (the configurator entries' .value): a value that is
    // not a String is invisible to Properties.getProperty(), so the setting would be ignored.
    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    stanfordProps.setProperty("parse.originalDependencies", "true");
    stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
    stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    RelationAnnotator relationAnnotator = new RelationAnnotator();

    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        relationAnnotator.addView(ta);
    } catch (Exception e) {
        e.printStackTrace();
    }

    View mentionView = ta.getView(ViewNames.MENTION);

    List<Relation> predictedRelations = mentionView.getRelations();

    for (Relation r : predictedRelations) {
        IOHelper.printRelation(r);
    }
}

From source file:org.cogcomp.re.ExampleUsage.java

License:Open Source License

/**
 * Example: annotates a short sentence, builds a single test relation between two hand-picked
 * token spans, classifies it with the SemEval relation classifier and prints the predicted tag.
 *
 * <p>Exceptions raised during annotation or resource loading are printed. If the gazetteers
 * could not be loaded, the method reports this and returns without classifying.
 */
public static void SemEvalAnnotate() {
    String text = "People have been moving back into downtown.";
    String corpus = "semeval";
    String textId = "001";

    // Create a TextAnnotation From Text
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);

    POSAnnotator pos_annotator = new POSAnnotator();
    ChunkerAnnotator chunker = new ChunkerAnnotator(true);
    chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
    // Use setProperty with String values (the configurator entries' .value): a value that is
    // not a String is invisible to Properties.getProperty(), so the setting would be ignored.
    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    stanfordProps.setProperty("parse.originalDependencies", "true");
    stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
    stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
    String modelPath = "";
    FlatGazetteers gazetteers = null;
    try {
        ta.addView(pos_annotator);
        chunker.addView(ta);
        stanfordDepHandler.addView(ta);
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false);
        modelPath = model.getPath();
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        gazetteers = (FlatGazetteers) GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordnet = WordNetManager.getInstance();
        // Copy every token into an "RE_ANNOTATED" view, carrying over its attributes and
        // adding WordNet-derived ones.
        View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
        for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
            Constituent c = co.cloneForNewView("RE_ANNOTATED");
            for (String s : co.getAttributeKeys()) {
                c.addAttribute(s, co.getAttribute(s));
            }
            c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c));
            c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c));
            annotatedTokenView.addConstituent(c);
        }
        ta.addView("RE_ANNOTATED", annotatedTokenView);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // gazetteers stays null when the block above failed before loading them; dereferencing it
    // below would only add a NullPointerException on top of the already-reported failure.
    if (gazetteers == null) {
        System.err.println("Gazetteers could not be loaded; skipping relation classification.");
        return;
    }

    Constituent source = new Constituent("first", "Mention", ta, 0, 1);
    Constituent target = new Constituent("second", "Mention", ta, 6, 7);
    source.addAttribute("GAZ", gazetteers.annotatePhrase(source));
    target.addAttribute("GAZ", gazetteers.annotatePhrase(target));
    Relation relation = new Relation("TEST", source, target, 1.0f);

    String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL";
    semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex");
    String tag = classifier.discreteValue(relation);

    System.out.println(tag);
}

From source file:org.cogcomp.re.PredictedMentionReader.java

License:Open Source License

public PredictedMentionReader(String path) {
    relations = new ArrayList<>();
    try {/*ww w  . j av  a 2 s.c om*/
        ACEReader aceReader = new ACEReader(path, false);
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");

        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false);
        Gazetteers gazetteers = GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        WordNetManager wordNet = WordNetManager.getInstance();
        Properties stanfordProps = new Properties();
        stanfordProps.put("annotators", "pos, parse");
        stanfordProps.put("parse.originalDependencies", true);
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        for (TextAnnotation ta : aceReader) {
            if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) {
                continue;
            }
            ta.addView(pos_annotator);
            mentionAnnotator.addView(ta);
            stanfordDepHandler.addView(ta);
            chunker.addView(ta);

            View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta);
            for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) {
                Constituent c = co.cloneForNewView("RE_ANNOTATED");
                for (String s : co.getAttributeKeys()) {
                    c.addAttribute(s, co.getAttribute(s));
                }
                c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c));
                c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c));
                annotatedTokenView.addConstituent(c);
            }
            ta.addView("RE_ANNOTATED", annotatedTokenView);

            View goldView = ta.getView(ViewNames.MENTION_ACE);
            View predictedView = ta.getView(ViewNames.MENTION);
            Map<Constituent, Constituent> consMap = new HashMap<Constituent, Constituent>();
            for (Constituent c : goldView.getConstituents()) {
                consMap.put(c, null);
                Constituent ch = RelationFeatureExtractor.getEntityHeadForConstituent(c, ta, "");
                for (Constituent pc : predictedView.getConstituents()) {
                    Constituent pch = MentionAnnotator.getHeadConstituent(pc, "");
                    if (ch.getStartSpan() == pch.getStartSpan() && ch.getEndSpan() == pch.getEndSpan()) {
                        consMap.put(c, pc);
                        break;
                    }
                }
            }
            size_of_gold_relations += goldView.getRelations().size();
            for (int i = 0; i < ta.getNumberOfSentences(); i++) {
                Sentence curSentence = ta.getSentence(i);
                List<Constituent> in_cur_sentence = predictedView
                        .getConstituentsCoveringSpan(curSentence.getStartSpan(), curSentence.getEndSpan());
                for (int j = 0; j < in_cur_sentence.size(); j++) {
                    for (int k = j + 1; k < in_cur_sentence.size(); k++) {
                        Constituent source = in_cur_sentence.get(j);
                        Constituent target = in_cur_sentence.get(k);
                        Constituent sourceHead = MentionAnnotator.getHeadConstituent(source, "");
                        Constituent targetHead = MentionAnnotator.getHeadConstituent(target, "");
                        source.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(sourceHead));
                        target.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(targetHead));

                        boolean found_tag = false;
                        for (Relation r : goldView.getRelations()) {

                            if (consMap.get(r.getSource()) == null || consMap.get(r.getTarget()) == null) {
                                continue;
                            }

                            Constituent gsh = ACEReader.getEntityHeadForConstituent(r.getSource(), ta, "A");
                            Constituent gth = ACEReader.getEntityHeadForConstituent(r.getTarget(), ta, "A");
                            Constituent psh = MentionAnnotator.getHeadConstituent(source, "B");
                            Constituent pth = MentionAnnotator.getHeadConstituent(target, "B");

                            if (gsh.getStartSpan() == psh.getStartSpan() && gsh.getEndSpan() == psh.getEndSpan()
                                    && gth.getStartSpan() == pth.getStartSpan()
                                    && gth.getEndSpan() == pth.getEndSpan()) {
                                Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), source,
                                        target, 1.0f);
                                newRelation.addAttribute("RelationType", r.getAttribute("RelationType"));
                                newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype"));
                                newRelation.addAttribute("IsGoldRelation", "True");
                                relations.add(newRelation);
                                found_tag = true;
                                break;
                            }
                            if (gsh.getStartSpan() == pth.getStartSpan() && gsh.getEndSpan() == pth.getEndSpan()
                                    && gth.getStartSpan() == psh.getStartSpan()
                                    && gth.getEndSpan() == psh.getEndSpan()) {
                                Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), target,
                                        source, 1.0f);
                                newRelation.addAttribute("RelationType", r.getAttribute("RelationType"));
                                newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype"));
                                newRelation.addAttribute("IsGoldRelation", "True");
                                relations.add(newRelation);
                                found_tag = true;
                                break;
                            }
                        }
                        if (!found_tag) {
                            Relation newRelation = new Relation("NOT_RELATED", source, target, 1.0f);
                            newRelation.addAttribute("RelationType", "NOT_RELATED");
                            newRelation.addAttribute("RelationSubtype", "NOT_RELATED");
                            newRelation.addAttribute("IsGoldRelation", "False");
                            relations.add(newRelation);
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.cogcomp.re.SemEvalMentionReader.java

License:Open Source License

/**
 * Creates the external tools this reader relies on and stores them in fields: POS annotator,
 * gazetteers, WordNet manager, chunker, Stanford dependency handler and mention annotator.
 *
 * <p>Any exception raised during set-up is printed and otherwise ignored, so fields assigned
 * after the failing step remain unset.
 */
public void initExternalTools() {
    try {
        _posAnnotator = new POSAnnotator();
        Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
        File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
        _gazetteers = (FlatGazetteers) GazetteersFactory.get(5,
                gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);
        WordNetManager.loadConfigAsClasspathResource(true);
        _wordnet = WordNetManager.getInstance();
        __chunker = new ChunkerAnnotator(true);
        __chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

        // Use setProperty with String values (the configurator entries' .value): a value that
        // is not a String is invisible to Properties.getProperty(), so the setting would be
        // silently ignored.
        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        stanfordProps.setProperty("parse.originalDependencies", "true");
        stanfordProps.setProperty("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value);
        stanfordProps.setProperty("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value);
        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        __stanfordDep = new StanfordDepHandler(posAnnotator, parseAnnotator);
        __mentionAnnotator = new MentionAnnotator("ACE_TYPE");
    } catch (Exception e) {
        e.printStackTrace();
    }
}