Usage examples for the edu.stanford.nlp.pipeline.POSTaggerAnnotator constructor
public POSTaggerAnnotator(String annotatorName, Properties props)
From source file:edu.illinois.cs.cogcomp.comma.annotators.PreProcessor.java
License:Open Source License
public PreProcessor() throws Exception { System.out.println("initializing 1 "); // Initialise AnnotatorServices with default configurations Map<String, String> nonDefaultValues = new HashMap<>(); if (CommaProperties.getInstance().useCurator()) { nonDefaultValues.put(CuratorConfigurator.RESPECT_TOKENIZATION.key, Configurator.TRUE); nonDefaultValues.put(CuratorConfigurator.CURATOR_FORCE_UPDATE.key, Configurator.FALSE); ResourceManager curatorConfig = (new CuratorConfigurator()).getConfig(nonDefaultValues); annotatorService = CuratorFactory.buildCuratorClient(curatorConfig); } else {/*from ww w. j a va 2 s . c o m*/ ResourceManager rm = new Stanford331Configurator().getDefaultConfig(); String timePerSentence = Stanford331Configurator.STFRD_TIME_PER_SENTENCE.value; String maxParseSentenceLength = Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH.value; boolean throwExceptionOnSentenceLengthCheck = rm .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key); System.out.println("initializing 2 "); this.pos = new POSAnnotator(); this.nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL); this.shallowParser = new ChunkerAnnotator(); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", maxParseSentenceLength); stanfordProps.put("parse.maxtime", timePerSentence); // per sentence? could be per // document but no idea from // stanford javadoc POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); int maxLength = Integer.parseInt(maxParseSentenceLength); this.parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck); } }
From source file:edu.illinois.cs.cogcomp.pipeline.main.PipelineFactory.java
License:Open Source License
/** * instantiate a set of annotators for use in an AnnotatorService object by default, will use * lazy initialization where possible -- change this behavior with the * {@link PipelineConfigurator#USE_LAZY_INITIALIZATION} property. * /*from w w w.j av a 2s. co m*/ * @param nonDefaultRm ResourceManager with all non-default values for Annotators * @return a Map from annotator view name to annotator */ private static Map<String, Annotator> buildAnnotators(ResourceManager nonDefaultRm) throws IOException { ResourceManager rm = new PipelineConfigurator() .getConfig(new Stanford331Configurator().getConfig(nonDefaultRm)); String timePerSentence = rm.getString(Stanford331Configurator.STFRD_TIME_PER_SENTENCE); String maxParseSentenceLength = rm.getString(Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); boolean useLazyInitialization = rm.getBoolean(PipelineConfigurator.USE_LAZY_INITIALIZATION.key, PipelineConfigurator.TRUE); Map<String, Annotator> viewGenerators = new HashMap<>(); if (rm.getBoolean(PipelineConfigurator.USE_POS)) { POSAnnotator pos = new POSAnnotator(); viewGenerators.put(pos.getViewName(), pos); } if (rm.getBoolean(PipelineConfigurator.USE_LEMMA)) { IllinoisLemmatizer lem = new IllinoisLemmatizer(rm); viewGenerators.put(lem.getViewName(), lem); } if (rm.getBoolean(PipelineConfigurator.USE_SHALLOW_PARSE)) { viewGenerators.put(ViewNames.SHALLOW_PARSE, new ChunkerAnnotator()); } if (rm.getBoolean(PipelineConfigurator.USE_NER_CONLL)) { NERAnnotator nerConll = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL); viewGenerators.put(nerConll.getViewName(), nerConll); } if (rm.getBoolean(PipelineConfigurator.USE_NER_ONTONOTES)) { NERAnnotator nerOntonotes = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_ONTONOTES); viewGenerators.put(nerOntonotes.getViewName(), nerOntonotes); } if (rm.getBoolean(PipelineConfigurator.USE_DEP)) { DepAnnotator dep = new DepAnnotator(); viewGenerators.put(dep.getViewName(), dep); } if 
(rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP) || rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) { Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", maxParseSentenceLength); stanfordProps.put("parse.maxtime", timePerSentence); // per sentence? could be per // document but no idea from // stanford javadoc POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); int maxLength = Integer.parseInt(maxParseSentenceLength); boolean throwExceptionOnSentenceLengthCheck = rm .getBoolean(Stanford331Configurator.THROW_EXCEPTION_ON_FAILED_LENGTH_CHECK.key); if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_DEP)) { StanfordDepHandler depParser = new StanfordDepHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck); viewGenerators.put(depParser.getViewName(), depParser); } if (rm.getBoolean(PipelineConfigurator.USE_STANFORD_PARSE)) { StanfordParseHandler parser = new StanfordParseHandler(posAnnotator, parseAnnotator, maxLength, throwExceptionOnSentenceLengthCheck); viewGenerators.put(parser.getViewName(), parser); } } if (rm.getBoolean(PipelineConfigurator.USE_SRL_VERB)) { Properties verbProps = new Properties(); String verbType = SRLType.Verb.name(); verbProps.setProperty(SrlConfigurator.SRL_TYPE.key, verbType); ResourceManager verbRm = new ResourceManager(verbProps); rm = Configurator.mergeProperties(rm, verbRm); try { SemanticRoleLabeler verbSrl = new SemanticRoleLabeler(rm, useLazyInitialization); viewGenerators.put(ViewNames.SRL_VERB, verbSrl); } catch (Exception e) { throw new IOException("SRL verb cannot init: " + e.getMessage()); } } if (rm.getBoolean(PipelineConfigurator.USE_SRL_NOM)) { Properties nomProps = new Properties(); String nomType = SRLType.Nom.name(); 
nomProps.setProperty(SrlConfigurator.SRL_TYPE.key, nomType); ResourceManager nomRm = new ResourceManager(nomProps); rm = Configurator.mergeProperties(rm, nomRm); try { SemanticRoleLabeler nomSrl = new SemanticRoleLabeler(rm, useLazyInitialization); // note that you can't call nomSrl (or verbSrl).getViewName() as it may not be // initialized yet viewGenerators.put(ViewNames.SRL_NOM, nomSrl); // viewGenerators.put(ViewNames.SRL_NOM,new SrlHandler("NomSRL", "5.1.9", nomType, // ViewNames.SRL_NOM, // useLazyInitialization, rm)); } catch (Exception e) { throw new IOException("SRL nom cannot init .." + e.getMessage()); } } if (rm.getBoolean(PipelineConfigurator.USE_QUANTIFIER)) { Quantifier quantifierAnnotator = new Quantifier(); viewGenerators.put(ViewNames.QUANTITIES, quantifierAnnotator); } if (rm.getBoolean(PipelineConfigurator.USE_TRANSLITERATION)) { for (Language lang : TransliterationAnnotator.supportedLanguages) { TransliterationAnnotator transliterationAnnotator = new TransliterationAnnotator(true, lang); viewGenerators.put(ViewNames.TRANSLITERATION + "_" + lang.getCode(), transliterationAnnotator); } } if (rm.getBoolean(PipelineConfigurator.USE_SRL_PREP)) { PrepSRLAnnotator prepSRLAnnotator = new PrepSRLAnnotator(); viewGenerators.put(ViewNames.SRL_PREP, prepSRLAnnotator); } if (rm.getBoolean(PipelineConfigurator.USE_SRL_COMMA)) { CommaLabeler commaLabeler = new CommaLabeler(); viewGenerators.put(ViewNames.SRL_COMMA, commaLabeler); } if (rm.getBoolean(PipelineConfigurator.USE_VERB_SENSE)) { VerbSenseAnnotator verbSense = new VerbSenseAnnotator(); viewGenerators.put(ViewNames.VERB_SENSE, verbSense); } if (rm.getBoolean(PipelineConfigurator.USE_MENTION)) { MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE"); viewGenerators.put(ViewNames.MENTION, mentionAnnotator); } if (rm.getBoolean(PipelineConfigurator.USE_RELATION)) { viewGenerators.put(ViewNames.RELATION, new RelationAnnotator(true)); } if (rm.getBoolean(PipelineConfigurator.USE_TIMEX3)) { 
Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties(); TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps)); viewGenerators.put(ViewNames.TIMEX3, tca); } if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_ESA)) { rm = new ESADatalessConfigurator().getConfig(nonDefaultRm); ESADatalessAnnotator esaDataless = new ESADatalessAnnotator(rm); viewGenerators.put(ViewNames.DATALESS_ESA, esaDataless); } if (rm.getBoolean(PipelineConfigurator.USE_DATALESS_W2V)) { rm = new W2VDatalessConfigurator().getConfig(nonDefaultRm); W2VDatalessAnnotator w2vDataless = new W2VDatalessAnnotator(rm); viewGenerators.put(ViewNames.DATALESS_W2V, w2vDataless); } if (rm.getBoolean(PipelineConfigurator.USE_QUESTION_TYPER)) { QuestionTypeAnnotator questionTyper = new QuestionTypeAnnotator(); viewGenerators.put(ViewNames.QUESTION_TYPE, questionTyper); } return viewGenerators; }
From source file:org.cogcomp.re.ACEMentionReader.java
License:Open Source License
public ACEMentionReader(String file, String type) { readType = type;/*from w w w .ja v a 2s .co m*/ relations_mono = new ArrayList<>(); relations_bi = new ArrayList<>(); try { ACEReader reader = new ACEReaderWithTrueCaseFixer(file, new String[] { "bn", "nw" }, false); POSAnnotator pos_annotator = new POSAnnotator(); ChunkerAnnotator chunker = new ChunkerAnnotator(true); chunker.initialize(new ChunkerConfigurator().getDefaultConfig()); Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig()); File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false); Gazetteers gazetteers = GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English); WordNetManager.loadConfigAsClasspathResource(true); WordNetManager wordNet = WordNetManager.getInstance(); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE); POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator); for (TextAnnotation ta : reader) { if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) { continue; } ta.addView(pos_annotator); stanfordDepHandler.addView(ta); chunker.addView(ta); View entityView = ta.getView(ViewNames.MENTION_ACE); View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta); for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) { Constituent c = co.cloneForNewView("RE_ANNOTATED"); for (String s : co.getAttributeKeys()) { c.addAttribute(s, co.getAttribute(s)); } c.addAttribute("WORDNETTAG", 
BIOFeatureExtractor.getWordNetTags(wordNet, c)); c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordNet, c)); annotatedTokenView.addConstituent(c); } ta.addView("RE_ANNOTATED", annotatedTokenView); List<Relation> existRelations = entityView.getRelations(); for (int i = 0; i < ta.getNumberOfSentences(); i++) { Sentence curSentence = ta.getSentence(i); List<Constituent> cins = entityView.getConstituentsCoveringSpan(curSentence.getStartSpan(), curSentence.getEndSpan()); for (int j = 0; j < cins.size(); j++) { for (int k = j + 1; k < cins.size(); k++) { Constituent firstArg = cins.get(j); Constituent secondArg = cins.get(k); Constituent firstArgHead = RelationFeatureExtractor .getEntityHeadForConstituent(firstArg, firstArg.getTextAnnotation(), "A"); Constituent secondArgHead = RelationFeatureExtractor .getEntityHeadForConstituent(secondArg, secondArg.getTextAnnotation(), "A"); firstArg.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(firstArgHead)); secondArg.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(secondArgHead)); boolean found_as_source = false; boolean found_as_target = false; for (Relation r : existRelations) { if (r.getSource().getStartSpan() == firstArg.getStartSpan() && r.getSource().getEndSpan() == firstArg.getEndSpan() && r.getTarget().getStartSpan() == secondArg.getStartSpan() && r.getTarget().getEndSpan() == secondArg.getEndSpan()) { relations_mono.add(r); found_as_source = true; String opTagFine = getOppoName(r.getAttribute("RelationSubtype")); String opTagCoarse = ACERelationTester.getCoarseType(opTagFine); Relation opdir = new Relation(opTagCoarse, secondArg, firstArg, 1.0f); opdir.addAttribute("RelationSubtype", opTagFine); opdir.addAttribute("RelationType", opTagCoarse); relations_bi.add(r); relations_bi.add(opdir); break; } if (r.getTarget().getStartSpan() == firstArg.getStartSpan() && r.getTarget().getEndSpan() == firstArg.getEndSpan() && r.getSource().getStartSpan() == secondArg.getStartSpan() 
&& r.getSource().getEndSpan() == secondArg.getEndSpan()) { relations_mono.add(r); found_as_target = true; String opTagFine = getOppoName(r.getAttribute("RelationSubtype")); String opTagCoarse = ACERelationTester.getCoarseType(opTagFine); Relation opdir = new Relation(opTagCoarse, firstArg, secondArg, 1.0f); opdir.addAttribute("RelationSubtype", opTagFine); opdir.addAttribute("RelationType", opTagCoarse); relations_bi.add(r); relations_bi.add(opdir); break; } } if (!found_as_source && !found_as_target) { Relation newRelation_1 = new Relation("NOT_RELATED", firstArg, secondArg, 1.0f); newRelation_1.addAttribute("RelationSubtype", "NOT_RELATED"); newRelation_1.addAttribute("RelationType", "NOT_RELATED"); relations_mono.add(newRelation_1); Relation newRelation_2 = new Relation("NOT_RELATED", secondArg, firstArg, 1.0f); newRelation_2.addAttribute("RelationSubtype", "NOT_RELATED"); newRelation_2.addAttribute("RelationType", "NOT_RELATED"); relations_bi.add(newRelation_1); relations_bi.add(newRelation_2); } } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.cogcomp.re.ExampleUsage.java
License:Open Source License
public static void AnnotatorExample() { String text = "He went to Chicago after his Father moved there."; String corpus = "story"; String textId = "001"; // Create a TextAnnotation From Text TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer()); TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text); POSAnnotator pos_annotator = new POSAnnotator(); ChunkerAnnotator chunker = new ChunkerAnnotator(true); chunker.initialize(new ChunkerConfigurator().getDefaultConfig()); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE); POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator); RelationAnnotator relationAnnotator = new RelationAnnotator(); try {/*from w w w . j a v a 2 s . co m*/ ta.addView(pos_annotator); chunker.addView(ta); stanfordDepHandler.addView(ta); relationAnnotator.addView(ta); } catch (Exception e) { e.printStackTrace(); } View mentionView = ta.getView(ViewNames.MENTION); List<Constituent> predictedMentions = mentionView.getConstituents(); List<Relation> predictedRelations = mentionView.getRelations(); for (Relation r : predictedRelations) { IOHelper.printRelation(r); } }
From source file:org.cogcomp.re.ExampleUsage.java
License:Open Source License
public static void SemEvalAnnotate() { String text = "People have been moving back into downtown."; String corpus = "semeval"; String textId = "001"; // Create a TextAnnotation From Text TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer()); TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text); POSAnnotator pos_annotator = new POSAnnotator(); ChunkerAnnotator chunker = new ChunkerAnnotator(true); chunker.initialize(new ChunkerConfigurator().getDefaultConfig()); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE); POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator); String modelPath = ""; FlatGazetteers gazetteers = null;/*from w ww. 
java2 s .c om*/ try { ta.addView(pos_annotator); chunker.addView(ta); stanfordDepHandler.addView(ta); Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig()); File model = ds.getDirectory("org.cogcomp.re", "SEMEVAL", 1.1, false); modelPath = model.getPath(); File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false); gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English); WordNetManager.loadConfigAsClasspathResource(true); WordNetManager wordnet = WordNetManager.getInstance(); View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta); for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) { Constituent c = co.cloneForNewView("RE_ANNOTATED"); for (String s : co.getAttributeKeys()) { c.addAttribute(s, co.getAttribute(s)); } c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordnet, c)); c.addAttribute("WORDNETHYM", BIOFeatureExtractor.getWordNetHyms(wordnet, c)); annotatedTokenView.addConstituent(c); } ta.addView("RE_ANNOTATED", annotatedTokenView); } catch (Exception e) { e.printStackTrace(); } Constituent source = new Constituent("first", "Mention", ta, 0, 1); Constituent target = new Constituent("second", "Mention", ta, 6, 7); source.addAttribute("GAZ", gazetteers.annotatePhrase(source)); target.addAttribute("GAZ", gazetteers.annotatePhrase(target)); Relation relation = new Relation("TEST", source, target, 1.0f); String prefix = modelPath + File.separator + "SEMEVAL" + File.separator + "SEMEVAL"; semeval_relation_classifier classifier = new semeval_relation_classifier(prefix + ".lc", prefix + ".lex"); String tag = classifier.discreteValue(relation); System.out.println(tag); }
From source file:org.cogcomp.re.PredictedMentionReader.java
License:Open Source License
public PredictedMentionReader(String path) { relations = new ArrayList<>(); try {//from ww w . j a v a 2 s . c o m ACEReader aceReader = new ACEReader(path, false); POSAnnotator pos_annotator = new POSAnnotator(); ChunkerAnnotator chunker = new ChunkerAnnotator(true); chunker.initialize(new ChunkerConfigurator().getDefaultConfig()); MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE"); Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig()); File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.6, false); Gazetteers gazetteers = GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English); WordNetManager.loadConfigAsClasspathResource(true); WordNetManager wordNet = WordNetManager.getInstance(); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE); POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator); for (TextAnnotation ta : aceReader) { if (ta.getId().equals("bn\\CNN_ENG_20030424_070008.15.apf.xml")) { continue; } ta.addView(pos_annotator); mentionAnnotator.addView(ta); stanfordDepHandler.addView(ta); chunker.addView(ta); View annotatedTokenView = new SpanLabelView("RE_ANNOTATED", ta); for (Constituent co : ta.getView(ViewNames.TOKENS).getConstituents()) { Constituent c = co.cloneForNewView("RE_ANNOTATED"); for (String s : co.getAttributeKeys()) { c.addAttribute(s, co.getAttribute(s)); } c.addAttribute("WORDNETTAG", BIOFeatureExtractor.getWordNetTags(wordNet, c)); c.addAttribute("WORDNETHYM", 
BIOFeatureExtractor.getWordNetHyms(wordNet, c)); annotatedTokenView.addConstituent(c); } ta.addView("RE_ANNOTATED", annotatedTokenView); View goldView = ta.getView(ViewNames.MENTION_ACE); View predictedView = ta.getView(ViewNames.MENTION); Map<Constituent, Constituent> consMap = new HashMap<Constituent, Constituent>(); for (Constituent c : goldView.getConstituents()) { consMap.put(c, null); Constituent ch = RelationFeatureExtractor.getEntityHeadForConstituent(c, ta, ""); for (Constituent pc : predictedView.getConstituents()) { Constituent pch = MentionAnnotator.getHeadConstituent(pc, ""); if (ch.getStartSpan() == pch.getStartSpan() && ch.getEndSpan() == pch.getEndSpan()) { consMap.put(c, pc); break; } } } size_of_gold_relations += goldView.getRelations().size(); for (int i = 0; i < ta.getNumberOfSentences(); i++) { Sentence curSentence = ta.getSentence(i); List<Constituent> in_cur_sentence = predictedView .getConstituentsCoveringSpan(curSentence.getStartSpan(), curSentence.getEndSpan()); for (int j = 0; j < in_cur_sentence.size(); j++) { for (int k = j + 1; k < in_cur_sentence.size(); k++) { Constituent source = in_cur_sentence.get(j); Constituent target = in_cur_sentence.get(k); Constituent sourceHead = MentionAnnotator.getHeadConstituent(source, ""); Constituent targetHead = MentionAnnotator.getHeadConstituent(target, ""); source.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(sourceHead)); target.addAttribute("GAZ", ((FlatGazetteers) gazetteers).annotatePhrase(targetHead)); boolean found_tag = false; for (Relation r : goldView.getRelations()) { if (consMap.get(r.getSource()) == null || consMap.get(r.getTarget()) == null) { continue; } Constituent gsh = ACEReader.getEntityHeadForConstituent(r.getSource(), ta, "A"); Constituent gth = ACEReader.getEntityHeadForConstituent(r.getTarget(), ta, "A"); Constituent psh = MentionAnnotator.getHeadConstituent(source, "B"); Constituent pth = MentionAnnotator.getHeadConstituent(target, "B"); if 
(gsh.getStartSpan() == psh.getStartSpan() && gsh.getEndSpan() == psh.getEndSpan() && gth.getStartSpan() == pth.getStartSpan() && gth.getEndSpan() == pth.getEndSpan()) { Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), source, target, 1.0f); newRelation.addAttribute("RelationType", r.getAttribute("RelationType")); newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype")); newRelation.addAttribute("IsGoldRelation", "True"); relations.add(newRelation); found_tag = true; break; } if (gsh.getStartSpan() == pth.getStartSpan() && gsh.getEndSpan() == pth.getEndSpan() && gth.getStartSpan() == psh.getStartSpan() && gth.getEndSpan() == psh.getEndSpan()) { Relation newRelation = new Relation(r.getAttribute("RelationSubtype"), target, source, 1.0f); newRelation.addAttribute("RelationType", r.getAttribute("RelationType")); newRelation.addAttribute("RelationSubtype", r.getAttribute("RelationSubtype")); newRelation.addAttribute("IsGoldRelation", "True"); relations.add(newRelation); found_tag = true; break; } } if (!found_tag) { Relation newRelation = new Relation("NOT_RELATED", source, target, 1.0f); newRelation.addAttribute("RelationType", "NOT_RELATED"); newRelation.addAttribute("RelationSubtype", "NOT_RELATED"); newRelation.addAttribute("IsGoldRelation", "False"); relations.add(newRelation); } } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.cogcomp.re.SemEvalMentionReader.java
License:Open Source License
public void initExternalTools() { try {//from w w w . j av a2 s. c om _posAnnotator = new POSAnnotator(); Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig()); File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false); _gazetteers = (FlatGazetteers) GazetteersFactory.get(5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English); WordNetManager.loadConfigAsClasspathResource(true); _wordnet = WordNetManager.getInstance(); __chunker = new ChunkerAnnotator(true); __chunker.initialize(new ChunkerConfigurator().getDefaultConfig()); Properties stanfordProps = new Properties(); stanfordProps.put("annotators", "pos, parse"); stanfordProps.put("parse.originalDependencies", true); stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH); stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE); POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps); ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps); __stanfordDep = new StanfordDepHandler(posAnnotator, parseAnnotator); __mentionAnnotator = new MentionAnnotator("ACE_TYPE"); } catch (Exception e) { e.printStackTrace(); } }
From source file:shef.mt.tools.ParsingProcessor.java
public ParsingProcessor(String lang, String pm, String dm, HashSet<String> requirements) { //Store required resources: this.requiresPOSTags = requirements.contains("postags"); this.requiresDepCounts = requirements.contains("depcounts"); //Create model path objects: String posModel = null;/*w ww . j a va2 s. c o m*/ String depModel = null; //Setup model paths: if (pm == null) { if (lang.equals("english")) { posModel = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; } else if (lang.equals("spanish")) { posModel = "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger"; } else if (lang.equals("chinese")) { posModel = "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger"; } else { posModel = "edu/stanford.nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; } } else { posModel = pm; } if (dm == null) { if (lang.equals("english")) { depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; } else if (lang.equals("spanish")) { depModel = "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz"; } else if (lang.equals("chinese")) { depModel = "edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz"; } else { depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; } } else { depModel = dm; } //Create base properties: Properties props = new Properties(); if (this.requiresDepCounts) { props.setProperty("annotators", "tokenize, ssplit, pos, parse"); } else { props.setProperty("annotators", "tokenize, ssplit, pos"); } //Create base pipeline: pipeline = new StanfordCoreNLP(props); try { //Create pipeline object: tokenizer = new TokenizerAnnotator(true, TokenizerAnnotator.TokenizerType.Whitespace); //Add objects to the pipeline: pipeline.addAnnotator(tokenizer); } catch (Exception ex) { System.out.println("ERROR: Problem while creating Stanford tokenizer."); } try { //Create pipeline object: tagger = new POSTaggerAnnotator(posModel, false); //Add object to the 
pipeline: pipeline.addAnnotator(tagger); } catch (Exception ex) { System.out.println( "ERROR: Problem while creating Stanford POS tagger. Please review the model paths and check for library availability."); } //If dependency counts are required: if (this.requiresDepCounts) { try { //Create pipeline object: parser = new ParserAnnotator(depModel, false, 300, StringUtils.EMPTY_STRING_ARRAY); //Add object to the pipeline: pipeline.addAnnotator(parser); } catch (Exception ex) { System.out.println( "ERROR: Problem while creating Stanford dependency parser. Please review the model paths and check for library availability."); } } }