List of usage examples for the `edu.stanford.nlp.ling.IndexedWord#value()` method.
@Override
public String value()
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java
License:Open Source License
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { modelProvider.configure(aJCas.getCas()); List<Tree> trees = new ArrayList<Tree>(); List<CoreMap> sentences = new ArrayList<CoreMap>(); List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>(); for (ROOT root : select(aJCas, ROOT.class)) { // Copy all relevant information from the tokens List<CoreLabel> tokens = new ArrayList<CoreLabel>(); for (Token token : selectCovered(Token.class, root)) { tokens.add(tokenToWord(token)); }//from ww w . ja v a 2 s .co m sentenceTokens.add(tokens); // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace // it with PRN to avoid NPEs. TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) { @Override public Tree newTreeNode(String aParent, List<Tree> aChildren) { String parent = aParent; if ("PRN0".equals(parent)) { parent = "PRN"; } Tree node = super.newTreeNode(parent, aChildren); return node; } }; // deep copy of the tree. These are modified inside coref! Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy(); treeCopy.indexSpans(); trees.add(treeCopy); // Build the sentence CoreMap sentence = new CoreLabel(); sentence.set(TreeAnnotation.class, treeCopy); sentence.set(TokensAnnotation.class, tokens); sentence.set(RootKey.class, root); sentences.add(sentence); // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590 // We currently do not copy over dependencies from the CAS. This is supposed to fill // in the dependencies so we do not get NPEs. 
TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder()); ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy, GrammaticalStructure.Extras.NONE); // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582 SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); for (IndexedWord vertex : deps.vertexSet()) { vertex.setWord(vertex.value()); } // merge the new CoreLabels with the tree leaves MentionExtractor.mergeLabels(treeCopy, tokens); MentionExtractor.initializeUtterance(tokens); } Annotation document = new Annotation(aJCas.getDocumentText()); document.set(SentencesAnnotation.class, sentences); Coreferencer coref = modelProvider.getResource(); // extract all possible mentions // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here // disables reparsing. 
RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false); List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0, coref.corefSystem.dictionaries()); // add the relevant info to mentions and order them for coref Map<Integer, CorefChain> result; try { Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions); result = coref.corefSystem.coref(doc); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } for (CorefChain chain : result.values()) { CoreferenceLink last = null; for (CorefMention mention : chain.getMentionsInTextualOrder()) { CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class) .get(mention.startIndex - 1); CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class) .get(mention.endIndex - 2); CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(), endLabel.get(TokenKey.class).getEnd()); if (mention.mentionType != null) { link.setReferenceType(mention.mentionType.toString()); } if (last == null) { // This is the first mention. Here we'll initialize the chain CoreferenceChain corefChain = new CoreferenceChain(aJCas); corefChain.setFirst(link); corefChain.addToIndexes(); } else { // For the other mentions, we'll add them to the chain. last.setNext(link); } last = link; link.addToIndexes(); } } }
From source file:edu.anu.spice.SpiceParser.java
License:Open Source License
/**
 * Attaches a phrasal-verb particle to the main predicate, if present.
 * The lemma "be" contributes nothing, leaving only the particle.
 */
protected String getPredicate(SemanticGraph sg, IndexedWord mainPred) {
    if (!sg.hasChildWithReln(mainPred, UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE)) {
        return mainPred.lemma();
    }
    IndexedWord particle = sg.getChildWithReln(mainPred,
            UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE);
    // For "be" the head slot is empty; the result then keeps its leading space, as before.
    String head = mainPred.lemma().equals("be") ? "" : mainPred.lemma();
    return head + " " + particle.value();
}
From source file:edu.anu.spice.SpiceParser.java
License:Open Source License
protected ProposedTuples parseAnnotation(Annotation ann) { ProposedTuples tuples = new ProposedTuples(); ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>(); for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { SemanticGraph sg = sentence/*from ww w .j a va2 s . c o m*/ .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); sgs.add(sg); } for (SemanticGraph sg : sgs) { // Everything from RuleBasedParser except resolvePlurals(sg); SemanticGraphEnhancer.processQuanftificationModifiers(sg); SemanticGraphEnhancer.collapseCompounds(sg); SemanticGraphEnhancer.collapseParticles(sg); SemanticGraphEnhancer.resolvePronouns(sg); SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } matcher = ACL_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN }; for (SemgrexPattern p : subjPredPatterns) { matcher = p.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord pred = matcher.getNode("pred"); if (sg.hasChildWithReln(pred, 
UniversalEnglishGrammaticalRelations.CASE_MARKER)) { IndexedWord caseMarker = sg.getChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER); String prep = caseMarker.value(); if (sg.hasChildWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { prep = prep + " " + additionalCaseMarker.value(); } } tuples.addTuple(subj, pred, prep); } else { if (!pred.lemma().equals("be")) { tuples.addTuple(subj, pred); } } } } matcher = ADJ_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = ADJ_PRED_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = PP_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); String reln = matcher.getRelnString("reln"); String predicate = reln.replace("nmod:", "").replace("_", " "); if (predicate.equals("poss") || predicate.equals("agent")) { continue; } tuples.addTuple(gov, mod, predicate); } matcher = POSS_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); tuples.addTuple(mod, gov, "have"); } matcher = AGENT_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); tuples.addTuple(subj, obj, getPredicate(sg, pred)); } matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); checkForNumericAttribute(tuples, sg, subj); 
checkForNumericAttribute(tuples, sg, obj); } matcher = PLURAL_SUBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); checkForNumericAttribute(tuples, sg, subj); } matcher = PLURAL_OTHER_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord word = matcher.getNode("word"); checkForNumericAttribute(tuples, sg, word); } matcher = COMPOUND_NOUN_PATTERN.matcher(sg); Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>(); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); compoundNouns.add(tail); compoundNouns.add(head); tuples.addTuple(tail, head); } // Must happen last, since it will reuse existing parts of the scene // graph matcher = NOUN_CONJ_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); int original_length = tuples.tuples.size(); for (int i = 0; i < original_length; ++i) { ArrayList<String> prop = tuples.tuples.get(i); if (prop.size() == 3 && prop.get(0).equals(head)) { tuples.addTuple(head, prop.get(1), prop.get(2)); } if (prop.size() == 3 && prop.get(1).equals(tail)) { tuples.addTuple(tail, prop.get(1), prop.get(2)); } } } matcher = NOUN_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord word = matcher.getNode("word"); if (!compoundNouns.contains(word)) { tuples.addTuple(word); } } } return tuples; }
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) { if (root == null) { root = dependencies.getFirstRoot(); }// w ww. j av a 2s . com DefaultMutableTreeNode node = new DefaultMutableTreeNode(); String nodeContent = root.value(); for (SemanticGraphEdge edge : dependencies.edgeIterable()) { if (edge.getDependent().equals(root)) { nodeContent = "<-" + edge.getRelation() + "- " + nodeContent; break; } } node.setUserObject(nodeContent); for (IndexedWord c : dependencies.getChildList(root)) { DefaultMutableTreeNode n = toDMTree(c, dependencies); node.add(n); } return node; }
From source file:main.java.parsers.StanfordParser.java
/**
 * Gets a map that links words on one of the ends of the dependency paths to the
 * dependency paths.
 *
 * @param offsets1 half-open [start, end) character offsets of the first argument
 * @param offsets2 half-open [start, end) character offsets of the second argument
 * @param first if true, key the map on the first word; otherwise on the second
 * @param startOffsetIndexedWord document tokens keyed by their start offset
 * @param graph dependency graph used to compute the paths
 * @return map from word to the distinct dependency paths it participates in
 */
public static Map<String, List<String>> getWordLinkedDependencyPaths(int[] offsets1, int[] offsets2,
        boolean first, Map<Integer, IndexedWord> startOffsetIndexedWord, SemanticGraph graph) {
    Map<String, List<String>> wordLinkedDependencyPaths = new HashMap<>();
    for (int startOffset1 = offsets1[0]; startOffset1 < offsets1[1]; startOffset1++) {
        // Single lookup instead of containsKey + get.
        IndexedWord iw1 = startOffsetIndexedWord.get(startOffset1);
        if (iw1 == null) {
            continue;
        }
        for (int startOffset2 = offsets2[0]; startOffset2 < offsets2[1]; startOffset2++) {
            IndexedWord iw2 = startOffsetIndexedWord.get(startOffset2);
            if (iw2 == null) {
                continue;
            }
            String path = getPath(iw1, iw2, graph);
            if (path.isEmpty()) {
                continue;
            }
            String word = first ? iw1.value() : iw2.value();
            List<String> dependencyPaths = wordLinkedDependencyPaths.get(word);
            if (dependencyPaths == null) {
                dependencyPaths = new ArrayList<>();
                wordLinkedDependencyPaths.put(word, dependencyPaths);
            }
            // Keep each path at most once per word.
            if (!dependencyPaths.contains(path)) {
                dependencyPaths.add(path);
            }
        }
    }
    return wordLinkedDependencyPaths;
}
From source file:main.java.spatialrelex.ling.Features.java
/**
 * Gets the lexical pattern containing spatial element roles and the words in
 * between, joined with '_' separators.
 *
 * @param startOffsetIndexedWord all tokens of the document to which the spatial
 *        element belongs, keyed by start offset.
 * @param startOffsetSpatialElement start offsets of the two or three spatial
 *        elements mapped to the elements; assumed to iterate in offset order
 *        (the original documentation calls it a sorted map) — TODO confirm.
 * @return the lexical pattern string.
 */
public static String getLexicalPatternStr(Map<Integer, IndexedWord> startOffsetIndexedWord,
        Map<Integer, SpatialElement> startOffsetSpatialElement) {
    // StringBuilder avoids the O(n^2) cost of repeated string concatenation.
    StringBuilder pattern = new StringBuilder();
    // End offset of the previously seen element; -1 before the first element.
    int previousEnd = -1;
    for (int startOffset : startOffsetSpatialElement.keySet()) {
        SpatialElement se = startOffsetSpatialElement.get(startOffset);
        if (previousEnd != -1) {
            // Collect the words strictly between the previous element and this one.
            StringBuilder between = new StringBuilder();
            for (int i = previousEnd; i < se.start; i++) {
                IndexedWord iw = startOffsetIndexedWord.get(i);
                if (iw != null) {
                    between.append(iw.value()).append(' ');
                }
            }
            String substring = between.toString().trim();
            if (!substring.isEmpty()) {
                pattern.append('_').append(substring);
            }
        }
        previousEnd = se.end;
        // Append this element's role, separated by '_' once the pattern is non-empty.
        if (pattern.length() == 0) {
            pattern.append(se.role);
        } else {
            pattern.append('_').append(se.role);
        }
    }
    return pattern.toString();
}
From source file:main.java.spatialrelex.markup.SpatialElement.java
/**
 * Populates token-derived features of a spatial element: its lemma text, token
 * span, General Inquirer categories (nouns), VerbNet classes (verbs), WordNet
 * synsets/hypernyms, and the SRL roles of its tokens.
 */
public static SpatialElement setSpatialElementFeatures(Doc document, SpatialElement se) {
    // Seed from the element's first token.
    IndexedWord firstToken = document.startOffsetIndexedWord.get(se.start);
    se.lemmaText = firstToken.lemma();
    se.startToken = firstToken.index();
    se.endToken = firstToken.index();

    // Walk the remaining character offsets covered by the element.
    for (int offset = se.start + 1; offset < se.end; offset++) {
        if (!document.startOffsetIndexedWord.containsKey(offset)) {
            continue;
        }
        IndexedWord token = document.startOffsetIndexedWord.get(offset);
        se.endToken = token.index();
        se.lemmaText += " " + token.lemma();

        if (token.tag().contains("NN")) {
            se.generalInquirerCategories = GeneralInquirer.getGeneralInquirerCategories(
                    se.generalInquirerCategories, token.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, token.tag(), "NN");
        } else if (token.tag().contains("VB")) {
            se.verbNetClasses = VerbNet.getVerbNetClasses(se.verbNetClasses,
                    token.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, token.tag(), "VB");
        }

        // Accumulate this token's SRL roles, skipping duplicates.
        List<String> roles = document.startOffsetSRLRoles.get(offset);
        if (roles != null) {
            for (String role : roles) {
                if (!se.srls.contains(role)) {
                    se.srls.add(role);
                }
            }
        }
    }
    return se;
}
From source file:me.aatma.languagetologic.graph.pattern.KBNLPatternEventAdvclEvent.java
public boolean check() { // String mark = null; GrammaticalRelation markGR = NLPTools.getGR("mark", null); if (this.dependencies.hasChildWithReln(this.toNl, markGR)) { this.advclMark = this.dependencies.getChildWithReln(this.toNl, markGR).value(); log.info("Marker of advcl: " + this.advclMark); }//from w w w . j a v a 2s.c o m if (edgeNlRelation.equals(NLPConstants.xcomp)) { if (this.dependencies.hasChildWithReln(this.toNl, NLPConstants.aux)) { Set<IndexedWord> auxs = this.dependencies.getChildrenWithReln(this.toNl, NLPConstants.aux); for (IndexedWord aux : auxs) { if (aux.value().equalsIgnoreCase("toKbNl")) { this.xcompTo = true; } } } } // This is inter event relationship return this.fromKbNl instanceof KBNLEventNodeCloud && this.toKbNl instanceof KBNLEventNodeCloud && (this.edgeNlRelation.toString().equals("advcl") || this.edgeNlRelation.toString().equals("vmod") || // TODO: Check when it relates two events, if its always a purpose.. prepGRs.contains(this.edgeNlRelation) || this.xcompTo); }
From source file:org.sam_agent.csparser.ContinuousParser.java
License:Open Source License
/**
 * Renders the given root words as a JSON-style array of quoted "word-index"
 * entries, e.g. ["ran-2","jumped-5"].
 */
public String stringify(Collection<IndexedWord> roots) {
    StringBuilder out = new StringBuilder("[");
    boolean firstEntry = true;
    for (IndexedWord root : roots) {
        if (!firstEntry) {
            out.append(',');
        }
        firstEntry = false;
        out.append('"').append(esc(root.value())).append('-').append(root.index()).append('"');
    }
    return out.append(']').toString();
}
From source file:semRewrite.datesandnumber.DateAndNumbersGeneration.java
License:Open Source License
/** ***************************************************************
 * Returns the surface form for proper nouns (NNP/NNPS) and the lemma for every
 * other tag.
 *
 * Fix: the original test {@code !tag.equals("NNP") || !tag.equals("NNPS")} is
 * always true (no tag equals both), so proper nouns were lemmatized as well;
 * the intent per the surrounding code was to exempt them.
 */
private String lemmatizeWord(IndexedWord measuredEntity) {
    String tag = measuredEntity.tag();
    if (tag.equals("NNP") || tag.equals("NNPS")) {
        return measuredEntity.value();
    }
    return measuredEntity.lemma();
}