List of usage examples for edu.stanford.nlp.trees.Tree.label()
@Override
public Label label()
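Before the source-file examples, a minimal self-contained sketch of what label() exposes on each node of a constituency tree; the bracketed parse string and the class name are assumptions made only for illustration:

import edu.stanford.nlp.trees.Tree;

public class TreeLabelDemo {
    public static void main(String[] args) {
        // an assumed Penn Treebank bracketing, just for illustration
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN parser)) (VP (VBZ works))))");
        // label() is defined on every node; value() yields the category of an
        // internal node or the surface form of a leaf
        for (Tree node : tree.preOrderNodeList()) {
            System.out.println(node.label().value() + (node.isLeaf() ? "  [leaf]" : ""));
        }
    }
}

Run on the tree above, the pre-order traversal prints ROOT, S, NP, DT, The [leaf], NN, parser [leaf], VP, VBZ, works [leaf].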
From source file:englishparser.EnglishParser.java
private static boolean isNNP(Tree t) {
    // return t.label().value().equals("NNS") || t.label().value().equals("NP") || t.label().value().equals("NN");
    return t.label().value().equals("NNP");
}
From source file:ims.cs.parc.ParcUtils.java
License:Open Source License
/**
 * Find all head verbs in the corpus. The algorithm is taken from Pareti (2015).
 * @param sentence
 */
public static void markHeadVerbs(Sentence sentence) {
    for (Tree tree : sentence.tree.preOrderNodeList()) {
        if (tree.label().value().equals("VP")) {
            boolean valid = true;
            for (Tree child : tree.children()) {
                if (child.label().value().equals("VP")) {
                    valid = false;
                    break;
                }
            }
            if (valid) {
                for (Tree child : tree.children()) {
                    if (child.firstChild().isLeaf() && child.label().value().startsWith("V")) {
                        Token token = sentence.treeLookup.get(child.firstChild());
                        if (token != null)
                            token.isHeadVerb = true;
                    }
                }
            }
        }
    }
}
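The Sentence and Token types above are project-specific and not shown here. A minimal sketch of the same "lowest VP" idea applied directly to a Stanford Tree, printing the verbs under VP nodes that have no VP child (the parse string is an assumed example):

static void printHeadVerbs() {
    Tree tree = Tree.valueOf(
            "(ROOT (S (NP (PRP She)) (VP (VBZ wants) (S (VP (TO to) (VP (VB leave)))))))");
    for (Tree node : tree.preOrderNodeList()) {
        if (!node.label().value().equals("VP")) continue;
        boolean lowest = true;
        for (Tree child : node.children()) {
            if (child.label().value().equals("VP")) { lowest = false; break; }
        }
        if (lowest) {
            for (Tree child : node.children()) {
                // pre-terminal verb: a V* tag whose single child is the word itself
                if (!child.isLeaf() && child.firstChild().isLeaf()
                        && child.label().value().startsWith("V")) {
                    System.out.println("head verb: " + child.firstChild().label().value());
                }
            }
        }
    }
}

For the assumed parse this prints "wants" and "leave", the verbs of the two lowest VPs.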
From source file:ims.cs.qsample.features.components.SentenceConstituentFeatures.java
License:Open Source License
/**
 * Recursion step for tree features
 * @param sentence
 * @param t complete tree
 * @param level current level
 * @param governingLabels list of governing labels
 * @param parent information about direct parent
 * @param isLeftmost is the node the leftmost one in the constituent specified by ancestorWhereLeftmost
 * @param ancestorWhereLeftmost
 */
private static void addTreeFeatures(Sentence sentence, Tree t, int level, List<NodeFeatures> governingLabels,
        NodeFeatures parent, boolean isLeftmost, NodeFeatures ancestorWhereLeftmost) {
    if (t.isLeaf()) {
        /* terminal nodes */
        // get the current token represented by this subtree
        Token pToken = sentence.treeLookup.get(t);

        // check if token is null. this can happen if the token was unaligned previously
        // (e.g., because of a parser error)
        if (pToken == null) {
            if (StaticConfig.verbose)
                System.err.println(sentence.sentenceId + " Dropping tree without associated token: " + t + " ");
            return;
        }

        FeatureSet fs = pToken.boundaryFeatureSet;

        // leftmost feature (see Pareti paper for description)
        if (StaticConfig.constituentLeftmost && isLeftmost)
            fs.add(LEFTMOST_FEATURE);

        // level in tree
        if (StaticConfig.constituentLevel) {
            fs.add(LEVEL_FEATURE + level);
            addLevelBinHeuristic(pToken, LEVEL_FEATURE, level);
        }

        // leftmost feature label
        if (StaticConfig.constituentAncestorL) {
            fs.add(AL_FEATURE + "LBL:" + ancestorWhereLeftmost.label);
            fs.add(AL_FEATURE + "LVL:" + ancestorWhereLeftmost.level);
            addLevelBinHeuristic(pToken, AL_FEATURE + "LVL", ancestorWhereLeftmost.level);
        }

        // parent in constituent tree
        if (StaticConfig.constituentParent) {
            fs.add(PARENT_FEATURE + "LBL:" + parent.label);
        }

        // labels of all ancestors
        if (StaticConfig.constituentGoverning) {
            /* "Ancestor" features in the paper */
            for (NodeFeatures nf : governingLabels) {
                // label with and without depth
                fs.add(GOV_FEATURE + nf.label + "@" + nf.level); /* ambiguous in paper */
                fs.add(GOV_FEATURE + nf.label);
                fs.add(GOV_FEATURE + nf.label + "@-" + (level - nf.level)); /* ambiguous in paper */
                addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@", nf.level);
                addLevelBinHeuristic(pToken, GOV_FEATURE + nf.label + "@-", (level - nf.level));
            }
        }
    } else {
        // non-terminal node
        List<Tree> childList = t.getChildrenAsList();
        String label = t.label().toString();

        // copy governing node features for next recursion step
        List<NodeFeatures> governingLabelsUpdate = new LinkedList<NodeFeatures>(governingLabels);
        governingLabelsUpdate.add(new NodeFeatures(label, level));

        // set leftmost ancestor
        if (ancestorWhereLeftmost == null) {
            ancestorWhereLeftmost = new NodeFeatures(label, level);
        }

        // check for pre-terminals -- otherwise, set the leftmost flag for the first constituent
        if (childList.size() > 1) {
            isLeftmost = true;
        }

        // call function for all children
        for (Tree child : childList) {
            addTreeFeatures(sentence, child, level + 1, governingLabelsUpdate, new NodeFeatures(label, level),
                    isLeftmost, ancestorWhereLeftmost);
            isLeftmost = false;
            ancestorWhereLeftmost = null;
        }
    }
}
From source file:info.mhaas.ma.Evaluation.CCEvaluator.java
private static boolean labelMatches(Tree gold, Tree predicted) {
    Label goldLabel = gold.label();
    Label predLabel = predicted.label();
    return collapseFineGrained(goldLabel) == collapseFineGrained(predLabel);
}
From source file:it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java
License:Open Source License
/**
 * Annotate the document by splitting it into sentences, tokenizing it,
 * performing PoS tagging and Named Entity Recognition using the Stanford
 * CoreNLP tools.
 *
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {

    if (pipeline == null) {
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization,
        // NER, parsing, and coreference resolution
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref");
        pipeline = new StanfordCoreNLP(props);
    }

    // read some text in the text variable
    String text = component.getText();

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // get the graph for coreference resolution
    Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    // prepare the map for the coreference graph of the document
    Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>();

    for (CorefChain corefChain : graph.values()) {

        // get the representative mention, that is, the word recalled in other sentences
        CorefChain.CorefMention cm = corefChain.getRepresentativeMention();

        // eliminate auto-references
        if (corefChain.getMentionMap().size() <= 1) {
            continue;
        }

        // get the stemmed form of the references, so the comparison with
        // grams will be easier
        List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1)
                .get(TokensAnnotation.class);

        // list of tokens which compose the anaphor
        List<Token> anaphorsTokens = new ArrayList<>();
        for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) {
            CoreLabel current = tks.get(i);
            Token t = new Token(current.word());
            t.setPoS(current.tag());
            t.setLemma(current.lemma());
            anaphorsTokens.add(t);
        }

        // the mention n-gram which is formed by the anaphor and a
        // list of references
        Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan);

        // get map of the references to the corefchain obj
        Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values();
        for (Set<CorefChain.CorefMention> mentions : mentionMap) {
            for (CorefChain.CorefMention reference : mentions) {
                // eliminate self-references
                if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) {
                    continue;
                }
                List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1)
                        .get(TokensAnnotation.class);

                // list of tokens which compose the mention
                List<Token> mentionTokens = new ArrayList<>();
                for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) {
                    CoreLabel current = tokens.get(i);
                    // set token features
                    Token t = new Token(current.word());
                    t.setPoS(current.tag());
                    t.setLemma(current.lemma());
                    mentionTokens.add(t);
                }
                // add to the mention a new reference
                mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString());
            }
        }

        // assign to the document a new coreference obj
        // containing the anaphor and its mentions
        blackboard.addGram(mention);
    }

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and
    // has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // a counter that keeps track of the number of phrases in a sentence
    int phraseCounter = 0;

    for (CoreMap stanfordSentence : sentences) {

        Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++);

        distilledSentence.setLanguage(Locale.ENGLISH);

        // getting the parse tree of the sentence so as to count the number of phrases.
        // ROOT sentences are the first-level children in the parse tree; every ROOT sentence
        // consists of a group of clauses, which can be principal (main clauses) or not
        // (coordinate and subordinate). We use ROOT sentences as a starting point to find all
        // the phrases present in the sentences themselves, checking for the tag "S".
        Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);

        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCounter++;
            }
        }

        // annotate the sentence with a new feature counting all the phrases
        // contained in the sentence
        distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter));

        // traversing the words in the current sentence:
        // for each token in the text, we create a new token and annotate it
        // with the word representing it, its POS tag and its lemma
        for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) {
            // this is the text of the token
            Token t = new Token(token.originalText());
            // this is the POS tag of the token
            t.setPoS(token.tag());
            // this is the lemma of the token
            t.setLemma(token.lemma());

            String ner = token.get(NamedEntityTagAnnotation.class);
            if (!ner.equalsIgnoreCase("O")) {
                t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner));
            }
            // add the token to the sentence
            distilledSentence.addToken(t);
        }

        // add the sentence to the document
        ((DocumentComposite) component).addComponent(distilledSentence);
    }
}
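A reduced, self-contained sketch of the phrase-counting step above, assuming only the CoreNLP annotators needed for a constituency parse and an arbitrary example text:

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class ClauseCountDemo {
    public static void main(String[] args) {
        // only the annotators needed to obtain a constituency parse
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("The cat sat on the mat because it was tired.");
        pipeline.annotate(document);

        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
            int clauseCount = 0;
            // count "S" constituents, as in the example above
            for (Tree sub : tree.subTreeList()) {
                if (sub.label().value().equals("S")) {
                    clauseCount++;
                }
            }
            System.out.println("clause (S) nodes: " + clauseCount);
        }
    }
}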
From source file:knu.univ.lingvo.coref.Mention.java
License:Open Source License
public String lowestNPIncludesHead() {
    String ret = "";
    Tree head = this.contextParseTree.getLeaves().get(this.headIndex);
    Tree lowestNP = head;
    String s;
    while (true) {
        if (lowestNP == null)
            return ret;
        s = ((CoreLabel) lowestNP.label()).get(CoreAnnotations.ValueAnnotation.class);
        if (s.equals("NP") || s.equals("ROOT"))
            break;
        lowestNP = lowestNP.ancestor(1, this.contextParseTree);
    }
    if (s.equals("ROOT"))
        lowestNP = head;
    for (Tree t : lowestNP.getLeaves()) {
        if (!ret.equals(""))
            ret = ret + " ";
        ret = ret + ((CoreLabel) t.label()).get(CoreAnnotations.TextAnnotation.class);
    }
    if (!this.spanToString().contains(ret))
        return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
    return ret;
}
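A stripped-down sketch of the same ancestor-climbing pattern, using plain label().value() checks instead of the CoreLabel annotations above; the method name and parameters are assumptions made for illustration:

// climb from a leaf to the lowest dominating NP (or give up at the root)
static Tree lowestNPAbove(Tree tree, int leafIndex) {
    Tree node = tree.getLeaves().get(leafIndex);
    while (node != null && !node.label().value().equals("NP")) {
        // ancestor(1, root) is the node's parent within the given root tree
        node = node.ancestor(1, tree);
    }
    return node; // null if no NP dominates the leaf
}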
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
protected int getHeadIndex(Tree t) {
    // The trees passed in do not have the CoordinationTransformer
    // applied, but that just means the SemanticHeadFinder results are
    // slightly worse.
    Tree ht = t.headTerminal(headFinder);
    if (ht == null)
        return -1; // temporary: a key which is matched to nothing
    CoreLabel l = (CoreLabel) ht.label();
    return l.get(CoreAnnotations.IndexAnnotation.class);
}
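The index lookup above works only when the tree labels are CoreLabels produced by a CoreNLP pipeline. A minimal sketch of the head-finding part on its own, with CollinsHeadFinder chosen as an assumed stock head finder:

// find the head word of a constituent and print its POS tag and surface form
static void printHead(Tree constituent) {
    HeadFinder headFinder = new CollinsHeadFinder();
    Tree headWord = constituent.headTerminal(headFinder);    // the head leaf
    Tree headTag = constituent.headPreTerminal(headFinder);  // its POS node
    if (headWord != null && headTag != null) {
        System.out.println(headTag.label().value() + " -> " + headWord.label().value());
    }
}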
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/**
 * Post-processes the extracted mentions. Here we set the Mention fields required for coref and order mentions by tree-traversal order.
 * @param words List of words in each sentence, in textual order
 * @param trees List of trees, one per sentence
 * @param unorderedMentions List of unordered, unprocessed mentions
 *                          Each mention MUST have startIndex and endIndex set!
 *                          Optionally, if scoring is desired, mentions must have mentionID and originalRef set.
 *                          All the other Mention fields are set here.
 * @return List of mentions ordered according to the tree traversal
 * @throws Exception
 */
public List<List<Mention>> arrange(Annotation anno, List<List<CoreLabel>> words, List<Tree> trees,
        List<List<Mention>> unorderedMentions, boolean doMergeLabels) throws Exception {

    List<List<Mention>> orderedMentionsBySentence = new ArrayList<List<Mention>>();

    //
    // traverse all sentences and process each individual one
    //
    int mentionNumber = 0;
    for (int sent = 0, sz = words.size(); sent < sz; sent++) {
        List<CoreLabel> sentence = words.get(sent);
        Tree tree = trees.get(sent);
        List<Mention> mentions = unorderedMentions.get(sent);
        Map<String, List<Mention>> mentionsToTrees = Generics.newHashMap();

        // merge the parse tree of the entire sentence with the sentence words
        if (doMergeLabels)
            mergeLabels(tree, sentence);

        //
        // set the surface information and the syntactic info in each mention
        // startIndex and endIndex MUST be set before!
        //
        for (Mention mention : mentions) {
            mention.sentenceNumber = sent;
            mention.mentionNumber = mentionNumber++;
            mention.contextParseTree = tree;
            mention.sentenceWords = sentence;
            mention.originalSpan = new ArrayList<CoreLabel>(
                    mention.sentenceWords.subList(mention.startIndex, mention.endIndex));
            if (!((CoreLabel) tree.label()).has(CoreAnnotations.BeginIndexAnnotation.class))
                tree.indexSpans(0);

            if (mention.headWord == null) {
                Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention, tree,
                        sentence);
                mention.headWord = (CoreLabel) headTree.label();
                mention.headIndex = mention.headWord.get(CoreAnnotations.IndexAnnotation.class) - 1;
            }

            if (mention.mentionSubTree == null) {
                // mentionSubTree = highest NP that has the same head
                Tree headTree = tree.getLeaves().get(mention.headIndex);
                if (headTree == null) {
                    throw new RuntimeException("Missing head tree for a mention!");
                }
                Tree t = headTree;
                while ((t = t.parent(tree)) != null) {
                    if (t.headTerminal(headFinder) == headTree && t.value().equals("NP")) {
                        mention.mentionSubTree = t;
                    } else if (mention.mentionSubTree != null) {
                        break;
                    }
                }
                if (mention.mentionSubTree == null) {
                    mention.mentionSubTree = headTree;
                }
            }

            List<Mention> mentionsForTree = mentionsToTrees.get(treeToKey(mention.mentionSubTree));
            if (mentionsForTree == null) {
                mentionsForTree = new ArrayList<Mention>();
                mentionsToTrees.put(treeToKey(mention.mentionSubTree), mentionsForTree);
            }
            mentionsForTree.add(mention);

            // generates all fields required for coref, such as gender, number, etc.
            mention.process(dictionaries, semantics, this, singletonPredictor);
        }

        //
        // Order all mentions in tree-traversal order
        //
        List<Mention> orderedMentions = new ArrayList<Mention>();
        orderedMentionsBySentence.add(orderedMentions);

        // extract all mentions in tree traversal order (alternative: tree.postOrderNodeList())
        for (Tree t : tree.preOrderNodeList()) {
            List<Mention> lm = mentionsToTrees.get(treeToKey(t));
            if (lm != null) {
                for (Mention m : lm) {
                    orderedMentions.add(m);
                }
            }
        }

        //
        // find appositions, predicate nominatives, relative pronouns in this sentence
        //
        findSyntacticRelations(tree, orderedMentions);
        assert (mentions.size() == orderedMentions.size());
    }

    return orderedMentionsBySentence;
}
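The "highest NP with the same head" search inside arrange() can be distilled to the following sketch; tree, headLeaf and headFinder are assumed inputs:

// walk upward from the head leaf, remembering the last NP whose head terminal is still this leaf
static Tree highestNPWithSameHead(Tree tree, Tree headLeaf, HeadFinder headFinder) {
    Tree best = headLeaf;
    Tree t = headLeaf;
    while ((t = t.parent(tree)) != null) {
        if (t.headTerminal(headFinder) == headLeaf && t.label().value().equals("NP")) {
            best = t;
        } else if (best != headLeaf) {
            break; // once an NP has matched and the head changes, higher nodes no longer qualify
        }
    }
    return best;
}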
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
private void addFoundPair(Tree np1, Tree np2, Tree t, Set<Pair<Integer, Integer>> foundPairs) {
    Tree head1 = np1.headTerminal(headFinder);
    Tree head2 = np2.headTerminal(headFinder);
    int h1 = ((CoreMap) head1.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    int h2 = ((CoreMap) head2.label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
    Pair<Integer, Integer> p = new Pair<Integer, Integer>(h1, h2);
    foundPairs.add(p);
}
From source file:knu.univ.lingvo.coref.sievepasses.DeterministicCorefSieve.java
License:Open Source License
/** Divides a sentence into clauses and sorts the antecedents for pronoun matching. */
private static List<Mention> sortMentionsForPronoun(List<Mention> l, Mention m1, boolean sameSentence) {
    List<Mention> sorted = new ArrayList<Mention>();
    if (sameSentence) {
        Tree tree = m1.contextParseTree;
        Tree current = m1.mentionSubTree;
        while (true) {
            current = current.ancestor(1, tree);
            if (current.label().value().startsWith("S")) {
                for (Mention m : l) {
                    if (!sorted.contains(m) && current.dominates(m.mentionSubTree)) {
                        sorted.add(m);
                    }
                }
            }
            if (current.label().value().equals("ROOT") || current.ancestor(1, tree) == null)
                break;
        }
        if (SieveCoreferenceSystem.logger.isLoggable(Level.FINEST)) {
            if (l.size() != sorted.size()) {
                SieveCoreferenceSystem.logger.finest("sorting failed!!! -> parser error?? \tmentionID: "
                        + m1.mentionID + " " + m1.spanToString());
                sorted = l;
            } else if (!l.equals(sorted)) {
                SieveCoreferenceSystem.logger.finest("sorting succeeded & changed !! \tmentionID: "
                        + m1.mentionID + " " + m1.spanToString());
                for (int i = 0; i < l.size(); i++) {
                    Mention ml = l.get(i);
                    Mention msorted = sorted.get(i);
                    SieveCoreferenceSystem.logger
                            .finest("\t[" + ml.spanToString() + "]\t[" + msorted.spanToString() + "]");
                }
            } else {
                SieveCoreferenceSystem.logger
                        .finest("no changed !! \tmentionID: " + m1.mentionID + " " + m1.spanToString());
            }
        }
    }
    return sorted;
}