List of usage examples for edu.stanford.nlp.trees Tree toString
@Override
public String toString()
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public List<String> buildForestForRSTArcs(ParseThicket pt) { List<String> results = new ArrayList<String>(); for (WordWordInterSentenceRelationArc arc : pt.getArcs()) { // TODO - uncomment // if (!arc.getArcType().getType().startsWith("rst")) // continue; int fromSent = arc.getCodeFrom().getFirst(); int toSent = arc.getCodeTo().getFirst(); String wordFrom = arc.getLemmaFrom(); String wordTo = arc.getLemmaTo(); if (wordFrom == null || wordFrom.length() < 1 || wordTo == null || wordTo.length() < 1) log.severe("Empty lemmas for RST arc " + arc); List<Tree> trees = getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent), pt.getSentences().get(fromSent), new String[] { wordFrom }); if (trees == null || trees.size() < 1) continue; System.out.println(trees); StringBuilder sb = new StringBuilder(10000); Tree tree = trees.get(0);/*from ww w . j av a 2 s .co m*/ // instead of phrase type for the root of the tree, we want to put // the RST relation name if (arc.getArcType().getType().startsWith("rst")) tree.setValue(arc.getArcType().getSubtype()); toStringBuilderExtenderByAnotherLinkedTree1(sb, pt.getSentences().get(toSent), tree, new String[] { wordTo }); System.out.println(sb.toString()); results.add(sb.toString()); } // if no arcs then orig sentences if (results.isEmpty()) { for (Tree t : pt.getSentences()) { results.add(t.toString()); } } return results; }
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
/**
 * Serializes tree {@code t} into {@code sb} in Penn-treebank bracket form,
 * splicing {@code treeToInsert} as an extra last child of the first node that
 * has a child whose bracketed text ends with the last word in
 * {@code corefWords}.
 *
 * Once the splice happens, null is passed down for both {@code treeToInsert}
 * and {@code corefWords}, so nothing is inserted again below that node.
 * NOTE(review): sibling subtrees that have not yet matched still carry the
 * original treeToInsert/corefWords, so a second matching node elsewhere in
 * the tree would receive its own copy — confirm that is intended.
 *
 * @param sb           accumulator for the serialized tree
 * @param t            the tree (or subtree) being serialized
 * @param treeToInsert subtree to splice in, or null for plain serialization
 * @param corefWords   match words; only the LAST element is used for matching
 * @return {@code sb}, for call chaining
 */
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t, Tree treeToInsert,
        String[] corefWords) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    } else {
        sb.append('(');
        if (t.label() != null) {
            if (t.value() != null) {
                sb.append(t.label().value());
            }
        }
        boolean bInsertNow = false;
        Tree[] kids = t.children();
        if (kids != null) {
            // decide whether this node is the splice point: some direct child's
            // bracketed text (with ')' stripped) must end with the last coref word
            for (Tree kid : kids) {
                if (corefWords != null) {
                    String word = corefWords[corefWords.length - 1];
                    String phraseStr = kid.toString();
                    phraseStr = phraseStr.replace(")", "");
                    if (phraseStr.endsWith(word)) {
                        bInsertNow = true;
                    }
                }
            }
            if (bInsertNow) {
                // serialize all original children first, then append the spliced
                // subtree; nulls prevent any further insertion in this branch
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
                }
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
            } else {
                // not the splice point yet: keep searching deeper
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
                }
            }
        }
        return sb.append(')');
    }
}
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public List<Tree> getASubtreeWithRootAsNodeForWord1(Tree tree, Tree currentSubTree, String[] corefWords) { if (currentSubTree.isLeaf()) { return null; }// w w w . jav a 2 s . co m List<Tree> result = null; Tree[] kids = currentSubTree.children(); if (kids != null) { boolean bFound = false; String word = corefWords[corefWords.length - 1]; for (Tree kid : kids) { if (bFound) { result.add(kid); } else { String phraseStr = kid.toString(); phraseStr = phraseStr.replace(")", ""); if (phraseStr.endsWith(word)) { // found bFound = true; result = new ArrayList<Tree>(); } } } if (bFound) { return result; } // if not a selected node, proceed with iteration for (Tree kid : kids) { List<Tree> ts = getASubtreeWithRootAsNodeForWord1(tree, kid, corefWords); if (ts != null) return ts; } } return null; }
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
/**
 * Intended to locate a subtree covering all of {@code corefWords}.
 *
 * NOTE(review): as currently written, NO path returns a non-null result —
 * {@code bInsertNow} is computed but never read (its early return is
 * commented out), the commented-out kid-matching block is disabled, and the
 * recursion bottoms out at leaves returning null. Confirm whether the
 * commented-out {@code if(bInsertNow) return ...} was meant to be active.
 * Also contains a leftover System.out.println debug print.
 *
 * @param tree           full sentence tree (passed through recursion)
 * @param currentSubTree subtree currently being examined
 * @param corefWords     words that the selected subtree should cover
 * @return always null in the current implementation (see note above)
 */
public Tree[] getASubtreeWithRootAsNodeForWord(Tree tree, Tree currentSubTree, String[] corefWords) {
    if (currentSubTree.isLeaf()) {
        return null;
    }
    boolean bInsertNow = false;
    /*
     * List<ParseTreeNode> bigTreeNodes =
     * parsePhrase(currentSubTree.label().value()); for(ParseTreeNode
     * smallNode: bigTreeNodes ){ if
     * (bigTreeNodes.get(0).getWord().equals("") ) continue; String word =
     * bigTreeNodes.get(0).getWord(); for(String cWord: corefWords){
     *
     * if (word.equalsIgnoreCase(cWord)) bInsertNow=true; } }
     */
    String nodePhraseStr = currentSubTree.toString();
    System.out.println(nodePhraseStr);
    // strip each coref word from the node's bracketed text
    for (String w : corefWords)
        nodePhraseStr = nodePhraseStr.replace(w, "");
    // all words are covered: only uppercase phrase labels / brackets remain
    if (nodePhraseStr.toUpperCase().equals(nodePhraseStr))
        bInsertNow = true;
    // if(bInsertNow)
    // return currentSubTree;
    Tree[] kids = currentSubTree.children();
    if (kids != null) {
        /*
         * for (Tree kid : kids) { List<ParseTreeNode> bigTreeNodes =
         * parsePhrase(kid.label().value()); if (bigTreeNodes!=null &&
         * bigTreeNodes.size()>0 && bigTreeNodes.get(0)!=null &&
         * bigTreeNodes.get(0).getWord().equalsIgnoreCase(corefWords[0])){
         * bInsertNow=true; return kids; }
         *
         * }
         */
        for (Tree kid : kids) {
            Tree[] t = getASubtreeWithRootAsNodeForWord(tree, kid, corefWords);
            if (t != null)
                return t;
        }
    }
    return null;
}
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR(Tree t, List<ParseTreeNode> sentence, List<List<ParseTreeNode>> phrases) { if (!t.isPreTerminal()) { if (t.label() != null) { if (t.value() != null) { // if ROOT or S, returns empty List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString()); nodes = assignIndexToNodes(nodes, sentence); if (!nodes.isEmpty()) phrases.add(nodes);//from www. j a va 2s .com if (nodes.size() > 0 && nodes.get(0).getId() == null) { if (nodes.size() > 1 && nodes.get(1) != null && nodes.get(1).getId() != null) { try { ParseTreeNode n = nodes.get(0); n.setId(nodes.get(1).getId() - 1); nodes.set(0, n); } catch (Exception e) { e.printStackTrace(); } } else { log.severe("Failed alignment:" + nodes); } } } } Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { navigateR(kid, sentence, phrases); } } return; } }
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l, List<List<ParseTreeNode>> phrases) { if (t.isPreTerminal()) { if (t.label() != null) { List<ParseTreeNode> node = parsePhrase(t.toString()); if (!node.isEmpty()) phrases.add(node);//from ww w. j av a 2s . c o m } return; } else { if (t.label() != null) { if (t.value() != null) { List<ParseTreeNode> node = parsePhrase(t.label().value()); if (!node.isEmpty()) phrases.add(node); } } Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { navigateR1(kid, sentence, l, phrases); } } return; } }
From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java
License:Apache License
/**
 * Recursively converts a parse tree into a JGraphT graph: each non-leaf child
 * becomes a ParseGraphNode vertex (labeled with its text or phrase label plus
 * the depth), connected by an edge to {@code currParent}.
 *
 * NOTE(review): a single-child node is first descended into with the SAME
 * parent, and then that child is processed again by the loop below — it looks
 * like the child may be added/visited twice; confirm against expected graph
 * shape. Same concern for children whose label starts with "S" (which also
 * matches SBAR, SQ, etc., not only S).
 *
 * @param tree       subtree being converted
 * @param g          graph under construction
 * @param l          current depth, used only in vertex labels
 * @param currParent vertex acting as parent for this level's children
 */
private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {
    // String currParent = tree.label().value()+" $"+Integer.toString(l);
    // g.addVertex(currParent);
    if (tree.getChildrenAsList().size() == 1)
        navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent);
    else if (tree.getChildrenAsList().size() == 0)
        return;
    for (Tree child : tree.getChildrenAsList()) {
        String currChild = null;
        ParseGraphNode currChildNode = null;
        try {
            if (child.isLeaf())
                continue;
            // "S"-prefixed labels: hop over the node to its first child
            if (child.label().value().startsWith("S"))
                navigate(child.getChildrenAsList().get(0), g, l + 1, currParent);
            // vertex label: full text for terminals/pre-terminals, phrase label otherwise
            if (!child.isPhrasal() || child.isPreTerminal())
                currChild = child.toString() + " #" + Integer.toString(l);
            else
                currChild = child.label().value() + " #" + Integer.toString(l);
            currChildNode = new ParseGraphNode(child, currChild);
            g.addVertex(currChildNode);
            g.addEdge(currParent, currChildNode);
        } catch (Exception e) {
            // best-effort: a malformed child is skipped, not fatal
            e.printStackTrace();
        }
        navigate(child, g, l + 1, currChildNode);
    }
}
From source file:org.textmining.annotator.StanfordCoreNlpAnnotator.java
License:Open Source License
/**
 * Runs the Stanford CoreNLP pipeline over the document text and converts its
 * output into UIMA annotations: tokens (with POS/lemma), phrase-level named
 * entity mentions, sentences, constituency trees, dependency graphs, and
 * coreference-derived entities.
 *
 * @param jCas the CAS whose document text is annotated in place
 * @throws AnalysisEngineProcessException per the UIMA annotator contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());
    // running state for merging token-level NE tags into phrase-level mentions
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {
        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();
        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // an NE run just ended — emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // an NE run starts here
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: close the previous mention, start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that runs to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }
    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {
        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();
        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // drop the synthetic ROOT wrapper before indexing spans
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);
        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);
        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }
        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }
        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }
    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }
    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {
            // figure out the character span of the token
            // (CorefMention indices are 1-based; endIndex is exclusive)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();
            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }
        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });
        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }
    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }
    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // earliest mention offset of the entity, used as its document position
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });
    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
From source file:parsers.CoreNLPSuite.java
/**
 * Parses the input text with the shared CoreNLP pipeline and returns the
 * bracketed constituency parse of each sentence, one per line (each line
 * newline-terminated).
 *
 * Fix: replaced O(n^2) string concatenation in the sentence loop with a
 * StringBuilder.
 *
 * @param input raw text to annotate
 * @return newline-terminated parse strings, empty string for no sentences
 */
public static String parse(String input) {
    StringBuilder out = new StringBuilder();
    Annotation document = new Annotation(input);
    Parsers.pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    for (CoreMap sentence : sentences) {
        Tree tree = sentence.get(TreeAnnotation.class);
        out.append(tree.toString()).append('\n');
    }
    return out.toString();
}
From source file:pltag.util.Utils.java
License:Open Source License
/**
 * Truncates a bracketed parse string so that at most {@code numOfLeaves}
 * leaves remain, by chopping the subtrees that span only the extraneous
 * trailing leaves.
 *
 * NOTE(review): assumes the common ancestor of the last valid leaf and the
 * final leaf exists on the path to the root (the while loop would NPE on a
 * malformed tree where parent() returns null) — confirm inputs are always
 * well-formed single-rooted trees.
 *
 * @param inputTree   bracketed Penn-treebank parse string
 * @param numOfLeaves number of leaves to keep
 * @return the truncated tree string, or the input unchanged when it already
 *         has no more than {@code numOfLeaves} leaves
 */
public static String removeSubtreesAfterWord(String inputTree, int numOfLeaves) {
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    if (leaves.size() > numOfLeaves) {
        // find common ancestor between last valid leaf and extraneous leaf
        Tree firstLeaf = leaves.get(numOfLeaves - 1);
        Tree lastLeaf = leaves.get(leaves.size() - 1);
        Tree commonAncestorNode = lastLeaf.parent(tree);
        while (!commonAncestorNode.getLeaves().contains(firstLeaf)) {
            commonAncestorNode = commonAncestorNode.parent(tree);
        }
        // found the common ancestor, now we need to chop the children nodes
        // the span of which is outwith the last valid leaf
        Tree p = lastLeaf.parent(tree);
        while (p != commonAncestorNode) {
            // strip all children of each node on the path up to the ancestor
            int numOfChildren = p.numChildren();
            for (int i = 0; i < numOfChildren; i++)
                p.removeChild(0);
            p = p.parent(tree);
        }
        // remove last leftover parent node of the invalid leaf
        commonAncestorNode.removeChild(commonAncestorNode.numChildren() - 1);
        return tree.toString();
    } else {
        return inputTree;
    }
}