Usage examples for edu.stanford.nlp.trees.Tree.getChild
public Tree getChild(int i)
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree(StringBuilder sb, Tree t, Tree treeToInsert) { if (t.isLeaf()) { if (t.label() != null) { sb.append(t.label().value()); }//from w ww.j a va2 s . c o m return sb; } else { sb.append('('); if (t.label() != null) { if (t.value() != null) { sb.append(t.label().value()); } } boolean bInsertNow = false; // we try match trees to find out if we are at the insertion // position if (treeToInsert != null) { List<ParseTreeNode> bigTreeNodes = parsePhrase(t.label().value()); List<ParseTreeNode> smallTreeNodes = parsePhrase( treeToInsert.getChild(0).getChild(0).getChild(0).label().value()); System.out.println(t + " \n " + treeToInsert + "\n"); if (smallTreeNodes.size() > 0 && bigTreeNodes.size() > 0) for (ParseTreeNode smallNode : smallTreeNodes) { if (!bigTreeNodes.get(0).getWord().equals("") && bigTreeNodes.get(0).getWord().equalsIgnoreCase(smallNode.getWord())) bInsertNow = true; } } if (bInsertNow) { Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { sb.append(' '); toStringBuilderExtenderByAnotherLinkedTree(sb, kid, null); } sb.append(' '); toStringBuilderExtenderByAnotherLinkedTree(sb, treeToInsert.getChild(0).getChild(1), null); int z = 0; z++; } } else { Tree[] kids = t.children(); if (kids != null) { for (Tree kid : kids) { sb.append(' '); toStringBuilderExtenderByAnotherLinkedTree(sb, kid, treeToInsert); } } } return sb.append(')'); } }
From source file:org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java
License:Apache License
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) { logger.debug("...using linguistical analysis..."); Annotation document = new Annotation(propertyText); pipeline.annotate(document);/*from w ww. j a va 2 s . c o m*/ List<CoreMap> sentences = document.get(SentencesAnnotation.class); String pattern = ""; PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED; boolean firstTokenAuxiliary = false; for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); // get the first word and check if it's 'is' or 'has' CoreLabel token = tokens.get(0); String word = token.get(TextAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); String lemma = token.getString(LemmaAnnotation.class); firstTokenAuxiliary = auxiliaryVerbs.contains(lemma); if (lemma.equals("be") || word.equals("have")) { pattern += lemma.toUpperCase(); } else { if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } } if (tokens.size() > 1) { pattern += " "; for (int i = 1; i < tokens.size(); i++) { token = tokens.get(i); pos = token.get(PartOfSpeechAnnotation.class); if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } pattern += " "; } } // get the parse tree Tree tree = sentence.get(TreeAnnotation.class); // skip ROOT tag tree = tree.skipRoot(); logger.debug("Parse tree:" + tree.pennString()); // tree.pennPrint(); // check if VP is directly followed by NP // sometimes parent node is S,SINV,etc. 
if (tree.value().matches(Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT)))) { tree = tree.getChild(0); } boolean useDeterminer = false; if (tree.value().equals(VERB_PHRASE.getTag())) { for (Tree child : tree.getChildrenAsList()) { // check if first non terminal is NP and not contains a // determiner if (!child.isPreTerminal()) { if (child.value().equals(NOUN_PHRASE.getTag()) && !child.getChild(0).value().equals(DETERMINER.getTag())) { useDeterminer = true; } break; } } } // add determiner tag if (useDeterminer) { String[] split = pattern.split(" "); pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length)); } } pattern = pattern.trim(); // if first token is an auxiliary can return verb if (firstTokenAuxiliary) { verbalizationType = PropertyVerbalizationType.VERB; } // check if pattern matches if (pattern.matches(VERB_PATTERN)) { logger.debug("...successfully determined type."); verbalizationType = PropertyVerbalizationType.VERB; } return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType); }
From source file:qmul.corpus.SwitchboardCorpus.java
License:Open Source License
/** * @param dialogueName//w w w . j ava 2s . c o m * @param genre * @param reader * @return whether to carry on or not */ private boolean getSentences(String dialogueName, String genre, TreeReader reader) { Pattern p = Pattern.compile("\\(CODE\\s+(?:\\([A-Z]+\\s+)?Speaker([A-Za-z]+)(\\d+)"); try { Dialogue dialogue = null; DialogueSpeaker speaker = null; DialogueSpeaker lastSpeaker = null; DialogueTurn currentTurn = null; int currentSubdialogue = -1; int turnNum = -1; Tree tree = reader.readTree(); Filter<Tree> nodeFilter = new NodeFilter(); while (tree != null) { Matcher m = p.matcher(tree.toString()); if (m.find()) { // get the metadata turnNum = Integer.parseInt(m.group(2)); int subDialogue = 0; // apparently no subdialogues in SWBD ... String spk = m.group(1).toUpperCase(); // start new dialogue if subdialogue changed if (subDialogue != currentSubdialogue) { if (dialogue != null) { if (!checkDialogue(dialogue)) { return false; } } // dialogue = addDialogue(dialogueName + ":" + subDialogue, genre); dialogue = addDialogue(dialogueName, genre); // TODO genre in SWBD? getGenreMap().put(dialogueName, genre); } currentSubdialogue = subDialogue; // set up speaker String spkId = dialogue.getId() + ":" + spk; if (!getSpeakerMap().containsKey(spkId)) { // TODO speaker info in SWBD? 
getSpeakerMap().put(spkId, new DialogueSpeaker(spkId, "", "", "", "", "")); // System.out.println("added new speaker " + spkId); } speaker = getSpeakerMap().get(spkId); } else { // get the tree and extract the transcription String trans = ""; // SWBD embeds trees within an extra unlabelled level ((S etc)) if (((tree.label() == null) || (tree.label().value() == null)) && (tree.children().length == 1)) { tree = tree.getChild(0); } if (tree != null) { tree = tree.prune(nodeFilter); if (tree != null) { for (Tree leaf : tree.getLeaves()) { trans += leaf.label() + " "; } trans = trans.substring(0, trans.length() - 1); // start new turn if speaker has changed if ((lastSpeaker == null) || !speaker.equals(lastSpeaker) || (currentTurn == null)) { currentTurn = dialogue.addTurn(turnNum, speaker); // System.out.println("new turn " + turnNum + ", " + speaker + " " + currentTurn); lastSpeaker = speaker; } // add sentence dialogue.addSent(-1, currentTurn, trans, tree); // DialogueSentence s = dialogue.addSent(-1, currentTurn, trans, tree); // System.out.println("new sent " + s); // System.out.println(s.getSyntax().pennString()); } } } tree = reader.readTree(); } return checkDialogue(dialogue); } catch (IOException e) { System.err.println("Error reading sentence line" + e.getMessage()); return false; } }
From source file:sg.edu.nus.comp.pdtb.parser.NonExplicitComp.java
License:Open Source License
/**
 * Resolves the tree addresses of one argument of a relation into tree nodes.
 * <p>
 * The relation is split on {@code |}. For the PDTB corpus the address string is read from
 * column 23 ({@code arg1}) or 33 (any other {@code arg}); otherwise the span in column 14 or
 * 20 is converted through {@code Corpus.spanToSenIds}. The address string is a
 * {@code ;}-separated list of comma-separated integers: the first selects the sentence tree,
 * the remaining ones are child indices followed downwards from that tree's first child.
 *
 * @param relation pipe-delimited relation record
 * @param arg      {@code "arg1"} selects the first argument; any other value the second
 * @return one {@code TreeNode} per address, in address order
 * @throws IOException declared by the signature
 */
private List<TreeNode> getTreeNodes(String relation, String arg) throws IOException {
    final String[] columns = relation.split("\\|", -1);

    final String gornAddress;
    if (corpus.equals(Type.PDTB)) {
        final int column = arg.equals("arg1") ? 23 : 33;
        gornAddress = columns[column];
    } else {
        final int column = arg.equals("arg1") ? 14 : 20;
        gornAddress = Corpus.spanToSenIds(columns[column], spanMap).getFirst().toString();
    }

    final List<TreeNode> nodes = new ArrayList<TreeNode>();
    for (String address : gornAddress.split(";")) {
        final String[] path = address.split(",");
        final int senIdx = Integer.parseInt(path[0]);
        final Tree sentenceTree = trees.get(senIdx).getChild(0);

        // walk down the child indices that follow the sentence index
        Tree target = sentenceTree;
        for (int step = 1; step < path.length; ++step) {
            target = target.getChild(Integer.parseInt(path[step]));
        }
        nodes.add(new TreeNode(sentenceTree, target, senIdx));
    }
    return nodes;
}
From source file:tml.utils.StanfordUtils.java
License:Apache License
/**
 * Follows first children downwards from {@code t} until a node with siblings is reached and
 * returns the value of the node visited just before it.
 *
 * @param orig root tree used to look up siblings
 * @param t    node to start from
 * @param pt   node considered the predecessor of {@code t}; may be {@code null}
 * @return the value of the predecessor of the first visited node that has at least one
 *         sibling in {@code orig} (provided that predecessor is non-null), or
 *         {@code "NOBRANCH"} if a leaf is reached first
 */
public static String getPennTagFirstBranch(Tree orig, Tree t, Tree pt) {
    Tree current = t;
    Tree previous = pt;
    while (!current.isLeaf()) {
        List<Tree> siblings = current.siblings(orig);
        boolean hasSiblings = siblings != null && !siblings.isEmpty();
        if (hasSiblings && previous != null) {
            return previous.value();
        }
        // descend along the first child, remembering where we came from
        previous = current;
        current = current.getChild(0);
    }
    return "NOBRANCH";
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
private static void getTriplets(final Tree sentence, List<Tree> leaves) { Comparator<KeyValue<Integer, Entity>> cmp = new Comparator<KeyValue<Integer, Entity>>() { public int compare(KeyValue<Integer, Entity> o1, KeyValue<Integer, Entity> o2) { return o1.getValue().compareTo(o2.getValue()); }//from w w w .j ava 2 s . c o m }; deepestSentence = null; dependancy = ""; toDependancy = ""; e = null; deepestVerbPhrase = null; dependancyDepth = 0; depthOfSentence = -10; childNumberOfDeepestSenctence = -1; getDeepestSentence(sentence, 0); if (deepestSentence != null) { tmpTriplets = new ArrayList<Triplet>(); Triplet tempTriplet = extractTriplet(deepestSentence, leaves); final Tree ancestor = deepestSentence.ancestor(1, sentence); if (ancestor == null) return; if (childNumberOfDeepestSenctence != -1) { addAttribute(cmp, sentence, deepestSentence); ancestor.removeChild(childNumberOfDeepestSenctence); } if (ancestor.value().equalsIgnoreCase("ROOT")) { return; } else { Tree parentOfSentence = ancestor.ancestor(1, sentence); int clauseIndex = parentOfSentence.indexOf(ancestor); for (int j = clauseIndex - 1; j >= 0; j--) { Tree lastNoun = parentOfSentence.getChild(j); nounFound = false; e = new Entity(); extractLastNoun(lastNoun); tempTriplet.lastNP = e; if (!tempTriplet.lastNP.isEmpty()) if (tempTriplet.subject.isEmpty()) { tempTriplet.subject = e; tempTriplet.subject.attributes = getSubjectAttributes(e.tree.ancestor(1, sentence), true, sentence, e.tree); } if (!tempTriplet.lastNP.isEmpty()) break; } } getTriplets(sentence, leaves); } else { tmpTriplets = new ArrayList<Triplet>(); Triplet tempTriplet = extractTriplet(sentence, leaves); addAttribute(cmp, sentence, sentence); } }
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
public static void extractLastNoun(Tree sentence) { List<Tree> tempList = sentence.getChildrenAsList(); for (int i = tempList.size() - 1; i >= 0; i--) { if (nounFound) break; Tree tempTree = tempList.get(i); if (TripletExtractor.isNoun(tempTree) && tempTree.getChildrenAsList().size() > 0) { e.name = tempTree.getChild(0).toString(); e.tree = tempTree;// w w w .jav a2 s . com e.type = tempTree.value(); nounFound = true; } else { extractLastNoun(tempTree); } } }
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Builds the subject entity from the children of {@code sentence} that precede its verb
 * phrase.
 * <p>
 * Children are visited left to right until one is a verb phrase. For each noun phrase (or
 * adjective phrase, when {@code tryAdjective} is set) that yields a subject node with
 * children, the entity is filled from it; a later hit overwrites an earlier one.
 *
 * @param sentence     tree whose direct children are inspected
 * @param root         root tree passed on to {@code getSubjectAttributes}
 * @param tryAdjective whether adjective phrases are accepted as well
 * @return the entity; left as freshly constructed when no subject was found
 */
private static Entity getSubject(final Tree sentence, final Tree root, boolean tryAdjective) {
    Entity result = new Entity();
    for (final Tree child : sentence.getChildrenAsList()) {
        final boolean candidatePhrase = isNounPhrase(child) || (tryAdjective && isAdjectivePhrase(child));
        if (candidatePhrase) {
            Tree subject = getSubjectNode(child, tryAdjective);
            if (subject != null && !subject.getChildrenAsList().isEmpty()) {
                result.tree = child;
                result.attributes = getSubjectAttributes(child, true, root, subject);
                result.name = subject.getChild(0).value();
                result.type = subject.value();
            }
        }
        // the subject has to come before the verb phrase
        if (isVerbPhrase(child)) {
            break;
        }
    }
    return result;
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
/**
 * Collects attribute entities for a subject or object node.
 * <p>
 * Every child of {@code tempTree} other than {@code objectOrSubject} that is a noun or a noun
 * attribute and has children contributes an entity (first child's value, own value). Children
 * that are noun-phrase attributes are searched recursively. With {@code searchUncles}, the
 * siblings of {@code tempTree} that are adjective phrases, preposition phrases or noun-phrase
 * attributes are searched as well (one level, without looking at their uncles).
 *
 * @param tempTree        node whose children are inspected
 * @param searchUncles    whether the siblings of {@code tempTree} are searched too
 * @param root            root tree used to find the parent of {@code tempTree}
 * @param objectOrSubject node to exclude from the result (compared by identity)
 * @return the attributes found; never {@code null}
 */
private static List<Entity> getSubjectAttributes(final Tree tempTree, boolean searchUncles, final Tree root,
        final Tree objectOrSubject) {
    final ArrayList<Entity> list = new ArrayList<Entity>();

    for (Tree sibling : tempTree.getChildrenAsList()) {
        if ((isAttribForNoun(sibling) || isNoun(sibling)) && sibling != objectOrSubject) {
            if (sibling.getChildrenAsList().size() > 0) {
                list.add(new Entity(sibling.getChild(0).value(), sibling.value()));
            }
        } else if (isAttribForNounPhrase(sibling)) {
            list.addAll(getSubjectAttributes(sibling, false, root, objectOrSubject));
        }
    }

    if (searchUncles) {
        Tree parent = tempTree.ancestor(1, root);
        // ancestor() may return null (e.g. tempTree has no parent below root); in that case
        // there are no uncles to search and dereferencing it would throw.
        if (parent != null) {
            for (Tree uncle : parent.getChildrenAsList()) {
                if ((isAdjectivePhrase(uncle) || isPrepositionPhrase(uncle) || isNounPhraseAttrib(uncle))
                        && uncle != tempTree) {
                    list.addAll(getSubjectAttributes(uncle, false, root, objectOrSubject));
                }
            }
        }
    }
    return list;
}
From source file:uk.ac.gla.mir.util.TripletExtractor.java
License:Open Source License
private static List<Entity> getVerbAttributes(final Tree tempTree, final boolean searchUncles, final Tree root, final Tree verb) { final ArrayList<Entity> list = new ArrayList<Entity>(); final List<Tree> childList = tempTree.getChildrenAsList(); for (int i = 0; i < childList.size(); i++) { Tree sibling = childList.get(i); if (isAttribForVerb(sibling)) { if (sibling.getChildrenAsList().size() > 0) list.add(new Entity(sibling.getChild(0).value(), sibling.value())); } else if (sibling.value().equalsIgnoreCase("ADVP") || sibling.value().equalsIgnoreCase("PRT")) { List<Tree> tmp = sibling.getChildrenAsList(); for (int j = 0; j < tmp.size(); j++) { Tree tr = tmp.get(j);//from ww w. ja v a 2 s . c o m if (tr.getChildrenAsList().size() > 0) list.add(new Entity(tr.getChild(0).value(), tr.value())); } } } if (searchUncles) { Tree parent = tempTree.ancestor(1, root); final List<Tree> childList2 = parent.getChildrenAsList(); for (int i = 0; i < childList2.size(); i++) { Tree uncle = childList2.get(i); if ((isVerb(uncle) || uncle.value().equalsIgnoreCase("RB") || uncle.value().equalsIgnoreCase("MD")) && uncle != verb) { if (uncle.getChildrenAsList().size() > 0) list.add(new Entity(uncle.getChild(0).value(), uncle.value())); } if (isAdjectivePhrase(uncle) || isAdVerbPhrase(uncle)) { list.addAll(getVerbAttributes(uncle, false, root, verb)); } } } return list; }