List of usage examples for edu.stanford.nlp.trees.Tree.getSpan()
public IntPair getSpan()
From source file:KleinBilingualParser.java
private static double insideBoth(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) { IntPair spanF = nodeF.getSpan(); IntPair spanE = nodeF.getSpan();//from w w w. j a va2 s . c o m /* List<Word> sentenceF = nodeF.yieldWords(); List<Word> sentenceE = nodeE.yieldWords(); for(int h=0;h<sentenceF.size();h++) System.out.print(sentenceF.get(h)+" "); System.out.println(); for(int h=0;h<sentenceE.size();h++) System.out.print(sentenceE.get(h)+" "); System.out.println(); */ /* if(spanF.getSource()!=spanE.getSource() || spanF.getTarget()!=spanE.getTarget()) { System.out.println("DIFFERENT"); System.out.println(spanF.getSource()+" "+spanF.getTarget()); System.out.println(spanE.getSource()+" "+spanE.getTarget()); } */ double sum = 0; for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) { sum++; } } } } return sum / 10; }
From source file:KleinBilingualParser.java
private static double insideSrcOutsideTgt(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) { IntPair spanF = nodeF.getSpan(); IntPair spanE = nodeF.getSpan();// w w w. j ava 2 s. c o m double sum = 0; for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex < spanE.getSource() && alignedIndex > spanE.getTarget()) { sum++; } } } } return sum / 10; }
From source file:KleinBilingualParser.java
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) { IntPair spanF = nodeF.getSpan(); IntPair spanE = nodeF.getSpan();/*from www .j a v a 2s . c o m*/ double sum = 0; for (int f = 0; f < spanF.getSource(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) { sum++; } } } } for (int f = spanF.getTarget() + 1; f < nodeF.size(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) { sum++; } } } } return sum / 10; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) { if (tree.isLeaf()) { return 0.0; }//from ww w .j a v a2s . c om if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) { // System.out.println("NO SCORE FOR: "+iTW); // } float score = lex.score(iTW, start, wordStr, null); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); //+ DEBUG // if (ug.scoreRule(ur) < -10000) { // System.out.println("Grammar doesn't have rule: " + ur); // } // return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost); double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + lex.score(ur, start, start + tree.children()[0].yield().size()); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); //+ DEBUG // if (bg.scoreRule(br) < -10000) { // System.out.println("Grammar doesn't have rule: " + br); // } // return SloppyMath.max(bg.scoreRule(br), -10000.0) + // scoreBinarizedTree(tree.children()[0], leftmost) + // scoreBinarizedTree(tree.children()[1], false); int sz0 = tree.children()[0].yield().size(); double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl) + lex.score(br, start, start + sz0 + 
tree.children()[1].yield().size(), start + sz0); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start)); return score; }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }/*from w w w .j a v a2 s . co m*/ // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }