List of usage examples for edu.stanford.nlp.util IntPair getSource
public int getSource()
From source file:KleinBilingualParser.java
private static double insideBoth(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) { IntPair spanF = nodeF.getSpan(); IntPair spanE = nodeF.getSpan();//from w w w .j a v a 2s . c o m /* List<Word> sentenceF = nodeF.yieldWords(); List<Word> sentenceE = nodeE.yieldWords(); for(int h=0;h<sentenceF.size();h++) System.out.print(sentenceF.get(h)+" "); System.out.println(); for(int h=0;h<sentenceE.size();h++) System.out.print(sentenceE.get(h)+" "); System.out.println(); */ /* if(spanF.getSource()!=spanE.getSource() || spanF.getTarget()!=spanE.getTarget()) { System.out.println("DIFFERENT"); System.out.println(spanF.getSource()+" "+spanF.getTarget()); System.out.println(spanE.getSource()+" "+spanE.getTarget()); } */ double sum = 0; for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) { sum++; } } } } return sum / 10; }
From source file:KleinBilingualParser.java
private static double insideSrcOutsideTgt(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) { IntPair spanF = nodeF.getSpan(); IntPair spanE = nodeF.getSpan();// w w w. j a va 2s . c o m double sum = 0; for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) { if (alignMap.containsKey(f)) { for (Integer alignedIndex : alignMap.get(f)) { if (alignedIndex < spanE.getSource() && alignedIndex > spanE.getTarget()) { sum++; } } } } return sum / 10; }
From source file:KleinBilingualParser.java
/**
 * Counts the alignment links that go from a source index outside the span of
 * {@code nodeF} to a target index inside the span of {@code nodeE}.
 * Source indices are taken from {@code 0} up to the span start (exclusive) and from
 * just after the span end up to {@code nodeF.size()} (exclusive).
 *
 * @param nodeF    source-side node whose span is excluded from the source indices
 * @param nodeE    target-side node whose span selects the accepted target indices
 * @param alignMap for a source index, the list of target indices it is aligned with
 * @return the number of matching links divided by 10
 */
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    IntPair spanF = nodeF.getSpan();
    // Fix: the target-side span has to come from nodeE (it was read from nodeF).
    IntPair spanE = nodeE.getSpan();
    int targetStart = spanE.getSource();
    int targetEnd = spanE.getTarget();

    double sum = 0;

    // Source indices to the left of nodeF's span.
    for (int f = 0; f < spanF.getSource(); f++) {
        ArrayList<Integer> aligned = alignMap.get(f);
        if (aligned == null) {
            continue;
        }
        for (int alignedIndex : aligned) {
            if (alignedIndex >= targetStart && alignedIndex <= targetEnd) {
                sum++;
            }
        }
    }

    // Source indices to the right of nodeF's span.
    // NOTE(review): the upper bound is nodeF.size(); presumably the sentence length was
    // intended here - confirm what size() yields for a sub-tree node before relying on it.
    int upperBound = nodeF.size();
    for (int f = spanF.getTarget() + 1; f < upperBound; f++) {
        ArrayList<Integer> aligned = alignMap.get(f);
        if (aligned == null) {
            continue;
        }
        for (int alignedIndex : aligned) {
            if (alignedIndex >= targetStart && alignedIndex <= targetEnd) {
                sum++;
            }
        }
    }
    return sum / 10;
}
From source file:com.project.NLP.Requirement.AnaphoraAnalyzer.java
public String doPronounResolving() { for (int i = 1; i <= graph.size(); i++) { CorefChain cc = graph.get(i);/*from w w w . jav a 2 s. co m*/ if (cc != null) { //System.out.println("-----"+cc.toString()); //System.out.println("---TextualOrder--"+cc.getMentionsInTextualOrder()); Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap(); //System.out.println("--MentionMap-----"+mentionMap); int mentionSize = mentionMap.size(); Set intPairSet = mentionMap.keySet(); // System.out.println("-----"+cc.getMentionsWithSameHead(1,i)); //System.out.println("---RepresentativeMention-----"+cc.getRepresentativeMention()); String mentionSpan = cc.getRepresentativeMention().mentionSpan; //System.out.println("----get the mentionspan---"+mentionSpan); String animacy = cc.getRepresentativeMention().animacy.toString(); //System.out.println("----get the animacy---"+animacy); if (animacy.equalsIgnoreCase("ANIMATE") && mentionSize > 1) { Iterator it = intPairSet.iterator(); while (it.hasNext()) { IntPair ip = (IntPair) it.next(); Set coref = mentionMap.get(ip); Iterator itC = coref.iterator(); while (itC.hasNext()) { CorefChain.CorefMention cm = (CorefMention) itC.next(); String mentionPronoun = cm.mentionSpan; //mentionPronoun.replace(mentionPronoun,mentionSpan) //System.out.println("---Sentences ------- :"+sentencesFromDoc); //System.out.println("---Words ------- :"+wordsFromDoc); //for(String[] str:wordsFromDoc){ // System.out.println("---Words from array ------- :"+str[0] + " "+str[1]); //} //System.out.println("--- cm.mentionSpan--- "+mentionPronoun+ " int pair : "+ip); int sentenceIndex = ip.getSource() - 1; int wordIndex = ip.getTarget() - 1; try { String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex]; //System.out.println("From arraylist : "+docWord); if (mentionPronoun.equalsIgnoreCase(docWord)) { wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan; } } catch (ArrayIndexOutOfBoundsException e) { //System.err.println("----- AnaphoraAnalyzer ------- : 
"+e.getMessage()); } } } } } } return getPronounResolvedDocument(); }
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas, TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS, boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) { // Get node label String nodeLabelValue = aNode.value(); // Extract syntactic function from node label String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); }// w ww . j a va 2 s . co m // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { Type constType = constituentMappingProvider.getTagType(nodeLabelValue); IntPair span = aNode.getSpan(); int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class); int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class); Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end); constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue); constituent.setSyntacticFunction( internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction); constituent.setParent(aParentFS); // Do we have any children? 
List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>(); for (Tree child : aNode.getChildrenAsList()) { org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas, aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider, tokens); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations)); constituent.addToIndexes(); return constituent; } // Create parent link on token else if (aNode.isPreTerminal()) { // link token to its parent constituent List<Tree> children = aNode.getChildrenAsList(); assert children.size() == 1; Tree terminal = children.get(0); CoreLabel label = (CoreLabel) terminal.label(); Token token = label.get(TokenKey.class); token.setParent(aParentFS); return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Creates linked constituent annotations + POS annotations * //from w w w .ja va 2s . c o m * @param aTreebankLanguagePack * the language pack. * @param aNode * the source tree * @param aParentFS * the parent annotation * @param aCreatePos * sets whether to create or not to create POS tags * @return the child-structure (needed for recursive call only) */ private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode, Annotation aParentFS, boolean aCreatePos) { String nodeLabelValue = aNode.value(); String syntacticFunction = null; AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack; int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter()); if (gfIdx > 0) { syntacticFunction = nodeLabelValue.substring(gfIdx + 1); nodeLabelValue = nodeLabelValue.substring(0, gfIdx); } // calculate span for the current subtree IntPair span = tokenTree.getSpan(aNode); // Check if the node has been marked by a TSurgeon operation. // If so, add a tag-annotation on the constituent if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) { int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR); String tag = nodeLabelValue.substring(0, separatorIndex); nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length()); createTagAnnotation(span.getSource(), span.getTarget(), tag); } // Check if node is a constituent node on sentence or phrase-level if (aNode.isPhrasal()) { // add annotation to annotation tree Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(), nodeLabelValue, syntacticFunction); // link to parent if (aParentFS != null) { constituent.setParent(aParentFS); } // Do we have any children? 
List<Annotation> childAnnotations = new ArrayList<Annotation>(); for (Tree child : aNode.getChildrenAsList()) { Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child, constituent, aCreatePos); if (childAnnotation != null) { childAnnotations.add(childAnnotation); } } // Now that we know how many children we have, link annotation of // current node with its children FSArray children = new FSArray(jCas, childAnnotations.size()); int curChildNum = 0; for (FeatureStructure child : childAnnotations) { children.set(curChildNum, child); curChildNum++; } constituent.setChildren(children); // write annotation for current node to index jCas.addFsToIndexes(constituent); return constituent; } // If the node is a word-level constituent node (== POS): // create parent link on token and (if not turned off) create POS tag else if (aNode.isPreTerminal()) { // create POS-annotation (annotation over the token) POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue); // in any case: get the token that is covered by the POS // TODO how about multi word prepositions etc. (e.g. "such as") List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos); // the POS should only cover one token assert coveredTokens.size() == 1; Token token = coveredTokens.get(0); // only add POS to index if we want POS-tagging if (aCreatePos) { jCas.addFsToIndexes(pos); token.setPos(pos); } // link token to its parent constituent if (aParentFS != null) { token.setParent(aParentFS); } return token; } else { throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal"); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Recovers annotations from a Stanford Tree-Object, which have been saved within the CoreLabel * of the tree./* w ww . jav a 2s . c om*/ *<p> * Note: * Copying has to be done in batch, because we need to have ALL annotations that should be * recovered together when copying them. The reason is that some annotations reference each * other, which can cause problem if a referenced annotation has not yet been recovered. */ public void recoverAnnotationsFromNodes() { // create batch-copy list for recovered annotations List<Annotation> annoList = new ArrayList<Annotation>(); Iterator<Tree> treeIterator = tokenTree.getTree().iterator(); CAS srcCAS = null; while (treeIterator.hasNext()) { Tree curTree = treeIterator.next(); // get the collection from the label of the best-fitting node in // which we store UIMA annotations Collection<Annotation> annotations = ((CoreLabel) curTree.label()).get(UIMAAnnotations.class); // do we have any annotations stored in the node? if (annotations != null && annotations.size() > 0) { // translate values which are now relative to the // node-span back to absolute value (depending on the // new offset of the node-span within the new CAS) IntPair span = tokenTree.getSpan(curTree); // iterate over all annotations for (Annotation curAnno : annotations) { srcCAS = srcCAS == null ? curAnno.getCAS() : srcCAS; // TODO using the SPAN as new annotation index might not // be correct in all cases - if not an EXACTLY MATCHING // node had been found for the saved annotation, this will // be wrong. 
Find a way to incorporate the anno-index here curAnno.setBegin(span.getSource()); curAnno.setEnd(span.getTarget()); // add anno to batch-copy list annoList.add(curAnno); } // endfor iterate over annotations } // endif check for annotations in node } // endwhile iterate over subtrees /* * Now that we have gathered all annotations from the tree, batch-copy them to the new CAS */ // create CasRecoverer (=adapted version of the CasCopier) CasCopier copier = new CasCopier(srcCAS, jCas.getCas()); // now batch-copy the annos List<Annotation> copiedAnnos = copier.batchCopyAnnotations(annoList); // add copied annos to indexes for (Annotation cAnno : copiedAnnos) { jCas.addFsToIndexes(cAnno); } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java
License:Open Source License
/** * <p>//from w ww .j av a 2 s .c o m * Recreates a Stanford Tree from the StanfordParser annotations and saves all * non-StanfordParser-Annotations within the scope of the sentence in the label of the best * fitting node. * </p> * * <p> * <strong>CAUTION: </strong><i>This method is intended for the use by CAS Multipliers, which * create new CASes from this tree. The annotation-spans in the source-CAS will be changed!!!!!! * You do NOT want to use the source CAS after this method has been called. The * createStanfordTree()-method does not change the CAS, so use this instead, if the annotations * do not have to be recovered or accessed in the tree.</i> * </p> * * <p> * TODO: This behavior could be changed by making COPIES of the annotations and changing the * copied instead of the originals. However, in order to being able to make copies, a dummy CAS * must be introduced to which the annotations can be copied. When they are recovered, they will * be copied to the new destination CAS anyway. * </p> * * @param root * the ROOT annotation * @return an {@link Tree} object representing the syntax structure of the sentence * @throws CASException if the JCas cannot be accessed. 
*/ public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException { JCas aJCas = root.getCAS().getJCas(); // Create tree Tree tree = createStanfordTree(root); // Get all non-parser related annotations // and all tokens (needed for span-calculations later on) List<Annotation> nonParserAnnotations = new ArrayList<Annotation>(); List<Token> tokens = new ArrayList<Token>(); // Using getCoveredAnnotations instead of iterate, because subiterators did not work in all // cases List<Annotation> annosWithinRoot = JCasUtil.selectCovered(aJCas, Annotation.class, root); for (Annotation curAnno : annosWithinRoot) { if (!(curAnno instanceof POS) && !(curAnno instanceof Constituent) && !(curAnno instanceof Dependency) && !(curAnno instanceof PennTree) && !(curAnno instanceof Lemma) && !(curAnno instanceof Token) && !(curAnno instanceof DocumentMetaData)) { nonParserAnnotations.add(curAnno); } else if (curAnno instanceof Token) { tokens.add((Token) curAnno); } } // create wrapper for tree and its tokens TreeWithTokens annoTree = new TreeWithTokens(tree, tokens); /* * Add annotations to the best-fitting nodes. The best-fitting node for an annotation is the * deepest node in the tree that still completely contains the annotation. 
*/ for (Annotation curAnno : nonParserAnnotations) { // get best fitting node Tree bestFittingNode = annoTree.getBestFit(curAnno); // Add annotation to node if (bestFittingNode != null) { // translate annotation span to a value relative to the // node-span IntPair span = annoTree.getSpan(bestFittingNode); curAnno.setBegin(curAnno.getBegin() - span.getSource()); curAnno.setEnd(curAnno.getEnd() - span.getSource()); // get the collection from the label of the best-fitting node in which we store UIMA // annotations or create it, if it does not exist Collection<Annotation> annotations = ((CoreLabel) bestFittingNode.label()) .get(UIMAAnnotations.class); if (annotations == null) { annotations = new ArrayList<Annotation>(); } // add annotation + checksum of annotated text to list and write it back to node // label annotations.add(curAnno); ((CoreLabel) bestFittingNode.label()).set(UIMAAnnotations.class, annotations); } } return tree; }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java
License:Open Source License
/** * Finds the best-fitting node in the tree for a given annotation. * * The best-fitting node for an annotation is the deepest node in the tree * that still completely contains the span of the given annotation. * * TODO Could be done more efficiently, I think. In a recursive method, for * example, recursion could be stopped as soon as overlap becomes -1 * * @param anno//from w ww . j a va 2s . co m * the annotation to find a best fit for * * @return the node of the tree that is the best fit for <code>anno</code> */ public Tree getBestFit(Annotation anno) { Tree curBestFit = null; int curBestOverlap = Integer.MAX_VALUE; Iterator<Tree> treeIterator = getTree().iterator(); while (treeIterator.hasNext()) { Tree curTree = treeIterator.next(); IntPair span = getSpan(curTree); // calc overlap: if annotation not completely contained in span of // subtree, overlap will be -1, otherwise it will be >0 // Our goal is to find the node with minimal positive overlap int overlap = -1; int leftBorder = anno.getBegin() - span.getSource(); int rightBorder = span.getTarget() - anno.getEnd(); if (!(leftBorder < 0) && !(rightBorder < 0)) { overlap = leftBorder + rightBorder; } // determine whether node is better than the temporary best fit if ((overlap > -1) && overlap < curBestOverlap) { curBestFit = curTree; curBestOverlap = overlap; } } return curBestFit; }