Example usage for edu.stanford.nlp.util IntPair getTarget

List of usage examples for edu.stanford.nlp.util IntPair getTarget

Introduction

This page collects usage examples for the method edu.stanford.nlp.util.IntPair.getTarget().

Prototype

public int getTarget() 

Source Link

Document

Returns the second element of the pair.

Usage

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that start inside the span of {@code nodeF} and end inside the
 * span of {@code nodeE}.
 *
 * <p>Both spans are treated as inclusive on both ends.
 *
 * @param nodeF node whose span supplies the range of source positions to scan
 * @param nodeE node whose span supplies the range of accepted target positions
 * @param alignMap maps a source position to the target positions aligned with it
 * @return the number of matching links divided by 10
 */
private static double insideBoth(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) {
    IntPair spanF = nodeF.getSpan();
    // The target-side span has to come from nodeE; it used to be read from nodeF, which left
    // nodeE unused and made both spans identical.
    IntPair spanE = nodeE.getSpan();

    int linkCount = 0;
    for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) {
        ArrayList<Integer> alignedIndices = alignMap.get(f);
        if (alignedIndices == null) {
            // Source position without any alignment link.
            continue;
        }
        for (int alignedIndex : alignedIndices) {
            if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                linkCount++;
            }
        }
    }

    return linkCount / 10.0;
}

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that start inside the span of {@code nodeF} but end outside the
 * span of {@code nodeE}.
 *
 * <p>Both spans are treated as inclusive on both ends.
 *
 * @param nodeF node whose span supplies the range of source positions to scan
 * @param nodeE node whose span supplies the range of target positions that do NOT count
 * @param alignMap maps a source position to the target positions aligned with it
 * @return the number of matching links divided by 10
 */
private static double insideSrcOutsideTgt(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    IntPair spanF = nodeF.getSpan();
    // The target-side span has to come from nodeE; it used to be read from nodeF.
    IntPair spanE = nodeE.getSpan();

    int linkCount = 0;
    for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) {
        ArrayList<Integer> alignedIndices = alignMap.get(f);
        if (alignedIndices == null) {
            // Source position without any alignment link.
            continue;
        }
        for (int alignedIndex : alignedIndices) {
            // "Outside" means left of the span OR right of it. The previous "&&" could never
            // hold for a span with source <= target, so the method always returned 0.
            if (alignedIndex < spanE.getSource() || alignedIndex > spanE.getTarget()) {
                linkCount++;
            }
        }
    }

    return linkCount / 10.0;
}

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that end inside the span of {@code nodeE} but start outside the
 * span of {@code nodeF}.
 *
 * <p>The source positions scanned are those before the span of {@code nodeF} (from 0) and
 * those after it (up to, but excluding, {@code nodeF.size()}).
 *
 * @param nodeF node whose span is excluded from the scanned source positions
 * @param nodeE node whose span supplies the range of accepted target positions (inclusive)
 * @param alignMap maps a source position to the target positions aligned with it
 * @return the number of matching links divided by 10
 */
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    IntPair spanF = nodeF.getSpan();
    // The target-side span has to come from nodeE; it used to be read from nodeF.
    IntPair spanE = nodeE.getSpan();

    // Source positions to the left of nodeF's span.
    int linkCount = countLinksIntoSpan(0, spanF.getSource(), spanE, alignMap);

    // Source positions to the right of nodeF's span.
    // NOTE(review): nodeF.size() is kept as the exclusive upper bound from the original code;
    // confirm that it really equals the number of source positions in the sentence.
    linkCount += countLinksIntoSpan(spanF.getTarget() + 1, nodeF.size(), spanE, alignMap);

    return linkCount / 10.0;
}

/** Counts links from source positions in [from, to) to target positions inside targetSpan. */
private static int countLinksIntoSpan(int from, int to, IntPair targetSpan,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    int linkCount = 0;
    for (int f = from; f < to; f++) {
        ArrayList<Integer> alignedIndices = alignMap.get(f);
        if (alignedIndices == null) {
            // Source position without any alignment link.
            continue;
        }
        for (int alignedIndex : alignedIndices) {
            if (alignedIndex >= targetSpan.getSource() && alignedIndex <= targetSpan.getTarget()) {
                linkCount++;
            }
        }
    }
    return linkCount;
}

From source file:com.project.NLP.Requirement.AnaphoraAnalyzer.java

/**
 * Replaces words in {@code wordsFromDoc} that match a coreference mention with the span of the
 * chain's representative mention, then returns the resolved document.
 *
 * <p>Only chains whose representative mention has animacy {@code ANIMATE} and whose mention
 * map has more than one entry are processed. For each mention, the key of the mention map is
 * used as a 1-based (sentence, word) position into {@code wordsFromDoc}; the word at that
 * position is replaced only if it equals the mention text, ignoring case.
 *
 * @return the result of {@code getPronounResolvedDocument()} after the replacements
 */
public String doPronounResolving() {
    // NOTE(review): assumes the chains in graph are keyed 1..graph.size(); ids that are
    // missing simply yield null and are skipped.
    for (int i = 1; i <= graph.size(); i++) {
        CorefChain cc = graph.get(i);
        if (cc == null) {
            continue;
        }

        Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap();
        String mentionSpan = cc.getRepresentativeMention().mentionSpan;
        String animacy = cc.getRepresentativeMention().animacy.toString();
        if (!animacy.equalsIgnoreCase("ANIMATE") || mentionMap.size() <= 1) {
            continue;
        }

        for (Map.Entry<IntPair, Set<CorefChain.CorefMention>> entry : mentionMap.entrySet()) {
            IntPair ip = entry.getKey();
            // The pair is 1-based; wordsFromDoc is indexed from 0.
            int sentenceIndex = ip.getSource() - 1;
            int wordIndex = ip.getTarget() - 1;

            for (CorefChain.CorefMention cm : entry.getValue()) {
                String mentionPronoun = cm.mentionSpan;
                try {
                    String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex];
                    if (mentionPronoun.equalsIgnoreCase(docWord)) {
                        wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan;
                    }
                } catch (ArrayIndexOutOfBoundsException ignored) {
                    // Deliberate best effort: a mention whose word position does not exist
                    // in the stored sentence is left untouched.
                }
            }
        }
    }

    return getPronounResolvedDocument();
}

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Converts a parse tree node, and recursively its subtree, into UIMA annotations.
 *
 * <p>A phrasal node becomes a {@code Constituent} that is linked to its parent and children and
 * added to the indexes. A pre-terminal node creates no new annotation: the token stored on its
 * single child's label is linked to the parent and returned.
 *
 * @param aJCas the JCas in which constituents are created
 * @param aTreebankLanguagePack language pack supplying the grammatical-function character;
 *            must be an {@code AbstractTreebankLanguagePack}
 * @param aNode the tree node to convert
 * @param aParentFS the annotation to set as parent of the converted node
 * @param internStrings whether the constituent type and syntactic function strings are interned
 * @param constituentMappingProvider maps the node label to the UIMA type to create
 * @param tokens the tokens indexed by the node spans; their character offsets give the
 *            constituent boundaries
 * @return the constituent created for a phrasal node, or the token of a pre-terminal node
 * @throws IllegalArgumentException if the node is neither phrasal nor pre-terminal
 */
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas,
        TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS,
        boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) {
    // Split "LABEL<gf-char>FUNCTION" into the plain label and the syntactic function.
    String label = aNode.value();
    String function = null;
    AbstractTreebankLanguagePack languagePack = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int separatorPos = label.indexOf(languagePack.getGfCharacter());
    if (separatorPos > 0) {
        function = label.substring(separatorPos + 1);
        label = label.substring(0, separatorPos);
    }

    if (!aNode.isPhrasal()) {
        if (!aNode.isPreTerminal()) {
            throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal");
        }

        // Pre-terminal: hook the already existing token up to its parent constituent.
        List<Tree> kids = aNode.getChildrenAsList();
        assert kids.size() == 1;
        CoreLabel leafLabel = (CoreLabel) kids.get(0).label();
        Token token = leafLabel.get(TokenKey.class);
        token.setParent(aParentFS);
        return token;
    }

    // Phrasal node: the constituent runs from the begin offset of the first covered token to
    // the end offset of the last covered token.
    Type constituentType = constituentMappingProvider.getTagType(label);
    IntPair tokenSpan = aNode.getSpan();
    int begin = tokens.get(tokenSpan.getSource()).get(CharacterOffsetBeginAnnotation.class);
    int end = tokens.get(tokenSpan.getTarget()).get(CharacterOffsetEndAnnotation.class);

    Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constituentType, begin, end);
    String typeValue = internStrings ? label.intern() : label;
    constituent.setConstituentType(typeValue);
    String functionValue = (internStrings && function != null) ? function.intern() : function;
    constituent.setSyntacticFunction(functionValue);
    constituent.setParent(aParentFS);

    // Convert the subtree first so that the child array can be sized from the actual results.
    List<org.apache.uima.jcas.tcas.Annotation> converted = new ArrayList<>();
    for (Tree kid : aNode.getChildrenAsList()) {
        org.apache.uima.jcas.tcas.Annotation convertedKid = convertConstituentTreeNode(aJCas,
                aTreebankLanguagePack, kid, constituent, internStrings, constituentMappingProvider,
                tokens);
        if (convertedKid != null) {
            converted.add(convertedKid);
        }
    }
    constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, converted));

    constituent.addToIndexes();
    return constituent;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Creates linked constituent annotations + POS annotations
 * /*from  w  w  w  .  jav  a2s. com*/
 * @param aTreebankLanguagePack
 *            the language pack.
 * @param aNode
 *            the source tree
 * @param aParentFS
 *            the parent annotation
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode,
        Annotation aParentFS, boolean aCreatePos) {
    String nodeLabelValue = aNode.value();
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // calculate span for the current subtree
    IntPair span = tokenTree.getSpan(aNode);

    // Check if the node has been marked by a TSurgeon operation.
    // If so, add a tag-annotation on the constituent
    if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) {
        int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR);
        String tag = nodeLabelValue.substring(0, separatorIndex);
        nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length());
        createTagAnnotation(span.getSource(), span.getTarget(), tag);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {

        // add annotation to annotation tree
        Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(),
                nodeLabelValue, syntacticFunction);
        // link to parent
        if (aParentFS != null) {
            constituent.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Tree child : aNode.getChildrenAsList()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child,
                    constituent, aCreatePos);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray children = new FSArray(jCas, childAnnotations.size());
        int curChildNum = 0;
        for (FeatureStructure child : childAnnotations) {
            children.set(curChildNum, child);
            curChildNum++;
        }
        constituent.setChildren(children);

        // write annotation for current node to index
        jCas.addFsToIndexes(constituent);

        return constituent;
    }
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    else if (aNode.isPreTerminal()) {
        // create POS-annotation (annotation over the token)
        POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue);

        // in any case: get the token that is covered by the POS
        // TODO how about multi word prepositions etc. (e.g. "such as")
        List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos);
        // the POS should only cover one token
        assert coveredTokens.size() == 1;
        Token token = coveredTokens.get(0);

        // only add POS to index if we want POS-tagging
        if (aCreatePos) {
            jCas.addFsToIndexes(pos);
            token.setPos(pos);
        }

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Recovers annotations from a Stanford Tree-Object, which have been saved within the CoreLabel
 * of the tree.
 * <p>
 * Each recovered annotation gets the span of the node it was stored on as its new begin and
 * end, is copied to the target CAS and is added to the indexes there. If no node carries any
 * annotation, the method does nothing.
 * <p>
 * Note:
 * Copying has to be done in batch, because we need to have ALL annotations that should be
 * recovered together when copying them. The reason is that some annotations reference each
 * other, which can cause problem if a referenced annotation has not yet been recovered.
 */
public void recoverAnnotationsFromNodes() {
    // create batch-copy list for recovered annotations
    List<Annotation> annoList = new ArrayList<Annotation>();

    Iterator<Tree> treeIterator = tokenTree.getTree().iterator();
    // CAS the stored annotations live in; taken from the first annotation that is found
    CAS srcCAS = null;

    while (treeIterator.hasNext()) {
        Tree curTree = treeIterator.next();

        // get the collection from the label of the best-fitting node in
        // which we store UIMA annotations
        Collection<Annotation> annotations = ((CoreLabel) curTree.label()).get(UIMAAnnotations.class);

        // do we have any annotations stored in the node?
        if (annotations == null || annotations.isEmpty()) {
            continue;
        }

        // translate values which are now relative to the
        // node-span back to absolute value (depending on the
        // new offset of the node-span within the new CAS)
        IntPair span = tokenTree.getSpan(curTree);
        for (Annotation curAnno : annotations) {
            if (srcCAS == null) {
                srcCAS = curAnno.getCAS();
            }

            // TODO using the SPAN as new annotation index might not
            // be correct in all cases - if not an EXACTLY MATCHING
            // node had been found for the saved annotation, this will
            // be wrong. Find a way to incorporate the anno-index here
            curAnno.setBegin(span.getSource());
            curAnno.setEnd(span.getTarget());

            // add anno to batch-copy list
            annoList.add(curAnno);
        }
    }

    // Nothing was stored in the tree: there is no source CAS to copy from, so do not build a
    // copier around a null CAS.
    if (annoList.isEmpty()) {
        return;
    }

    /*
     * Now that we have gathered all annotations from the tree, batch-copy them to the new CAS
     */

    // create CasRecoverer (=adapted version of the CasCopier)
    CasCopier copier = new CasCopier(srcCAS, jCas.getCas());

    // now batch-copy the annos
    List<Annotation> copiedAnnos = copier.batchCopyAnnotations(annoList);

    // add copied annos to indexes
    for (Annotation cAnno : copiedAnnos) {
        jCas.addFsToIndexes(cAnno);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

/**
 * Finds the best-fitting node in the tree for a given annotation.
 *
 * The best-fitting node is the one whose span completely contains the span of the annotation
 * while exceeding it by the smallest amount. If several nodes exceed it by the same amount,
 * the one returned first by the tree iterator wins.
 *
 * TODO Could be done more efficiently, I think. In a recursive method, for
 * example, recursion could be stopped as soon as a node no longer contains the annotation.
 *
 * @param anno
 *            the annotation to find a best fit for
 *
 * @return the node of the tree that is the best fit for <code>anno</code>, or
 *         <code>null</code> if no node contains it
 */
public Tree getBestFit(Annotation anno) {
    Tree tightestNode = null;
    int tightestSlack = Integer.MAX_VALUE;

    for (Iterator<Tree> nodes = getTree().iterator(); nodes.hasNext();) {
        Tree candidate = nodes.next();
        IntPair candidateSpan = getSpan(candidate);

        // Slack is how far the node span sticks out on either side of the annotation. A
        // negative value on one side means the annotation is not contained in the node.
        int slackBefore = anno.getBegin() - candidateSpan.getSource();
        int slackAfter = candidateSpan.getTarget() - anno.getEnd();
        boolean contained = slackBefore >= 0 && slackAfter >= 0;
        int slack = contained ? slackBefore + slackAfter : -1;

        // Keep the containing node with the least slack; zero slack is an exact fit.
        if (slack >= 0 && slack < tightestSlack) {
            tightestNode = candidate;
            tightestSlack = slack;
        }
    }

    return tightestNode;
}