Example usage for edu.stanford.nlp.util IntPair getSource

List of usage examples for edu.stanford.nlp.util IntPair getSource

Introduction

On this page you can find example usages of edu.stanford.nlp.util.IntPair.getSource().

Prototype

public int getSource() 

Source Link

Document

Return the first element of the pair

Usage

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that start inside the span of {@code nodeF} and end inside the
 * span of {@code nodeE}, and returns that count divided by 10.
 *
 * <p>Both span ends are treated as inclusive indices.
 *
 * @param nodeF    node whose span selects the source-side indices to look up in {@code alignMap}
 * @param nodeE    node whose span the aligned (target-side) indices are tested against
 * @param alignMap maps a source index to the list of target indices it is aligned to
 * @return number of matching alignment links, divided by 10
 */
private static double insideBoth(Tree nodeF, Tree nodeE, HashMap<Integer, ArrayList<Integer>> alignMap) {
    // NOTE(review): Tree.getSpan() presumably requires spans to have been assigned on the tree
    // beforehand (e.g. via setSpans()) - confirm against the caller.
    IntPair spanF = nodeF.getSpan();
    // The target span must come from nodeE; the previous code read it from nodeF, which left
    // nodeE unused and compared the aligned indices against the source span.
    IntPair spanE = nodeE.getSpan();

    double sum = 0;
    for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) {
        ArrayList<Integer> aligned = alignMap.get(f);
        if (aligned == null) {
            continue; // no alignment recorded for this source index
        }
        for (Integer alignedIndex : aligned) {
            if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                sum++;
            }
        }
    }

    return sum / 10;
}

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that start inside the span of {@code nodeF} but end outside the
 * span of {@code nodeE}, and returns that count divided by 10.
 *
 * <p>Both span ends are treated as inclusive indices.
 *
 * @param nodeF    node whose span selects the source-side indices to look up in {@code alignMap}
 * @param nodeE    node whose span the aligned (target-side) indices are tested against
 * @param alignMap maps a source index to the list of target indices it is aligned to
 * @return number of matching alignment links, divided by 10
 */
private static double insideSrcOutsideTgt(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    // NOTE(review): Tree.getSpan() presumably requires spans to have been assigned on the tree
    // beforehand (e.g. via setSpans()) - confirm against the caller.
    IntPair spanF = nodeF.getSpan();
    // The target span must come from nodeE; the previous code read it from nodeF.
    IntPair spanE = nodeE.getSpan();

    double sum = 0;
    for (int f = spanF.getSource(); f <= spanF.getTarget(); f++) {
        ArrayList<Integer> aligned = alignMap.get(f);
        if (aligned == null) {
            continue; // no alignment recorded for this source index
        }
        for (Integer alignedIndex : aligned) {
            // "Outside" means left of the span OR right of it. The previous "&&" required an
            // index to be on both sides at once and therefore never matched.
            if (alignedIndex < spanE.getSource() || alignedIndex > spanE.getTarget()) {
                sum++;
            }
        }
    }

    return sum / 10;
}

From source file:KleinBilingualParser.java

/**
 * Counts the alignment links that start outside the span of {@code nodeF} but end inside the
 * span of {@code nodeE}, and returns that count divided by 10.
 *
 * <p>Source indices are scanned in two ranges: {@code [0, spanF.source)} and
 * {@code (spanF.target, nodeF.size())}. Both span ends are treated as inclusive indices.
 *
 * @param nodeF    node whose span is excluded from the source-side indices that are scanned
 * @param nodeE    node whose span the aligned (target-side) indices are tested against
 * @param alignMap maps a source index to the list of target indices it is aligned to
 * @return number of matching alignment links, divided by 10
 */
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {

    // NOTE(review): Tree.getSpan() presumably requires spans to have been assigned on the tree
    // beforehand (e.g. via setSpans()) - confirm against the caller.
    IntPair spanF = nodeF.getSpan();
    // The target span must come from nodeE; the previous code read it from nodeF.
    IntPair spanE = nodeE.getSpan();

    // Source indices to the left of the source span.
    double sum = countAlignedInside(0, spanF.getSource(), spanE, alignMap);

    // Source indices to the right of the source span.
    // NOTE(review): nodeF.size() is used as the upper bound exactly as before; whether it equals
    // the sentence length depends on what Tree.size() counts - confirm.
    sum += countAlignedInside(spanF.getTarget() + 1, nodeF.size(), spanE, alignMap);

    return sum / 10;
}

/**
 * Counts, over the source indices in {@code [from, to)}, the aligned indices that fall inside
 * {@code span} (both ends inclusive).
 */
private static double countAlignedInside(int from, int to, IntPair span,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    double count = 0;
    for (int f = from; f < to; f++) {
        ArrayList<Integer> aligned = alignMap.get(f);
        if (aligned == null) {
            continue; // no alignment recorded for this source index
        }
        for (Integer alignedIndex : aligned) {
            if (alignedIndex >= span.getSource() && alignedIndex <= span.getTarget()) {
                count++;
            }
        }
    }
    return count;
}

From source file:com.project.NLP.Requirement.AnaphoraAnalyzer.java

/**
 * Replaces mentions in {@code wordsFromDoc} with the representative mention of their
 * coreference chain and returns the result of {@code getPronounResolvedDocument()}.
 *
 * <p>Only chains whose representative mention has animacy {@code ANIMATE} (case-insensitive)
 * and whose mention map has more than one entry are processed. For each mention, the key of
 * the mention map is read as a 1-based (sentence, word) position; the word at that position is
 * overwritten only if it equals the mention text, ignoring case.
 *
 * @return the value returned by {@code getPronounResolvedDocument()}
 */
public String doPronounResolving() {
    // NOTE(review): chains are looked up by the ids 1..graph.size(); ids outside that range are
    // never visited - confirm that is intended.
    for (int i = 1; i <= graph.size(); i++) {
        CorefChain cc = graph.get(i);
        if (cc == null) {
            continue;
        }

        Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap();
        String mentionSpan = cc.getRepresentativeMention().mentionSpan;
        String animacy = cc.getRepresentativeMention().animacy.toString();
        if (!animacy.equalsIgnoreCase("ANIMATE") || mentionMap.size() <= 1) {
            continue;
        }

        for (Map.Entry<IntPair, Set<CorefChain.CorefMention>> entry : mentionMap.entrySet()) {
            IntPair ip = entry.getKey();
            // Convert the 1-based position stored in the key to 0-based indices.
            int sentenceIndex = ip.getSource() - 1;
            int wordIndex = ip.getTarget() - 1;

            for (CorefChain.CorefMention cm : entry.getValue()) {
                String mentionPronoun = cm.mentionSpan;
                try {
                    String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex];
                    if (mentionPronoun.equalsIgnoreCase(docWord)) {
                        wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan;
                    }
                } catch (IndexOutOfBoundsException ignored) {
                    // Best effort: a mention whose position does not exist in wordsFromDoc is
                    // skipped. IndexOutOfBoundsException is caught (rather than only its
                    // ArrayIndexOutOfBoundsException subclass) so that an out-of-range
                    // sentence lookup is skipped as well as an out-of-range word index.
                }
            }
        }
    }

    return getPronounResolvedDocument();
}

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Recursively converts a parse-tree node into UIMA annotations.
 *
 * <p>A phrasal node becomes a {@code Constituent} spanning from the begin offset of the first
 * token in the node's span to the end offset of the last one; its children are converted
 * recursively and linked to it, and it is added to the indexes. A pre-terminal node creates
 * nothing new: the {@code Token} stored on its single terminal child is linked to
 * {@code aParentFS} and returned.
 *
 * @param aJCas                      the JCas in which annotations are created
 * @param aTreebankLanguagePack      language pack supplying the grammatical-function separator;
 *                                   must be an {@code AbstractTreebankLanguagePack}
 * @param aNode                      the tree node to convert
 * @param aParentFS                  the annotation to set as parent of the created/linked
 *                                   annotation
 * @param internStrings              whether label and function strings are interned before
 *                                   being stored
 * @param constituentMappingProvider maps the node label to the UIMA type to instantiate
 * @param tokens                     tokens indexed by the positions used in the node spans
 * @return the constituent created for a phrasal node, or the token linked for a pre-terminal
 * @throws IllegalArgumentException if the node is neither phrasal nor pre-terminal
 */
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas,
        TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS,
        boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) {
    // Get node label
    String nodeLabelValue = aNode.value();

    // Extract syntactic function from node label. A separator at position 0 is not treated
    // as a split point, so a label consisting only of "<sep>function" is left intact.
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {
        Type constType = constituentMappingProvider.getTagType(nodeLabelValue);

        // The span holds token positions; translate them to character offsets.
        IntPair span = aNode.getSpan();
        int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class);
        int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class);

        Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end);
        constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue);
        constituent.setSyntacticFunction(
                internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction);
        constituent.setParent(aParentFS);

        // Do we have any children?
        List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>();
        for (Tree child : aNode.getChildrenAsList()) {
            org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas,
                    aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider,
                    tokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations));

        constituent.addToIndexes();

        return constituent;
    }
    // Create parent link on token
    else if (aNode.isPreTerminal()) {
        // link token to its parent constituent
        List<Tree> children = aNode.getChildrenAsList();
        assert children.size() == 1;
        Tree terminal = children.get(0);
        CoreLabel label = (CoreLabel) terminal.label();
        Token token = label.get(TokenKey.class);
        token.setParent(aParentFS);
        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal or pre-terminal");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Creates linked constituent annotations + POS annotations.
 *
 * <p>Phrasal nodes produce a constituent annotation that is linked to its parent and to the
 * annotations created recursively for its children. Pre-terminal nodes produce a POS
 * annotation (indexed and attached to the token only if {@code aCreatePos} is set) and return
 * the covered token, linked to its parent. If the node label carries a tag prefix separated
 * by {@code TAG_SEPARATOR}, a tag annotation is created for the node span and the prefix is
 * stripped from the label.
 *
 * @param aTreebankLanguagePack
 *            the language pack; must be an {@code AbstractTreebankLanguagePack}
 * @param aNode
 *            the source tree
 * @param aParentFS
 *            the parent annotation, or {@code null} if no parent link should be set
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @return the child-structure (needed for recursive call only)
 * @throws IllegalArgumentException
 *             if the node is neither phrasal nor pre-terminal
 */
private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode,
        Annotation aParentFS, boolean aCreatePos) {
    String nodeLabelValue = aNode.value();
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    // Split "LABEL<sep>FUNCTION"; a separator at position 0 is not treated as a split point.
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // calculate span for the current subtree
    IntPair span = tokenTree.getSpan(aNode);

    // Check if the node has been marked by a TSurgeon operation.
    // If so, add a tag-annotation on the constituent
    if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) {
        int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR);
        String tag = nodeLabelValue.substring(0, separatorIndex);
        nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1);
        createTagAnnotation(span.getSource(), span.getTarget(), tag);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {

        // add annotation to annotation tree
        Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(),
                nodeLabelValue, syntacticFunction);
        // link to parent
        if (aParentFS != null) {
            constituent.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Tree child : aNode.getChildrenAsList()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child,
                    constituent, aCreatePos);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray children = new FSArray(jCas, childAnnotations.size());
        int curChildNum = 0;
        for (FeatureStructure child : childAnnotations) {
            children.set(curChildNum, child);
            curChildNum++;
        }
        constituent.setChildren(children);

        // write annotation for current node to index
        jCas.addFsToIndexes(constituent);

        return constituent;
    }
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    else if (aNode.isPreTerminal()) {
        // create POS-annotation (annotation over the token)
        POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue);

        // in any case: get the token that is covered by the POS
        // TODO how about multi word prepositions etc. (e.g. "such as")
        List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos);
        // the POS should only cover one token
        assert coveredTokens.size() == 1;
        Token token = coveredTokens.get(0);

        // only add POS to index if we want POS-tagging
        if (aCreatePos) {
            jCas.addFsToIndexes(pos);
            token.setPos(pos);
        }

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal or pre-terminal");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Recovers annotations from a Stanford Tree-Object, which have been saved within the CoreLabel
 * of the tree./*  w  ww  .  jav  a  2s .  c  om*/
 *<p>
 * Note:
 * Copying has to be done in batch, because we need to have ALL annotations that should be
 * recovered together when copying them. The reason is that some annotations reference each
 * other, which can cause problem if a referenced annotation has not yet been recovered.
 */
public void recoverAnnotationsFromNodes() {
    // create batch-copy list for recovered annotations
    List<Annotation> annoList = new ArrayList<Annotation>();

    Iterator<Tree> treeIterator = tokenTree.getTree().iterator();
    CAS srcCAS = null;

    while (treeIterator.hasNext()) {

        Tree curTree = treeIterator.next();

        // get the collection from the label of the best-fitting node in
        // which we store UIMA annotations
        Collection<Annotation> annotations = ((CoreLabel) curTree.label()).get(UIMAAnnotations.class);

        // do we have any annotations stored in the node?
        if (annotations != null && annotations.size() > 0) {

            // translate values which are now relative to the
            // node-span back to absolute value (depending on the
            // new offset of the node-span within the new CAS)

            IntPair span = tokenTree.getSpan(curTree);
            // iterate over all annotations
            for (Annotation curAnno : annotations) {
                srcCAS = srcCAS == null ? curAnno.getCAS() : srcCAS;

                // TODO using the SPAN as new annotation index might not
                // be correct in all cases - if not an EXACTLY MATCHING
                // node had been found for the saved annotation, this will
                // be wrong. Find a way to incorporate the anno-index here
                curAnno.setBegin(span.getSource());
                curAnno.setEnd(span.getTarget());

                // add anno to batch-copy list
                annoList.add(curAnno);

            } // endfor iterate over annotations

        } // endif check for annotations in node

    } // endwhile iterate over subtrees

    /*
     * Now that we have gathered all annotations from the tree, batch-copy them to the new CAS
     */

    // create CasRecoverer (=adapted version of the CasCopier)
    CasCopier copier = new CasCopier(srcCAS, jCas.getCas());

    // now batch-copy the annos
    List<Annotation> copiedAnnos = copier.batchCopyAnnotations(annoList);

    // add copied annos to indexes
    for (Annotation cAnno : copiedAnnos) {
        jCas.addFsToIndexes(cAnno);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeUtils.java

License:Open Source License

/**
 * <p>
 * Rebuilds a Stanford Tree from the parser annotations below {@code root} and attaches every
 * other annotation covered by {@code root} to the label of its best-fitting tree node.
 * </p>
 *
 * <p>
 * <strong>CAUTION: </strong><i>This method is meant for CAS Multipliers that create new CASes
 * from the returned tree. The begin/end offsets of the attached annotations are rewritten in
 * the source CAS (they become relative to the start of the node span), so the source CAS
 * should NOT be used after this call. Use createStanfordTree() instead if the annotations do
 * not need to be recovered or accessed through the tree; it leaves the CAS untouched.</i>
 * </p>
 *
 * <p>
 * TODO: This could be avoided by modifying COPIES of the annotations rather than the
 * originals. Copying would require a dummy CAS to copy into; on recovery the annotations are
 * copied to the destination CAS anyway.
 * </p>
 *
 * @param root
 *            the ROOT annotation
 * @return an {@link Tree} object representing the syntax structure of the sentence
 * @throws CASException if the JCas cannot be accessed.
 */
public static Tree createStanfordTreeWithAnnotations(Annotation root) throws CASException {
    JCas jcas = root.getCAS().getJCas();

    Tree tree = createStanfordTree(root);

    // Split everything covered by the root into tokens (needed for span calculations) and
    // annotations that are not produced by the parser. selectCovered is used rather than
    // subiterators, which did not work in all cases.
    List<Annotation> toAttach = new ArrayList<Annotation>();
    List<Token> tokens = new ArrayList<Token>();
    for (Annotation covered : JCasUtil.selectCovered(jcas, Annotation.class, root)) {
        if (covered instanceof Token) {
            tokens.add((Token) covered);
            continue;
        }
        boolean parserRelated = covered instanceof POS || covered instanceof Constituent
                || covered instanceof Dependency || covered instanceof PennTree
                || covered instanceof Lemma || covered instanceof DocumentMetaData;
        if (!parserRelated) {
            toAttach.add(covered);
        }
    }

    TreeWithTokens treeWithTokens = new TreeWithTokens(tree, tokens);

    // Hang each annotation on its best-fitting node; annotations without one are skipped.
    for (Annotation anno : toAttach) {
        Tree host = treeWithTokens.getBestFit(anno);
        if (host == null) {
            continue;
        }

        // Make the annotation offsets relative to the start of the host node's span.
        int spanStart = treeWithTokens.getSpan(host).getSource();
        anno.setBegin(anno.getBegin() - spanStart);
        anno.setEnd(anno.getEnd() - spanStart);

        // Fetch (or lazily create) the annotation collection kept on the node label, add the
        // annotation and write the collection back.
        CoreLabel hostLabel = (CoreLabel) host.label();
        Collection<Annotation> stored = hostLabel.get(UIMAAnnotations.class);
        if (stored == null) {
            stored = new ArrayList<Annotation>();
        }
        stored.add(anno);
        hostLabel.set(UIMAAnnotations.class, stored);
    }

    return tree;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

/**
 * Finds the best-fitting node in the tree for a given annotation.
 *
 * A node is a candidate if its span completely contains the span of the annotation. Among the
 * candidates, the one with the least total slack (distance between the span starts plus
 * distance between the span ends) wins; on ties the node visited first by the tree iterator
 * is kept.
 *
 * TODO Could be done more efficiently, e.g. with a recursive method that stops descending as
 * soon as a node no longer contains the annotation.
 *
 * @param anno
 *            the annotation to find a best fit for
 *
 * @return the node of the tree that is the best fit for <code>anno</code>, or
 *         <code>null</code> if no node contains it
 */
public Tree getBestFit(Annotation anno) {
    Tree bestNode = null;
    int smallestSlack = Integer.MAX_VALUE;

    for (Iterator<Tree> nodes = getTree().iterator(); nodes.hasNext();) {
        Tree candidate = nodes.next();
        IntPair nodeSpan = getSpan(candidate);

        int slackLeft = anno.getBegin() - nodeSpan.getSource();
        int slackRight = nodeSpan.getTarget() - anno.getEnd();
        if (slackLeft < 0 || slackRight < 0) {
            // annotation sticks out of this node's span on at least one side
            continue;
        }

        int slack = slackLeft + slackRight;
        if (slack >= 0 && slack < smallestSlack) {
            bestNode = candidate;
            smallestSlack = slack;
        }
    }

    return bestNode;
}