Example usage for the edu.stanford.nlp.util.IntPair constructor IntPair(int, int)

Introduction

On this page you can find example usages of the edu.stanford.nlp.util.IntPair constructor IntPair(int, int).

Prototype

public IntPair(int src, int trgt)

Source Link

Usage

From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java

License:Open Source License

/**
 * Builds a new per-sentence mention list that contains every mention of
 * {@code target} plus those mentions of {@code source} for which
 * {@code isPronominal} returns true and whose (startIndex, endIndex) span
 * is not already present in the same sentence.
 * <p>
 * Sentences are aligned by position; the number of sentences processed is
 * {@code target.size()}, and {@code source} is indexed with the same
 * sentence numbers. Progress messages are written to {@code System.err}.
 *
 * @param target per-sentence mentions that are always kept
 * @param source per-sentence mentions from which pronominal ones are taken
 * @return a new list with one merged mention list per sentence of {@code target}
 */
public List<List<Mention>> injectPronoun(List<List<Mention>> target, List<List<Mention>> source) {
    List<List<Mention>> merged = new ArrayList<List<Mention>>();

    for (int sentence = 0, sentenceCount = target.size(); sentence < sentenceCount; sentence++) {
        List<Mention> sentenceMentions = new ArrayList<Mention>();
        // Spans already occupied in this sentence, keyed by (startIndex, endIndex).
        HashSet<IntPair> seenSpans = new HashSet<IntPair>();

        for (Mention kept : target.get(sentence)) {
            sentenceMentions.add(kept);
            seenSpans.add(new IntPair(kept.startIndex, kept.endIndex));
        }

        for (Mention candidate : source.get(sentence)) {
            // add() returns true only when the span was not in the set yet.
            if (isPronominal(candidate)
                    && seenSpans.add(new IntPair(candidate.startIndex, candidate.endIndex))) {
                sentenceMentions.add(candidate);
                System.err.println("INJECTED!");
            }
        }

        merged.add(sentenceMentions);
    }

    System.err.println("INJECT PRONOUN!");
    return merged;
}

From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java

License:Open Source License

/**
 * Computes, sentence by sentence, the union of two mention collections,
 * where two mentions count as the same if they share the
 * (startIndex, endIndex) span.
 * <p>
 * All mentions of {@code set1} are kept in their original order; mentions
 * of {@code set2} are appended only when their span has not been seen yet
 * in that sentence. The number of sentences processed is
 * {@code set1.size()}, and {@code set2} is indexed with the same sentence
 * numbers.
 *
 * @param set1 per-sentence mentions that are always kept
 * @param set2 per-sentence mentions added when their span is new
 * @return a new list with one merged mention list per sentence of {@code set1}
 */
public static List<List<Mention>> unionMentions(List<List<Mention>> set1, List<List<Mention>> set2) {
    List<List<Mention>> union = new ArrayList<List<Mention>>();
    final int sentenceCount = set1.size();

    int sentence = 0;
    while (sentence < sentenceCount) {
        List<Mention> combined = new ArrayList<Mention>();
        HashSet<IntPair> seenSpans = new HashSet<IntPair>();

        for (Mention first : set1.get(sentence)) {
            combined.add(first);
            seenSpans.add(new IntPair(first.startIndex, first.endIndex));
        }

        for (Mention second : set2.get(sentence)) {
            // add() reports whether the span was new, replacing contains()+add().
            boolean isNewSpan = seenSpans.add(new IntPair(second.startIndex, second.endIndex));
            if (isNewSpan) {
                combined.add(second);
            }
        }

        union.add(combined);
        sentence++;
    }

    return union;
}

From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java

License:Open Source License

/**
 * Computes, sentence by sentence, the mentions of {@code set2} whose
 * (startIndex, endIndex) span also occurs among the mentions of
 * {@code set1} in the same sentence.
 * <p>
 * The returned mention objects are the ones from {@code set2}, in their
 * original order. The number of sentences processed is {@code set1.size()},
 * and {@code set2} is indexed with the same sentence numbers.
 *
 * @param set1 per-sentence mentions that define the allowed spans
 * @param set2 per-sentence mentions that are filtered by those spans
 * @return a new list with one filtered mention list per sentence of {@code set1}
 */
public static List<List<Mention>> intersectMentions(List<List<Mention>> set1, List<List<Mention>> set2) {
    List<List<Mention>> intersection = new ArrayList<List<Mention>>();

    for (int sentence = 0, sentenceCount = set1.size(); sentence < sentenceCount; sentence++) {
        // Spans present in set1 for this sentence.
        HashSet<IntPair> allowedSpans = new HashSet<IntPair>();
        for (Mention first : set1.get(sentence)) {
            allowedSpans.add(new IntPair(first.startIndex, first.endIndex));
        }

        List<Mention> shared = new ArrayList<Mention>();
        for (Mention second : set2.get(sentence)) {
            if (!allowedSpans.contains(new IntPair(second.startIndex, second.endIndex))) {
                continue;
            }
            shared.add(second);
        }

        intersection.add(shared);
    }

    return intersection;
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

/**
 * Returns the span of the document text covered by the given subtree. The
 * subtree has to be taken directly from the original tree.
 * <p>
 * The span runs from the begin offset of the token belonging to the
 * subtree's leftmost leaf to the end offset of the token belonging to its
 * rightmost leaf. Leaf indices are decremented by one before the token
 * lookup, i.e. they are treated as 1-based.
 * <p>
 * NOTE: Possibly this could be made more general to also support trees that
 * are contained in the original tree but are not directly taken from it
 * (i.e. with a different leaf numbering). To do so, the given subtree would
 * have to be located in the original tree first, e.g. by translating the
 * subtree into a Tregex pattern and matching that pattern against the
 * original tree to identify the position of the subtree.
 *
 * @param subtree
 *            a subtree of this TreeWithTokens (it has to be a real
 *            subtree, because the index numbering of the subtree has to
 *            fit the numbering of the original tree)
 * @return an IntPair describing the span of the document text that is
 *         covered by this tree
 */
public IntPair getSpan(Tree subtree) {
    // TODO check if subtree is a real subtree of tokenTree.getTree()

    int leftLeafIndex = ((CoreLabel) getLeftmostLeaf(subtree).label()).index();
    int rightLeafIndex = ((CoreLabel) getRightmostLeaf(subtree).label()).index();

    // Leaf indices are shifted by one relative to positions in the token list.
    int spanBegin = tokens.get(leftLeafIndex - 1).getBegin();
    int spanEnd = tokens.get(rightLeafIndex - 1).getEnd();
    return new IntPair(spanBegin, spanEnd);
}

From source file:gov.llnl.ontology.util.AnnotationUtil.java

License:Open Source License

/**
 * Sets the index span for {@code annot} from separate start and end
 * values by wrapping them in an {@code IntPair} and delegating to the
 * {@code setSpan(Annotation, IntPair)} overload.
 *
 * @param annot the annotation to update
 * @param start first component of the span
 * @param end second component of the span
 */
public static void setSpan(Annotation annot, int start, int end) {
    IntPair span = new IntPair(start, end);
    setSpan(annot, span);
}

From source file:knu.univ.lingvo.coref.CorefChain.java

License:Open Source License

/**
 * Looks up the mentions of this chain by position.
 *
 * @param sentenceNumber sentence number component of the lookup key
 * @param headIndex head index component of the lookup key
 * @return whatever {@code mentionMap} holds for the key
 *         (sentenceNumber, headIndex); presumably {@code null} when no
 *         mention was registered at that position (verify against the
 *         map's construction)
 */
public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
    IntPair position = new IntPair(sentenceNumber, headIndex);
    return mentionMap.get(position);
}

From source file:knu.univ.lingvo.coref.CorefChain.java

License:Open Source License

/**
 * Builds a coreference chain from a cluster.
 * <p>
 * Every mention of the cluster is wrapped in a {@code CorefMention}
 * together with its entry in {@code positions}; the wrapped mentions are
 * sorted with {@code CorefMentionComparator}, indexed in {@code mentionMap}
 * by (sentNum, headIndex), and the representative mention is chosen by
 * repeatedly applying {@code moreRepresentativeThan} in sorted order.
 *
 * @param c the cluster whose ID and mentions make up this chain
 * @param positions position tuple for each mention of the cluster
 */
public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions) {
    chainID = c.clusterID;

    // Wrap every cluster mention together with its position.
    mentions = new ArrayList<CorefMention>();
    mentionMap = Generics.newHashMap();
    for (Mention clusterMention : c.getCorefMentions()) {
        mentions.add(new CorefMention(clusterMention, positions.get(clusterMention)));
    }
    Collections.sort(mentions, new CorefMentionComparator());

    // Index by (sentence, head) and track the most representative mention so far.
    CorefMention best = null;
    for (CorefMention current : mentions) {
        IntPair position = new IntPair(current.sentNum, current.headIndex);
        Set<CorefMention> samePosition = mentionMap.get(position);
        if (samePosition == null) {
            samePosition = Generics.<CorefMention>newHashSet();
            mentionMap.put(position, samePosition);
        }
        samePosition.add(current);

        if (current.moreRepresentativeThan(best)) {
            best = current;
        }
    }
    representative = best;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Mark twin mentions: all mention boundaries should be matched.
 * <p>
 * For every sentence, gold mentions are grouped by their
 * (startIndex, endIndex) span. Each predicted mention with a span that
 * still has an unmatched gold mention consumes one of those gold mentions:
 * it takes over the gold mention's ID and both are marked as not twinless.
 * Predicted mentions that remain twinless get 10000 added to their
 * mention ID so they are easy to recognize.
 */
private void findTwinMentionsStrict() {
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) {
        List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum);
        List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum);

        // For CoNLL training there are some documents with gold mentions with the same position offsets
        // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
        //  (Packwood - Roth)
        CollectionValuedMap<IntPair, Mention> goldMentionPositions = new CollectionValuedMap<IntPair, Mention>();
        for (Mention g : golds) {
            IntPair ip = new IntPair(g.startIndex, g.endIndex);
            if (goldMentionPositions.containsKey(ip)) {
                StringBuilder existingMentions = new StringBuilder();
                for (Mention eg : goldMentionPositions.get(ip)) {
                    if (existingMentions.length() > 0) {
                        existingMentions.append(",");
                    }
                    existingMentions.append(eg.mentionID);
                }
                SieveCoreferenceSystem.logger.warning("WARNING: gold mentions with the same offsets: " + ip
                        + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.spanToString());
            }
            //assert(!goldMentionPositions.containsKey(ip));
            // Reuse the key built above instead of allocating an equal one.
            goldMentionPositions.add(ip, g);
        }

        for (Mention p : predicts) {
            IntPair pos = new IntPair(p.startIndex, p.endIndex);
            if (!goldMentionPositions.containsKey(pos)) {
                continue;
            }
            Collection<Mention> cm = goldMentionPositions.get(pos);
            // The key stays in the map after its last gold mention has been
            // consumed below, so a later predicted mention with the same span
            // would otherwise hit NoSuchElementException on iterator().next().
            if (cm.isEmpty()) {
                continue;
            }
            Mention g = cm.iterator().next();
            cm.remove(g);
            p.mentionID = g.mentionID;
            p.twinless = false;
            g.twinless = false;
        }

        // temp: for making easy to recognize twinless mention
        for (Mention p : predicts) {
            if (p.twinless) {
                p.mentionID += 10000;
            }
        }
    }
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Mark twin mentions: heads of the mentions are matched.
 * <p>
 * For every sentence this runs two passes over the predicted mentions:
 * <ol>
 * <li>A predicted mention whose (startIndex, endIndex) span equals that of
 * a gold mention takes over the gold mention's ID, and both are marked as
 * not twinless. The gold mention is removed from the per-head candidates.</li>
 * <li>Each remaining predicted mention is paired with the first still
 * available gold mention that has the same head index, if any.</li>
 * </ol>
 * When several gold mentions share a span, only the last one is kept in
 * the span map, because later entries overwrite earlier ones.
 */
private void findTwinMentionsRelaxed() {
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) {
        List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum);
        List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum);

        Map<IntPair, Mention> goldMentionPositions = Generics.newHashMap();
        Map<Integer, LinkedList<Mention>> goldMentionHeadPositions = Generics.newHashMap();
        for (Mention g : golds) {
            goldMentionPositions.put(new IntPair(g.startIndex, g.endIndex), g);
            if (!goldMentionHeadPositions.containsKey(g.headIndex)) {
                goldMentionHeadPositions.put(g.headIndex, new LinkedList<Mention>());
            }
            goldMentionHeadPositions.get(g.headIndex).add(g);
        }

        // Pass 1: exact span matches.
        List<Mention> remains = new ArrayList<Mention>();
        for (Mention p : predicts) {
            IntPair pos = new IntPair(p.startIndex, p.endIndex);
            if (goldMentionPositions.containsKey(pos)) {
                Mention g = goldMentionPositions.get(pos);
                p.mentionID = g.mentionID;
                p.twinless = false;
                g.twinless = false;
                // The span map is never consumed, so the same gold mention can be
                // matched again by a later predicted mention with an equal span.
                // By then its head list may already have been dropped, so guard
                // against a missing entry instead of dereferencing null.
                LinkedList<Mention> sameHead = goldMentionHeadPositions.get(g.headIndex);
                if (sameHead != null) {
                    sameHead.remove(g);
                    if (sameHead.isEmpty()) {
                        goldMentionHeadPositions.remove(g.headIndex);
                    }
                }
            } else {
                remains.add(p);
            }
        }

        // Pass 2: head matches for predicted mentions without an exact span match.
        for (Mention r : remains) {
            if (goldMentionHeadPositions.containsKey(r.headIndex)) {
                // Lists stored in the map are never empty: they are removed as
                // soon as their last element is taken.
                Mention g = goldMentionHeadPositions.get(r.headIndex).poll();
                r.mentionID = g.mentionID;
                r.twinless = false;
                g.twinless = false;
                if (goldMentionHeadPositions.get(g.headIndex).isEmpty()) {
                    goldMentionHeadPositions.remove(g.headIndex);
                }
            }
        }
    }
}