List of usage examples for edu.stanford.nlp.util IntPair IntPair
public IntPair(int src, int trgt)
From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java
License:Open Source License
/**
 * Builds, for every sentence of {@code target}, a new mention list that holds all
 * target mentions followed by those pronominal mentions of {@code source} whose
 * (startIndex, endIndex) span is not already present.
 *
 * <p>A line is written to {@code System.err} for each injected mention and once
 * more when the whole pass is finished. The input lists are not modified.
 *
 * @param target per-sentence mentions that are always kept
 * @param source per-sentence mentions from which pronominal ones may be added;
 *               it is indexed with every sentence index of {@code target}
 * @return a new per-sentence list of mentions
 */
public List<List<Mention>> injectPronoun(List<List<Mention>> target, List<List<Mention>> source) {
    List<List<Mention>> merged = new ArrayList<List<Mention>>();
    final int sentenceCount = target.size();

    for (int i = 0; i < sentenceCount; i++) {
        List<Mention> targetMentions = target.get(i);

        // Start from a copy of the target mentions and remember which spans they occupy.
        ArrayList<Mention> combined = new ArrayList<Mention>(targetMentions);
        HashSet<IntPair> seenSpans = new HashSet<IntPair>();
        for (Mention existing : targetMentions) {
            seenSpans.add(new IntPair(existing.startIndex, existing.endIndex));
        }

        for (Mention candidate : source.get(i)) {
            // HashSet.add returns true only when the span was not yet recorded.
            if (isPronominal(candidate)
                    && seenSpans.add(new IntPair(candidate.startIndex, candidate.endIndex))) {
                combined.add(candidate);
                System.err.println("INJECTED!");
            }
        }

        merged.add(combined);
    }

    System.err.println("INJECT PRONOUN!");
    return merged;
}
From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java
License:Open Source License
/**
 * Computes the per-sentence union of two mention collections, where two mentions
 * count as the same when their (startIndex, endIndex) spans are equal.
 *
 * <p>For each sentence the result contains every mention of {@code set1} in its
 * original order, followed by the mentions of {@code set2} whose span has not
 * been seen before in that sentence.
 *
 * @param set1 per-sentence mentions that are always kept; its size decides how
 *             many sentences are processed
 * @param set2 per-sentence mentions that are added when their span is new
 * @return a new per-sentence list of mentions
 */
public static List<List<Mention>> unionMentions(List<List<Mention>> set1, List<List<Mention>> set2) {
    List<List<Mention>> union = new ArrayList<List<Mention>>();
    final int sentenceCount = set1.size();

    for (int i = 0; i < sentenceCount; i++) {
        ArrayList<Mention> combined = new ArrayList<Mention>();
        HashSet<IntPair> seenSpans = new HashSet<IntPair>();

        for (Mention first : set1.get(i)) {
            combined.add(first);
            seenSpans.add(new IntPair(first.startIndex, first.endIndex));
        }

        for (Mention second : set2.get(i)) {
            // add() reports whether the span was new, replacing contains() + add().
            boolean unseen = seenSpans.add(new IntPair(second.startIndex, second.endIndex));
            if (unseen) {
                combined.add(second);
            }
        }

        union.add(combined);
    }

    return union;
}
From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java
License:Open Source License
/**
 * Computes the per-sentence intersection of two mention collections, where two
 * mentions count as the same when their (startIndex, endIndex) spans are equal.
 *
 * <p>The returned mention objects are the ones from {@code set2}; {@code set1}
 * only contributes the set of spans to match against.
 *
 * @param set1 per-sentence mentions providing the spans to look for; its size
 *             decides how many sentences are processed
 * @param set2 per-sentence mentions that are kept when their span occurs in
 *             the same sentence of {@code set1}
 * @return a new per-sentence list of mentions
 */
public static List<List<Mention>> intersectMentions(List<List<Mention>> set1, List<List<Mention>> set2) {
    List<List<Mention>> intersection = new ArrayList<List<Mention>>();
    final int sentenceCount = set1.size();

    for (int i = 0; i < sentenceCount; i++) {
        HashSet<IntPair> spansInFirst = new HashSet<IntPair>();
        for (Mention first : set1.get(i)) {
            spansInFirst.add(new IntPair(first.startIndex, first.endIndex));
        }

        ArrayList<Mention> shared = new ArrayList<Mention>();
        for (Mention second : set2.get(i)) {
            if (spansInFirst.contains(new IntPair(second.startIndex, second.endIndex))) {
                shared.add(second);
            }
        }

        intersection.add(shared);
    }

    return intersection;
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java
License:Open Source License
/** * Returns the span of the documentText that is covered by a given subtree, * that has to be taken directly from the original tree. * <p>// ww w .j av a2s . c om * NOTE: Possibly we could make this more general to also support general * trees that are contained in the original tree, but are not directly taken * from it (i.e. with different leaf-numbering). In order to do so, we would * have to make a Tregex-Matching of the given subtree in the original tree * to identify the positition of the given subtree. * <p> * This could be achieved by translating the subtree into a Tregex pattern * and then matching this pattern against the original tree. * * @param subtree * a subtree of this TreeWithTokens (it has to be a real * subtree(!), because index numbering of subtree has to fit to * the numbering of the original tree) * @return an IntPair describing the span of the documentText that is * covered by this tree */ public IntPair getSpan(Tree subtree) { // TODO check if subtree is a real subtree of tokenTree.getTree() int nodeIndexLeft = ((CoreLabel) getLeftmostLeaf(subtree).label()).index(); int nodeIndexRight = ((CoreLabel) getRightmostLeaf(subtree).label()).index(); int a = tokens.get(nodeIndexLeft - 1).getBegin(); int b = tokens.get(nodeIndexRight - 1).getEnd(); return new IntPair(a, b); }
From source file:gov.llnl.ontology.util.AnnotationUtil.java
License:Open Source License
/** * Sets the index space for {@code annot}. *///from w w w . j a v a2 s . c om public static void setSpan(Annotation annot, int start, int end) { setSpan(annot, new IntPair(start, end)); }
From source file:knu.univ.lingvo.coref.CorefChain.java
License:Open Source License
/**
 * Looks up the mentions of this chain by position.
 *
 * @param sentenceNumber first component of the lookup key
 * @param headIndex      second component of the lookup key
 * @return whatever {@code mentionMap} holds for the
 *         (sentenceNumber, headIndex) pair; {@code null} if the map has no
 *         entry for it
 */
public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
    IntPair position = new IntPair(sentenceNumber, headIndex);
    return mentionMap.get(position);
}
From source file:knu.univ.lingvo.coref.CorefChain.java
License:Open Source License
public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions) { chainID = c.clusterID;//from w w w.j a v a 2 s . c o m // Collect mentions mentions = new ArrayList<CorefMention>(); mentionMap = Generics.newHashMap(); CorefMention represents = null; for (Mention m : c.getCorefMentions()) { CorefMention men = new CorefMention(m, positions.get(m)); mentions.add(men); } Collections.sort(mentions, new CorefMentionComparator()); // Find representative mention for (CorefMention men : mentions) { IntPair position = new IntPair(men.sentNum, men.headIndex); if (!mentionMap.containsKey(position)) mentionMap.put(position, Generics.<CorefMention>newHashSet()); mentionMap.get(position).add(men); if (men.moreRepresentativeThan(represents)) { represents = men; } } representative = represents; }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/** Mark twin mentions: All mention boundaries should be matched */ private void findTwinMentionsStrict() { for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) { List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum); List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum); // For CoNLL training there are some documents with gold mentions with the same position offsets // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll // (Packwood - Roth) CollectionValuedMap<IntPair, Mention> goldMentionPositions = new CollectionValuedMap<IntPair, Mention>(); for (Mention g : golds) { IntPair ip = new IntPair(g.startIndex, g.endIndex); if (goldMentionPositions.containsKey(ip)) { StringBuilder existingMentions = new StringBuilder(); for (Mention eg : goldMentionPositions.get(ip)) { if (existingMentions.length() > 0) { existingMentions.append(","); }//from w w w . j ava2s . c om existingMentions.append(eg.mentionID); } SieveCoreferenceSystem.logger.warning("WARNING: gold mentions with the same offsets: " + ip + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.spanToString()); } //assert(!goldMentionPositions.containsKey(ip)); goldMentionPositions.add(new IntPair(g.startIndex, g.endIndex), g); } for (Mention p : predicts) { IntPair pos = new IntPair(p.startIndex, p.endIndex); if (goldMentionPositions.containsKey(pos)) { Collection<Mention> cm = goldMentionPositions.get(pos); Mention g = cm.iterator().next(); cm.remove(g); p.mentionID = g.mentionID; p.twinless = false; g.twinless = false; } } // temp: for making easy to recognize twinless mention for (Mention p : predicts) { if (p.twinless) p.mentionID += 10000; } } }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/**
 * Mark twin mentions: heads of the mentions are matched.
 *
 * <p>Works sentence by sentence in two passes. First, predicted mentions whose
 * (startIndex, endIndex) span equals a gold span are paired with that gold
 * mention. Second, the predicted mentions left over are paired with a remaining
 * gold mention that has the same headIndex, in the order the gold mentions were
 * listed. A paired predicted mention takes over the gold mentionID and both are
 * flagged as not twinless.
 */
private void findTwinMentionsRelaxed() {
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) {
        List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum);
        List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum);

        // Gold mentions by exact span, and the still-unpaired gold mentions by head index.
        Map<IntPair, Mention> goldMentionPositions = Generics.newHashMap();
        Map<Integer, LinkedList<Mention>> goldMentionHeadPositions = Generics.newHashMap();
        for (Mention g : golds) {
            goldMentionPositions.put(new IntPair(g.startIndex, g.endIndex), g);
            LinkedList<Mention> sameHead = goldMentionHeadPositions.get(g.headIndex);
            if (sameHead == null) {
                sameHead = new LinkedList<Mention>();
                goldMentionHeadPositions.put(g.headIndex, sameHead);
            }
            sameHead.add(g);
        }

        // Pass 1: exact span matches.
        List<Mention> remains = new ArrayList<Mention>();
        for (Mention p : predicts) {
            IntPair pos = new IntPair(p.startIndex, p.endIndex);
            if (goldMentionPositions.containsKey(pos)) {
                Mention g = goldMentionPositions.get(pos);
                p.mentionID = g.mentionID;
                p.twinless = false;
                g.twinless = false;

                // The head bucket may already be gone: an earlier predicted mention with
                // the same span can have consumed g and emptied the bucket. Guard against
                // that instead of failing with NullPointerException.
                LinkedList<Mention> sameHead = goldMentionHeadPositions.get(g.headIndex);
                if (sameHead != null) {
                    sameHead.remove(g);
                    if (sameHead.isEmpty()) {
                        goldMentionHeadPositions.remove(g.headIndex);
                    }
                }
            } else {
                remains.add(p);
            }
        }

        // Pass 2: head matches for whatever is left. Buckets in the map are never empty,
        // because they are dropped as soon as their last gold mention is consumed.
        for (Mention r : remains) {
            LinkedList<Mention> sameHead = goldMentionHeadPositions.get(r.headIndex);
            if (sameHead != null) {
                Mention g = sameHead.poll();
                r.mentionID = g.mentionID;
                r.twinless = false;
                g.twinless = false;
                if (sameHead.isEmpty()) {
                    goldMentionHeadPositions.remove(g.headIndex);
                }
            }
        }
    }
}