List of usage examples for the edu.stanford.nlp.stats.Counter method incrementCount
double incrementCount(E key);
From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java
License:Open Source License
/** * Builds a sigmoid model to turn the classifier outputs into probabilities. *///from ww w. ja v a 2 s .c o m private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) { RVFDataset<L, L> plattDataset = new RVFDataset<L, L>(); for (int i = 0; i < dataset.size(); i++) { RVFDatum<L, F> d = dataset.getRVFDatum(i); Counter<L> scores = classifier.scoresOf((Datum<L, F>) d); scores.incrementCount(null); plattDataset.add(new RVFDatum<L, L>(scores, d.label())); } LinearClassifierFactory<L, L> factory = new LinearClassifierFactory<L, L>(); factory.setPrior(new LogPrior(LogPrior.LogPriorType.NULL)); return factory.trainClassifier(plattDataset); }
From source file:ilcc.ccgparser.nnparser.IncNNParser.java
public Dataset genTrainExamples(List<CCGJSentence> sents, List<CCGJTreeNode> trees) throws IOException { int numTrans = actsList.size(); Dataset ret = new Dataset(config.numTokens, numTrans); Counter<Integer> tokPosCount = new IntCounter<>(); System.err.println(Config.SEPARATOR); System.err.println("Generate training examples..."); System.err.println("With #transitions: " + numTrans); double start = (long) (System.currentTimeMillis()), end; System.err.println("Started at: " + new Date(System.currentTimeMillis())); for (int i = 0; i < sents.size(); ++i) { if (i > 0) { //System.err.print(i + " "); if (i % 1000 == 0) System.err.print(i + " "); if (i % 10000 == 0 || i == sents.size() - 1) System.err.println(); }// w ww. j a va 2 s. co m CCGJSentence sent = sents.get(i); if (sent == null) continue; srparser.initVars(sent); List<ArcJAction> gActList = goldDetails.get(i + 1).getarcActs(); for (ArcJAction gAct : gActList) { ArrayList<ArcJAction> acts = getAction(srparser); ArrayList<Integer> rightPerList = null; int stacksize = srparser.stack.size(); if (srparser.incalgo && stacksize > 1) { CCGJTreeNode left = srparser.stack.get(stacksize - 2); Integer lvertex = left.getConllNode().getNodeId(); rightPerList = srparser.depGraph.getRightPer(lvertex); } List<Integer> feature = getFeatures(srparser, rightPerList, sent); List<Integer> label = new ArrayList<>(Collections.nCopies(numTrans, -1)); for (ArcJAction act : acts) { Integer id = actsMap.get(act); if (id != null) { if (act.equals(gAct)) label.set(id, 1); else label.set(id, 0); } } ret.addExample(feature, label); for (int j = 0; j < feature.size(); ++j) tokPosCount.incrementCount(feature.get(j) * feature.size() + j); srparser.applyAction(gAct); } } System.err.println("#Train Examples: " + ret.n); end = (long) System.currentTimeMillis(); System.err.println("Ended at : " + new Date(System.currentTimeMillis()) + " taking " + 0.001 * (end - start) + " secs"); List<Integer> sortedTokens = Counters.toSortedList(tokPosCount, 
false); preComputed = new ArrayList<>( sortedTokens.subList(0, Math.min(config.numPreComputed, sortedTokens.size()))); return ret; }
From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java
License:Open Source License
private static void printRawDoc(List<CoreMap> sentences, List<List<Mention>> allMentions, String filename, boolean gold) throws FileNotFoundException { StringBuilder doc = new StringBuilder(); int previousOffset = 0; Counter<Integer> mentionCount = new ClassicCounter<Integer>(); for (List<Mention> l : allMentions) { for (Mention m : l) { mentionCount.incrementCount(m.goldCorefClusterID); }/*from www. j av a2 s.co m*/ } for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); String[] tokens = sentence.get(CoreAnnotations.TextAnnotation.class).split(" "); String sent = ""; List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) sent += "\n"; previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Integer>> endID = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endID.containsKey(m.endIndex)) endID.put(m.endIndex, Generics.<Integer>newHashSet()); endID.get(m.endIndex).add(m.goldCorefClusterID); } for (int j = 0; j < tokens.length; j++) { if (endID.containsKey(j)) { for (Integer id : endID.get(j)) { if (mentionCount.getCount(id) != 1 && gold) sent += "]_" + id; else sent += "]"; } } for (int k = 0; k < startCounts.getCount(j); k++) { if (!sent.endsWith("[")) sent += " "; sent += "["; } sent += " "; sent = sent + tokens[j]; } for (int k = 0; k < endCounts.getCount(tokens.length); k++) { sent += "]"; } sent += "\n"; doc.append(sent); } if (gold) logger.fine("New DOC: (GOLD MENTIONS) =================================================="); else logger.fine("New DOC: (Predicted Mentions) 
=================================================="); logger.fine(doc.toString()); }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * print a coref link information including context and parse tree *///from w ww .j ava 2 s . c om private static void printLinkWithContext(Logger logger, String header, IntTuple src, IntTuple dst, Document document, Semantics semantics) { List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions(); List<List<Mention>> goldOrderedMentionsBySentence = document.goldOrderedMentionsBySentence; Mention srcMention = orderedMentionsBySentence.get(src.get(0)).get(src.get(1)); Mention dstMention = orderedMentionsBySentence.get(dst.get(0)).get(dst.get(1)); List<CoreLabel> srcSentence = srcMention.sentenceWords; List<CoreLabel> dstSentence = dstMention.sentenceWords; printLink(logger, header, src, dst, orderedMentionsBySentence); printList(logger, "Mention:" + srcMention.spanToString(), "Gender:" + srcMention.gender.toString(), "Number:" + srcMention.number.toString(), "Animacy:" + srcMention.animacy.toString(), "Person:" + srcMention.person.toString(), "NER:" + srcMention.nerString, "Head:" + srcMention.headString, "Type:" + srcMention.mentionType.toString(), "utter: " + srcMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + srcMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + srcMention.twinless); logger.fine("Context:"); String p = ""; for (int i = 0; i < srcSentence.size(); i++) { if (i == srcMention.startIndex) { p += "["; } if (i == srcMention.endIndex) { p += "]"; } p += srcSentence.get(i).word() + " "; } logger.fine(p); StringBuilder golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); Counter<Integer> mBegin = new ClassicCounter<Integer>(); Counter<Integer> mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(src.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } List<CoreLabel> l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(src.get(0)) 
.get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); printList(logger, "\nAntecedent:" + dstMention.spanToString(), "Gender:" + dstMention.gender.toString(), "Number:" + dstMention.number.toString(), "Animacy:" + dstMention.animacy.toString(), "Person:" + dstMention.person.toString(), "NER:" + dstMention.nerString, "Head:" + dstMention.headString, "Type:" + dstMention.mentionType.toString(), "utter: " + dstMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + dstMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + dstMention.twinless); logger.fine("Context:"); p = ""; for (int i = 0; i < dstSentence.size(); i++) { if (i == dstMention.startIndex) { p += "["; } if (i == dstMention.endIndex) { p += "]"; } p += dstSentence.get(i).word() + " "; } logger.fine(p); golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); mBegin = new ClassicCounter<Integer>(); mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(dst.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(dst.get(0)) .get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); logger.finer("\nMention:: --------------------------------------------------------"); try { logger.finer(srcMention.dependency.toString()); } catch (Exception e) { } //throw new 
RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(srcMention.contextParseTree)); logger.finer("\nAntecedent:: -----------------------------------------------------"); try { logger.finer(dstMention.dependency.toString()); } catch (Exception e) { } //throw new RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(dstMention.contextParseTree)); }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * Print raw document for analysis/* ww w .j a va 2s. c o m*/ */ public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException { List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class); List<List<Mention>> allMentions; if (gold) { allMentions = document.goldOrderedMentionsBySentence; } else { allMentions = document.predictedOrderedMentionsBySentence; } // String filename = document.annotation.get() StringBuilder doc = new StringBuilder(); int previousOffset = 0; for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); String[] tokens = new String[t.size()]; for (CoreLabel c : t) { tokens[c.index() - 1] = c.word(); } if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) { doc.append("\n"); } previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Mention>> endMentions = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endMentions.containsKey(m.endIndex)) { endMentions.put(m.endIndex, Generics.<Mention>newHashSet()); } endMentions.get(m.endIndex).add(m); } for (int j = 0; j < tokens.length; j++) { if (endMentions.containsKey(j)) { for (Mention m : endMentions.get(j)) { int corefChainId = (gold) ? 
m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); } } for (int k = 0; k < startCounts.getCount(j); k++) { if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append("["); } if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append(tokens[j]); } if (endMentions.containsKey(tokens.length)) { for (Mention m : endMentions.get(tokens.length)) { int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); //append("_").append(m.mentionID); } } doc.append("\n"); } logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)); if (gold) { logger.fine("New DOC: (GOLD MENTIONS) =================================================="); } else { logger.fine("New DOC: (Predicted Mentions) =================================================="); } logger.fine(doc.toString()); }