List of usage examples for edu.stanford.nlp.stats ClassicCounter incrementCount
@Override public double incrementCount(E key, double count)
From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java
License:Open Source License
/** * Reads in a model file in svm light format. It needs to know if its multiclass or not * because it affects the number of header lines. Maybe there is another way to tell and we * can remove this flag?//from w w w .j a v a 2 s . co m */ private static Pair<Double, ClassicCounter<Integer>> readModel(File modelFile, boolean multiclass) { int modelLineCount = 0; try { int numLinesToSkip = multiclass ? 13 : 10; String stopToken = "#"; BufferedReader in = new BufferedReader(new FileReader(modelFile)); for (int i = 0; i < numLinesToSkip; i++) { in.readLine(); modelLineCount++; } List<Pair<Double, ClassicCounter<Integer>>> supportVectors = new ArrayList<Pair<Double, ClassicCounter<Integer>>>(); // Read Threshold String thresholdLine = in.readLine(); modelLineCount++; String[] pieces = thresholdLine.split("\\s+"); double threshold = Double.parseDouble(pieces[0]); // Read Support Vectors while (in.ready()) { String svLine = in.readLine(); modelLineCount++; pieces = svLine.split("\\s+"); // First Element is the alpha_i * y_i double alpha = Double.parseDouble(pieces[0]); ClassicCounter<Integer> supportVector = new ClassicCounter<Integer>(); for (int i = 1; i < pieces.length; ++i) { String piece = pieces[i]; if (piece.equals(stopToken)) break; // Each in featureIndex:num class String[] indexNum = piece.split(":"); String featureIndex = indexNum[0]; // mihai: we may see "qid" as indexNum[0]. just skip this piece. this is the block id useful only for reranking, which we don't do here. 
if (!featureIndex.equals("qid")) { double count = Double.parseDouble(indexNum[1]); supportVector.incrementCount(Integer.valueOf(featureIndex), count); } } supportVectors.add(new Pair<Double, ClassicCounter<Integer>>(alpha, supportVector)); } in.close(); return new Pair<Double, ClassicCounter<Integer>>(threshold, getWeights(supportVectors)); } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Error reading SVM model (line " + modelLineCount + " in file " + modelFile.getAbsolutePath() + ")"); } }
From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java
License:Open Source License
/** * Converts the svm_light weight Counter (which uses feature indices) into a weight Counter * using the actual features and labels. Because this is svm_light, and not svm_struct, the * weights for the +1 class (which correspond to labelIndex.get(0)) and the -1 class * (which correspond to labelIndex.get(1)) are just the negation of one another. *//*from www . ja v a2s. c o m*/ private ClassicCounter<Pair<F, L>> convertSVMLightWeights(ClassicCounter<Integer> weights, Index<F> featureIndex, Index<L> labelIndex) { ClassicCounter<Pair<F, L>> newWeights = new ClassicCounter<Pair<F, L>>(); for (int i : weights.keySet()) { F f = featureIndex.get(i - 1); double w = weights.getCount(i); // the first guy in the labelIndex was the +1 class and the second guy // was the -1 class newWeights.incrementCount(new Pair<F, L>(f, labelIndex.get(0)), w); newWeights.incrementCount(new Pair<F, L>(f, labelIndex.get(1)), -w); } return newWeights; }
From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java
License:Open Source License
/** * Converts the svm_struct weight Counter (in which the weight for a feature/label pair * correspondes to ((labelIndex * numFeatures)+(featureIndex+1))) into a weight Counter * using the actual features and labels. *///w w w .ja v a 2 s . co m private ClassicCounter<Pair<F, L>> convertSVMStructWeights(ClassicCounter<Integer> weights, Index<F> featureIndex, Index<L> labelIndex) { // int numLabels = labelIndex.size(); int numFeatures = featureIndex.size(); ClassicCounter<Pair<F, L>> newWeights = new ClassicCounter<Pair<F, L>>(); for (int i : weights.keySet()) { L l = labelIndex.get((i - 1) / numFeatures); // integer division on purpose F f = featureIndex.get((i - 1) % numFeatures); double w = weights.getCount(i); newWeights.incrementCount(new Pair<F, L>(f, l), w); } return newWeights; }
From source file:gr.aueb.cs.nlp.wordtagger.data.structure.WordSet.java
License:Open Source License
/** * Converts any List with words to a Stanford set; * @param words/* w w w .j a v a 2 s. c om*/ * @return, a list of real valued datums */ public static List<RVFDatum<String, String>> toStanfordSet(List<Word> words) { List<RVFDatum<String, String>> trainignData = new ArrayList<>(); for (Word w : words) { List<Double> feats = Arrays.asList(ArrayUtils.toObject(w.getFeatureVec().getValues())); ClassicCounter<String> cc = new ClassicCounter<>(); for (int i = 0; i < feats.size(); i++) { cc.incrementCount("feature" + i, feats.get(i)); } if (w.getCategory() != null) { RVFDatum<String, String> dtm = new RVFDatum<>(cc, w.getCategory()); trainignData.add(dtm); } } System.out.println("Converted List to classifier trainset"); return trainignData; }
From source file:gr.aueb.cs.nlp.wordtagger.data.structure.WordSet.java
License:Open Source License
/**
 * Converts a word to a Stanford real-valued datum.
 *
 * <p>Each value of the word's feature vector becomes a feature named {@code "feature" + i},
 * where {@code i} is the value's position, and the word's category becomes the datum's label.
 *
 * @param w the word to convert
 * @return a datum holding the word's feature values and its category
 */
public static RVFDatum<String, String> word2Datum(Word w) {
  // Iterate the primitive array directly; boxing every value into a List<Double> only to
  // unbox it again for incrementCount is unnecessary.
  double[] values = w.getFeatureVec().getValues();
  ClassicCounter<String> cc = new ClassicCounter<>();
  for (int i = 0; i < values.length; i++) {
    cc.incrementCount("feature" + i, values[i]);
  }
  return new RVFDatum<>(cc, w.getCategory());
}