Example usage for edu.stanford.nlp.stats ClassicCounter incrementCount

List of usage examples for edu.stanford.nlp.stats ClassicCounter incrementCount

Introduction

This page collects usage examples of the incrementCount method of edu.stanford.nlp.stats.ClassicCounter.

Prototype

@Override
public double incrementCount(E key, double count) 

Source Link

Usage

From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java

License:Open Source License

/**
 * Reads a model file in svm_light format and returns its threshold together with the
 * weights computed from its support vectors.
 *
 * <p>The reader skips a fixed number of header lines (13 for multiclass models, 10
 * otherwise), takes the first whitespace-separated token of the next line as the threshold,
 * and treats every remaining line as one support vector. On a support-vector line the first
 * token is alpha_i * y_i and each following token has the form {@code featureIndex:value};
 * parsing of a line stops at the first {@code #} token.
 *
 * @param modelFile  the model file to read
 * @param multiclass whether the model is multiclass; needed only because it changes the
 *                   number of header lines (there may be another way to detect this)
 * @return a pair holding the threshold and the result of {@code getWeights} applied to the
 *         support vectors that were read
 * @throws RuntimeException if the file cannot be read or parsed; the message reports the
 *                          number of lines read so far and the file path, and the original
 *                          exception is attached as the cause
 */
private static Pair<Double, ClassicCounter<Integer>> readModel(File modelFile, boolean multiclass) {
    int modelLineCount = 0;
    int numLinesToSkip = multiclass ? 13 : 10;
    String stopToken = "#";

    // try-with-resources closes the reader even when parsing fails part-way through.
    try (BufferedReader in = new BufferedReader(new FileReader(modelFile))) {

        for (int i = 0; i < numLinesToSkip; i++) {
            in.readLine();
            modelLineCount++;
        }

        List<Pair<Double, ClassicCounter<Integer>>> supportVectors = new ArrayList<Pair<Double, ClassicCounter<Integer>>>();

        // Read threshold. A truncated file yields null here; the resulting exception is
        // wrapped by the catch block below.
        String thresholdLine = in.readLine();
        modelLineCount++;
        String[] pieces = thresholdLine.split("\\s+");
        double threshold = Double.parseDouble(pieces[0]);

        // Read support vectors. End of input is detected by readLine() returning null;
        // ready() only says whether a read would block, not whether data remains.
        String svLine;
        while ((svLine = in.readLine()) != null) {
            modelLineCount++;
            pieces = svLine.split("\\s+");
            // First element is alpha_i * y_i
            double alpha = Double.parseDouble(pieces[0]);
            ClassicCounter<Integer> supportVector = new ClassicCounter<Integer>();
            for (int i = 1; i < pieces.length; ++i) {
                String piece = pieces[i];
                if (piece.equals(stopToken)) {
                    break;
                }
                // Each piece is featureIndex:value
                String[] indexNum = piece.split(":");
                String featureIndex = indexNum[0];
                // mihai: we may see "qid" as indexNum[0]. Just skip this piece: it is the
                // block id, useful only for reranking, which we don't do here.
                if (!featureIndex.equals("qid")) {
                    double count = Double.parseDouble(indexNum[1]);
                    supportVector.incrementCount(Integer.valueOf(featureIndex), count);
                }
            }
            supportVectors.add(new Pair<Double, ClassicCounter<Integer>>(alpha, supportVector));
        }

        return new Pair<Double, ClassicCounter<Integer>>(threshold, getWeights(supportVectors));
    } catch (Exception e) {
        // Chain the cause instead of printing it, so callers and loggers see the real failure.
        throw new RuntimeException("Error reading SVM model (line " + modelLineCount + " in file "
                + modelFile.getAbsolutePath() + ")", e);
    }
}

From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java

License:Open Source License

/**
 * Translates an svm_light weight Counter, keyed by feature index, into a weight Counter
 * keyed by (feature, label) pairs.
 *
 * <p>Because this is svm_light rather than svm_struct, the weights of the +1 class
 * ({@code labelIndex.get(0)}) and of the -1 class ({@code labelIndex.get(1)}) are simply the
 * negation of one another, so each input weight produces two entries.
 *
 * @param weights      weights keyed by feature index; key {@code i} is looked up as
 *                     {@code featureIndex.get(i - 1)}
 * @param featureIndex index used to resolve a key to its feature
 * @param labelIndex   index whose entry 0 is the +1 class and whose entry 1 is the -1 class
 * @return a new Counter over (feature, label) pairs
 */
private ClassicCounter<Pair<F, L>> convertSVMLightWeights(ClassicCounter<Integer> weights,
        Index<F> featureIndex, Index<L> labelIndex) {
    final ClassicCounter<Pair<F, L>> converted = new ClassicCounter<Pair<F, L>>();
    for (Integer svmIndex : weights.keySet()) {
        final F feature = featureIndex.get(svmIndex - 1);
        final double weight = weights.getCount(svmIndex);

        // Entry 0 of labelIndex is the +1 class: it receives the weight as-is.
        final Pair<F, L> positiveKey = new Pair<F, L>(feature, labelIndex.get(0));
        converted.incrementCount(positiveKey, weight);

        // Entry 1 of labelIndex is the -1 class: it receives the negated weight.
        final Pair<F, L> negativeKey = new Pair<F, L>(feature, labelIndex.get(1));
        converted.incrementCount(negativeKey, -weight);
    }
    return converted;
}

From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java

License:Open Source License

/**
 * Translates an svm_struct weight Counter into a weight Counter keyed by the actual
 * (feature, label) pairs.
 *
 * <p>In the input, the key of a feature/label pair corresponds to
 * {@code (labelIndex * numFeatures) + (featureIndex + 1)}; this method inverts that encoding.
 *
 * @param weights      weights keyed by the combined index described above
 * @param featureIndex index of features; its size is the stride between labels
 * @param labelIndex   index of labels
 * @return a new Counter over (feature, label) pairs
 */
private ClassicCounter<Pair<F, L>> convertSVMStructWeights(ClassicCounter<Integer> weights,
        Index<F> featureIndex, Index<L> labelIndex) {
    final int stride = featureIndex.size();
    final ClassicCounter<Pair<F, L>> converted = new ClassicCounter<Pair<F, L>>();
    for (Integer combinedIndex : weights.keySet()) {
        final int zeroBased = combinedIndex - 1;
        // Integer division is intentional: the quotient selects the label,
        // the remainder selects the feature.
        final L label = labelIndex.get(zeroBased / stride);
        final F feature = featureIndex.get(zeroBased % stride);
        final double weight = weights.getCount(combinedIndex);
        converted.incrementCount(new Pair<F, L>(feature, label), weight);
    }
    return converted;
}

From source file:gr.aueb.cs.nlp.wordtagger.data.structure.WordSet.java

License:Open Source License

/**
 * Converts any List with words to a Stanford set;
 * @param words/* w w w  .j  a v  a 2  s.  c  om*/
 * @return, a list of real valued datums
 */
public static List<RVFDatum<String, String>> toStanfordSet(List<Word> words) {
    List<RVFDatum<String, String>> trainignData = new ArrayList<>();
    for (Word w : words) {
        List<Double> feats = Arrays.asList(ArrayUtils.toObject(w.getFeatureVec().getValues()));
        ClassicCounter<String> cc = new ClassicCounter<>();
        for (int i = 0; i < feats.size(); i++) {
            cc.incrementCount("feature" + i, feats.get(i));
        }
        if (w.getCategory() != null) {
            RVFDatum<String, String> dtm = new RVFDatum<>(cc, w.getCategory());
            trainignData.add(dtm);
        }
    }
    System.out.println("Converted List to classifier trainset");
    return trainignData;
}

From source file:gr.aueb.cs.nlp.wordtagger.data.structure.WordSet.java

License:Open Source License

/**
 * Converts a single word to a Stanford real-valued datum.
 *
 * <p>Each value of the word's feature vector becomes a count under the key
 * {@code "feature" + position}; the datum's label is the word's category.
 *
 * @param w the word to convert
 * @return the datum built from the word's feature values and category
 */
public static RVFDatum<String, String> word2Datum(Word w) {
    List<Double> values = Arrays.asList(ArrayUtils.toObject(w.getFeatureVec().getValues()));
    ClassicCounter<String> featureCounts = new ClassicCounter<>();
    int position = 0;
    for (Double value : values) {
        featureCounts.incrementCount("feature" + position, value);
        position++;
    }
    RVFDatum<String, String> datum = new RVFDatum<>(featureCounts, w.getCategory());
    return datum;
}