Example usage for weka.core Instance weight

Introduction

On this page you can find usage examples for the weka.core Instance method weight().

Prototype

public double weight();

Document

Returns the instance's weight.
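
As a minimal sketch of the call pattern (the class and helper below are illustrative, not taken from the sources that follow): weight() is typically read while iterating over an Instances object, for example to accumulate weighted statistics.

import weka.core.Instance;
import weka.core.Instances;

public class WeightExample {

    /**
     * Computes the weighted mean of the attribute at attIndex, using each
     * instance's weight as returned by weight().
     */
    public static double weightedMean(Instances data, int attIndex) {
        double weightedSum = 0;
        double sumOfWeights = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            // weight() defaults to 1.0 unless changed via setWeight()
            weightedSum += inst.value(attIndex) * inst.weight();
            sumOfWeights += inst.weight();
        }
        return sumOfWeights > 0 ? weightedSum / sumOfWeights : 0;
    }
}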

Usage

From source file:fantail.algorithms.AbstractRanker.java

License:Open Source License

public static double[] getAvgRankValues(Instances data) throws Exception {

    if (data.numInstances() == 0) {
        throw new Exception("data can't be empty.");
    }
    int numLabels = Tools.getNumberTargets(data);
    double[] avgVals = new double[numLabels];
    for (int m = 0; m < data.numInstances(); m++) {
        Instance inst = data.instance(m);
        double[] targetValues = Tools.getTargetVector(inst);

        for (int j = 0; j < targetValues.length; j++) {
            avgVals[j] += (targetValues[j] * inst.weight());
        }
    }
    for (int i = 0; i < avgVals.length; i++) {
        avgVals[i] /= data.numInstances();
    }
    return avgVals;
}

From source file:fantail.algorithms.RankingWithBinaryPCT.java

License:Open Source License

private double computeVariance(Instances data) throws Exception {
    double[][] targets = new double[data.numInstances()][];
    for (int i = 0; i < data.numInstances(); i++) {
        targets[i] = Tools.getTargetVector(data.instance(i));
    }
    double sumVar = 0;
    for (int i = 0; i < m_NumTargetLabels; i++) {
        double[] target_i = new double[data.numInstances()];

        for (int j = 0; j < data.numInstances(); j++) {
            Instance metaInst = (Instance) data.instance(j);
            target_i[j] = targets[j][i] * metaInst.weight();
        }
        sumVar += weka.core.Utils.variance(target_i);
    }
    return sumVar / m_NumTargetLabels;
}

From source file:fantail.algorithms.RankingWithkNN.java

License:Open Source License

@Override
public double[] recommendRanking(Instance inst) throws Exception {
    Instances nnbrs = m_kNN.getNearestNeighbourSearchAlgorithm().kNearestNeighbours(inst, m_K);

    double[] predictedRanks = new double[Tools.getNumberTargets(inst)];

    double sumWeights = 0;
    for (int k = 0; k < m_K; k++) {
        Instance nn = (Instance) nnbrs.instance(k);
        sumWeights += nn.weight();
    }

    for (int k = 0; k < m_K; k++) {
        Instance nn = (Instance) nnbrs.instance(k);
        double[] rankingNN = Tools.getTargetVector(nn);
        for (int j = 0; j < predictedRanks.length; j++) {
            predictedRanks[j] += (rankingNN[j] * nn.weight() / sumWeights);
        }
    }

    return Tools.doubleArrayToRanking(predictedRanks);
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
* Transform an instance in original (unnormalized) format.
*
* @param instance an instance in the original (unnormalized) format
* @return the transformed instance
* @throws Exception if the instance can't be transformed
*/
protected Instance convertInstance(Instance instance) throws Exception {
    Instance result;
    double[] newVals;
    Instance tempInst;
    double cumulative;
    int i;
    int j;
    double tempval;
    int numAttsLowerBound;

    newVals = new double[m_OutputNumAtts];
    tempInst = (Instance) instance.copy();

    /*m_ReplaceMissingFilter.input(tempInst);
    m_ReplaceMissingFilter.batchFinished();
    tempInst = m_ReplaceMissingFilter.output();*/

    m_NominalToBinaryFilter.input(tempInst);
    m_NominalToBinaryFilter.batchFinished();
    tempInst = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
        m_AttributeFilter.input(tempInst);
        m_AttributeFilter.batchFinished();
        tempInst = m_AttributeFilter.output();
    }

    if (!super.getCenterData()) {

        tempInst = f_norm.filter(tempInst);
    } else {

        tempInst = f_center.filter(tempInst);
    }

    if (m_HasClass) {
        newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
    }

    if (m_MaxAttributes > 0) {
        numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
    } else {
        numAttsLowerBound = 0;
    }
    if (numAttsLowerBound < 0) {
        numAttsLowerBound = 0;
    }

    double tempInstCpy[] = new double[m_NumAttribs];
    for (j = 0; j < m_NumAttribs; j++) {
        tempInstCpy[j] = tempInst.value(j);
    }

    cumulative = 0;
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
        tempval = 0.0;
        for (j = 0; j < m_NumAttribs; j++) {
            tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInstCpy[j];
        }

        newVals[m_NumAttribs - i - 1] = tempval;
        cumulative += m_Eigenvalues[m_SortedEigens[i]];
        if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) {
            break;
        }
    }

    // create instance
    if (instance instanceof SparseInstance) {
        result = new SparseInstance(instance.weight(), newVals);
    } else {
        result = new DenseInstance(instance.weight(), newVals);
    }

    return result;
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Computes the information gain for a single attribute. All numeric
 * attributes are discretized (or binarized) first.
 *
 * @param data
 *            set of instances serving as training data
 * @param att
 *            index of the attribute to evaluate
 * @throws Exception
 *             if the information gain cannot be computed
 */
public double computeInfoGain(Instances data, int att) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));

    return m_InfoGains[att];
}

From source file:feature.InfoGainEval.java

License:Open Source License

public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}

From source file:ffnn.MultilayerPerceptron.java

License:Open Source License

/**
 * This function sets the m_numeric flag to represent the passed class
 * type. It also normalizes the attributes, if applicable, and sets up the
 * information needed to normalize the class. (Note that regardless of the
 * options it will fill an array with the range and base, set to normalize
 * all attributes and the class to be between -1 and 1.)
 *
 * @param inst the instances.
 * @return The modified instances. If the attributes are normalized, deep
 *         copies will be made of all the instances, which will need to be
 *         passed back out.
 */
private Instances setClassType(Instances inst) throws Exception {
    if (inst != null) {
        // x bounds
        m_attributeRanges = new double[inst.numAttributes()];
        m_attributeBases = new double[inst.numAttributes()];
        for (int noa = 0; noa < inst.numAttributes(); noa++) {
            double min = Double.POSITIVE_INFINITY;
            double max = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < inst.numInstances(); i++) {
                if (!inst.instance(i).isMissing(noa)) {
                    double value = inst.instance(i).value(noa);
                    if (value < min) {
                        min = value;
                    }
                    if (value > max) {
                        max = value;
                    }
                }
            }
            m_attributeRanges[noa] = (max - min) / 2;
            m_attributeBases[noa] = (max + min) / 2;
        }

        if (m_normalizeAttributes) {
            for (int i = 0; i < inst.numInstances(); i++) {
                Instance currentInstance = inst.instance(i);
                double[] instance = new double[inst.numAttributes()];
                for (int noa = 0; noa < inst.numAttributes(); noa++) {
                    if (noa != inst.classIndex()) {
                        if (m_attributeRanges[noa] != 0) {
                            instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa])
                                    / m_attributeRanges[noa];
                        } else {
                            instance[noa] = currentInstance.value(noa) - m_attributeBases[noa];
                        }
                    } else {
                        instance[noa] = currentInstance.value(noa);
                    }
                }
                inst.set(i, new DenseInstance(currentInstance.weight(), instance));
            }
        }

        if (inst.classAttribute().isNumeric()) {
            m_numeric = true;
        } else {
            m_numeric = false;
        }
    }
    return inst;
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Builds the classifier.
 */
private void buildClassifier() throws Exception {

    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Term_frequency")); // 2
            atts.addElement(new Attribute("IDF")); // 
            atts.addElement(new Attribute("TFxIDF")); // 
            atts.addElement(new Attribute("First_occurrence")); // 
            atts.addElement(new Attribute("Last_occurrence")); // 
            atts.addElement(new Attribute("Spread")); // 
            atts.addElement(new Attribute("Domain_keyphraseness")); // 
            atts.addElement(new Attribute("Length")); //
            atts.addElement(new Attribute("Generality")); //
            atts.addElement(new Attribute("Node_degree")); // 
            atts.addElement(new Attribute("Semantic_relatedness")); // 
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 
            atts.addElement(new Attribute("Inverse_Wikip_frequency")); // 
            atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13

        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        }
    }

    classifierData = new Instances("ClassifierData", atts, 0);

    classifierData.setClassIndex(numFeatures);

    if (debugMode) {
        System.err.println("--- Converting instances for classifier");
    }
    int totalDocuments = getInputFormat().numInstances();
    // Convert pending input instances into data for classifier
    for (int i = 0; i < totalDocuments; i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(keyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases);

        // Get the phrases for the document
        HashMap<String, Candidate> candidateList = allCandidates.get(current);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        int countPos = 0;
        int countNeg = 0;

        if (debugMode) {
            System.err
                    .println("--- Computing features for document " + i + " out of " + totalDocuments + "...");
        }

        for (Candidate candidate : candidateList.values()) {

            // ignore all candidates that appear less than a threshold
            if (candidate.getFrequency() < minOccurFrequency) {
                continue;
            }

            // compute feature values
            double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList);

            if (vals[vals.length - 1] == 0) {
                countNeg++;
            } else {
                countPos++;
            }
            Instance inst = new Instance(current.weight(), vals);
            // System.out.println(candidate + "\t" + inst);
            classifierData.add(inst);

        }
        if (debugMode) {
            System.err.println(countPos + " positive; " + countNeg + " negative instances");
        }
    }

    if (debugMode) {
        System.err.println("--- Building classifier");
    }

    if (classifier == null) {
        // Build classifier
        if (nominalClassValue) {

            //         FilteredClassifier fclass = new FilteredClassifier();
            //         fclass.setClassifier(new NaiveBayesSimple());
            //         fclass.setFilter(new Discretize());
            //         classifier = fclass;

            classifier = new Bagging(); // try also //
            classifier.setOptions(
                    Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));

        } else {

            classifier = new Bagging();
            // try also
            // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W
            // weka.classifiers.trees.J48 -- -U -M 2")) ;
            String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            String[] options = Utils.splitOptions(optionsString);
            classifier.setOptions(options);

        }
    }
    FileOutputStream out = new FileOutputStream(new File("19docs.arff"));
    PrintWriter printer = new PrintWriter(out);

    printer.write(classifierData.toString());

    printer.close();
    out.close();

    classifier.buildClassifier(classifierData);

    if (debugMode) {
        System.err.println(classifier);
    }

    // Save space
    classifierData = new Instances(classifierData, 0);
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Sets the class prior probabilities.
 *
 * @param train the training instances used to determine
 * the prior probabilities
 * @exception Exception if the class attribute of the instances is not
 * set
 */
public void setPriors(Instances train) throws Exception {

    if (!m_ClassIsNominal) {

        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;

        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }

    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}