Example usage for weka.core Instance weight

Introduction

On this page you can find usage examples for the weka.core Instance method weight().

Prototype

public double weight();

Document

Returns the instance's weight.
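
As a minimal sketch of the call pattern (the class and helper below are illustrative, not taken from the sources that follow): weight() is typically read while iterating over an Instances object, for example to accumulate weighted statistics.

import weka.core.Instance;
import weka.core.Instances;

public class WeightExample {

    /**
     * Computes the weighted mean of the attribute at attIndex, using each
     * instance's weight as returned by weight().
     */
    public static double weightedMean(Instances data, int attIndex) {
        double weightedSum = 0;
        double sumOfWeights = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            // weight() defaults to 1.0 unless changed via setWeight()
            weightedSum += inst.value(attIndex) * inst.weight();
            sumOfWeights += inst.weight();
        }
        return sumOfWeights > 0 ? weightedSum / sumOfWeights : 0;
    }
}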

Usage

From source file:fantail.algorithms.AbstractRanker.java

License:Open Source License

public static double[] getAvgRankValues(Instances data) throws Exception {

    if (data.numInstances() == 0) {
        throw new Exception("data can't be empty.");
    }
    int numLabels = Tools.getNumberTargets(data);
    double[] avgVals = new double[numLabels];
    for (int m = 0; m < data.numInstances(); m++) {
        Instance inst = data.instance(m);
        double[] targetValues = Tools.getTargetVector(inst);

        for (int j = 0; j < targetValues.length; j++) {
            avgVals[j] += (targetValues[j] * inst.weight());
        }
    }
    for (int i = 0; i < avgVals.length; i++) {
        avgVals[i] /= data.numInstances();
    }
    return avgVals;
}

From source file:fantail.algorithms.RankingWithBinaryPCT.java

License:Open Source License

private double computeVariance(Instances data) throws Exception {
    double[][] targets = new double[data.numInstances()][];
    for (int i = 0; i < data.numInstances(); i++) {
        targets[i] = Tools.getTargetVector(data.instance(i));
    }
    double sumVar = 0;
    for (int i = 0; i < m_NumTargetLabels; i++) {
        double[] target_i = new double[data.numInstances()];

        for (int j = 0; j < data.numInstances(); j++) {
            Instance metaInst = (Instance) data.instance(j);
            target_i[j] = targets[j][i] * metaInst.weight();
        }
        sumVar += weka.core.Utils.variance(target_i);
    }
    return sumVar / m_NumTargetLabels;
}

From source file:fantail.algorithms.RankingWithkNN.java

License:Open Source License

@Override
public double[] recommendRanking(Instance inst) throws Exception {
    Instances nnbrs = m_kNN.getNearestNeighbourSearchAlgorithm().kNearestNeighbours(inst, m_K);

    double[] predictedRanks = new double[Tools.getNumberTargets(inst)];

    double sumWeights = 0;
    for (int k = 0; k < m_K; k++) {
        Instance nn = (Instance) nnbrs.instance(k);
        sumWeights += nn.weight();
    }

    for (int k = 0; k < m_K; k++) {
        Instance nn = (Instance) nnbrs.instance(k);
        double[] rankingNN = Tools.getTargetVector(nn);
        for (int j = 0; j < predictedRanks.length; j++) {
            predictedRanks[j] += (rankingNN[j] * nn.weight() / sumWeights);
        }
    }

    return Tools.doubleArrayToRanking(predictedRanks);
}

From source file:faster_pca.faster_pca.java

License:Open Source License

/**
* Transform an instance in original (unnormalized) format.
*
* @param instance an instance in the original (unnormalized) format
* @return the transformed instance
* @throws Exception if the instance can't be transformed
*/
protected Instance convertInstance(Instance instance) throws Exception {
    Instance result;
    double[] newVals;
    Instance tempInst;
    double cumulative;
    int i;
    int j;
    double tempval;
    int numAttsLowerBound;

    newVals = new double[m_OutputNumAtts];
    tempInst = (Instance) instance.copy();

    /*m_ReplaceMissingFilter.input(tempInst);
    m_ReplaceMissingFilter.batchFinished();
    tempInst = m_ReplaceMissingFilter.output();*/

    m_NominalToBinaryFilter.input(tempInst);
    m_NominalToBinaryFilter.batchFinished();
    tempInst = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
        m_AttributeFilter.input(tempInst);
        m_AttributeFilter.batchFinished();
        tempInst = m_AttributeFilter.output();
    }

    if (!super.getCenterData()) {

        tempInst = f_norm.filter(tempInst);
    } else {

        tempInst = f_center.filter(tempInst);
    }

    if (m_HasClass) {
        newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());
    }

    if (m_MaxAttributes > 0) {
        numAttsLowerBound = m_NumAttribs - m_MaxAttributes;
    } else {
        numAttsLowerBound = 0;
    }
    if (numAttsLowerBound < 0) {
        numAttsLowerBound = 0;
    }

    double tempInstCpy[] = new double[m_NumAttribs];
    for (j = 0; j < m_NumAttribs; j++) {
        tempInstCpy[j] = tempInst.value(j);
    }

    cumulative = 0;
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
        tempval = 0.0;
        for (j = 0; j < m_NumAttribs; j++) {
            tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInstCpy[j];
        }

        newVals[m_NumAttribs - i - 1] = tempval;
        cumulative += m_Eigenvalues[m_SortedEigens[i]];
        if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) {
            break;
        }
    }

    // create instance
    if (instance instanceof SparseInstance) {
        result = new SparseInstance(instance.weight(), newVals);
    } else {
        result = new DenseInstance(instance.weight(), newVals);
    }

    return result;
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Computes the information gain for a single attribute. All numeric
 * attributes are discretized (or binarized) first.
 *
 * @param data
 *            set of instances serving as training data
 * @param att
 *            index of the attribute to evaluate
 * @throws Exception
 *             if the information gain cannot be computed
 */
public double computeInfoGain(Instances data, int att) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));

    return m_InfoGains[att];
}

From source file:feature.InfoGainEval.java

License:Open Source License

public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}

From source file:ffnn.MultilayerPerceptron.java

License:Open Source License

/**
 * This function sets the m_numeric flag to represent the passed class
 * type. It also normalizes the attributes, if applicable, and sets up the
 * information needed to normalize the class. (Note that regardless of the
 * options it will fill an array with the range and base, set to normalize
 * all attributes and the class to be between -1 and 1.)
 *
 * @param inst the instances.
 * @return The modified instances. If the attributes are normalized, deep
 *         copies will be made of all the instances, which will need to be
 *         passed back out.
 */
private Instances setClassType(Instances inst) throws Exception {
    if (inst != null) {
        // x bounds
        m_attributeRanges = new double[inst.numAttributes()];
        m_attributeBases = new double[inst.numAttributes()];
        for (int noa = 0; noa < inst.numAttributes(); noa++) {
            double min = Double.POSITIVE_INFINITY;
            double max = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < inst.numInstances(); i++) {
                if (!inst.instance(i).isMissing(noa)) {
                    double value = inst.instance(i).value(noa);
                    if (value < min) {
                        min = value;
                    }
                    if (value > max) {
                        max = value;
                    }
                }
            }
            m_attributeRanges[noa] = (max - min) / 2;
            m_attributeBases[noa] = (max + min) / 2;
        }

        if (m_normalizeAttributes) {
            for (int i = 0; i < inst.numInstances(); i++) {
                Instance currentInstance = inst.instance(i);
                double[] instance = new double[inst.numAttributes()];
                for (int noa = 0; noa < inst.numAttributes(); noa++) {
                    if (noa != inst.classIndex()) {
                        if (m_attributeRanges[noa] != 0) {
                            instance[noa] = (currentInstance.value(noa) - m_attributeBases[noa])
                                    / m_attributeRanges[noa];
                        } else {
                            instance[noa] = currentInstance.value(noa) - m_attributeBases[noa];
                        }
                    } else {
                        instance[noa] = currentInstance.value(noa);
                    }
                }
                inst.set(i, new DenseInstance(currentInstance.weight(), instance));
            }
        }

        if (inst.classAttribute().isNumeric()) {
            m_numeric = true;
        } else {
            m_numeric = false;
        }
    }
    return inst;
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Builds the classifier.
 */
private void buildClassifier() throws Exception {

    // Generate input format for classifier
    FastVector atts = new FastVector();
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (i == documentAtt) {
            atts.addElement(new Attribute("Term_frequency")); // 2
            atts.addElement(new Attribute("IDF")); // 
            atts.addElement(new Attribute("TFxIDF")); // 
            atts.addElement(new Attribute("First_occurrence")); // 
            atts.addElement(new Attribute("Last_occurrence")); // 
            atts.addElement(new Attribute("Spread")); // 
            atts.addElement(new Attribute("Domain_keyphraseness")); // 
            atts.addElement(new Attribute("Length")); //
            atts.addElement(new Attribute("Generality")); //
            atts.addElement(new Attribute("Node_degree")); // 
            atts.addElement(new Attribute("Semantic_relatedness")); // 
            atts.addElement(new Attribute("Wikipedia_keyphraseness")); // 
            atts.addElement(new Attribute("Inverse_Wikip_frequency")); // 
            atts.addElement(new Attribute("Total_Wikip_keyphraseness")); // 13

        } else if (i == keyphrasesAtt) {
            if (nominalClassValue) {
                FastVector vals = new FastVector(2);
                vals.addElement("False");
                vals.addElement("True");
                atts.addElement(new Attribute("Keyphrase?", vals));
            } else {
                atts.addElement(new Attribute("Keyphrase?"));
            }
        }
    }

    classifierData = new Instances("ClassifierData", atts, 0);

    classifierData.setClassIndex(numFeatures);

    if (debugMode) {
        System.err.println("--- Converting instances for classifier");
    }
    int totalDocuments = getInputFormat().numInstances();
    // Convert pending input instances into data for classifier
    for (int i = 0; i < totalDocuments; i++) {
        Instance current = getInputFormat().instance(i);

        // Get the key phrases for the document
        String keyphrases = current.stringValue(keyphrasesAtt);
        HashMap<String, Counter> hashKeyphrases = getGivenKeyphrases(keyphrases);

        // Get the phrases for the document
        HashMap<String, Candidate> candidateList = allCandidates.get(current);

        // Compute the feature values for each phrase and
        // add the instance to the data for the classifier
        int countPos = 0;
        int countNeg = 0;

        if (debugMode) {
            System.err
                    .println("--- Computing features for document " + i + " out of " + totalDocuments + "...");
        }

        for (Candidate candidate : candidateList.values()) {

            // ignore all candidates that appear less than a threshold
            if (candidate.getFrequency() < minOccurFrequency) {
                continue;
            }

            // compute feature values
            double[] vals = computeFeatureValues(candidate, true, hashKeyphrases, candidateList);

            if (vals[vals.length - 1] == 0) {
                countNeg++;
            } else {
                countPos++;
            }
            Instance inst = new Instance(current.weight(), vals);
            // System.out.println(candidate + "\t" + inst);
            classifierData.add(inst);

        }
        if (debugMode) {
            System.err.println(countPos + " positive; " + countNeg + " negative instances");
        }
    }

    if (debugMode) {
        System.err.println("--- Building classifier");
    }

    if (classifier == null) {
        // Build classifier
        if (nominalClassValue) {

            //         FilteredClassifier fclass = new FilteredClassifier();
            //         fclass.setClassifier(new NaiveBayesSimple());
            //         fclass.setFilter(new Discretize());
            //         classifier = fclass;

            classifier = new Bagging(); // try also //
            classifier.setOptions(
                    Utils.splitOptions("-P 10 -S 1 -I 10 -W weka.classifiers.trees.J48 -- -U -M 2"));

        } else {

            classifier = new Bagging();
            // try also
            // classifier.setOptions(Utils.splitOptions("-P 10 -S 1 -I 10 -W
            // weka.classifiers.trees.J48 -- -U -M 2")) ;
            String optionsString = "-P 100 -S 1 -I 10 -W weka.classifiers.trees.M5P -- -U -M 7.0";
            String[] options = Utils.splitOptions(optionsString);
            classifier.setOptions(options);

        }
    }
    FileOutputStream out = new FileOutputStream(new File("19docs.arff"));
    PrintWriter printer = new PrintWriter(out);

    printer.write(classifierData.toString());

    printer.close();
    out.close();

    classifier.buildClassifier(classifierData);

    if (debugMode) {
        System.err.println(classifier);
    }

    // Save space
    classifierData = new Instances(classifierData, 0);
}

From source file:filters.MauiFilter.java

License:Open Source License

/**
 * Converts an instance.
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    String fileName = instance.stringValue(fileNameAtt);

    if (debugMode) {
        System.err.println("-- Converting instance for document " + fileName);
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;

    if (!instance.isMissing(keyphrasesAtt)) {
        String keyphrases = instance.stringValue(keyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases);
    }

    // Get the document text
    String documentText = instance.stringValue(documentAtt);

    // Compute the candidate topics
    HashMap<String, Candidate> candidateList;
    if (allCandidates != null && allCandidates.containsKey(instance)) {
        candidateList = allCandidates.get(instance);
    } else {
        candidateList = getCandidates(documentText);
    }
    if (debugMode) {
        System.err.println(candidateList.size() + " candidates ");
    }

    // Set indices for key attributes
    int tfidfAttIndex = documentAtt + 2;
    int distAttIndex = documentAtt + 3;
    int probsAttIndex = documentAtt + numFeatures;

    int countPos = 0;
    int countNeg = 0;

    // Go through the phrases and convert them into instances
    for (Candidate candidate : candidateList.values()) {

        if (candidate.getFrequency() < minOccurFrequency) {
            continue;
        }

        String name = candidate.getName();
        String orig = candidate.getBestFullForm();
        if (!vocabularyName.equals("none")) {
            orig = candidate.getTitle();
        }

        double[] vals = computeFeatureValues(candidate, training, hashKeyphrases, candidateList);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(classifierData);

        // Get probability of a phrase being key phrase
        double[] probs = classifier.distributionForInstance(inst);

        double prob = probs[0];
        if (nominalClassValue) {
            prob = probs[1];
        }

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures + 2];

        int pos = 0;
        for (int i = 1; i < instance.numAttributes(); i++) {

            if (i == documentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(name);
                newInst[pos++] = index;

                // Add original version
                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(name);
                }

                newInst[pos++] = index;

                // Add features
                newInst[pos++] = inst.value(tfIndex);
                newInst[pos++] = inst.value(idfIndex);
                newInst[pos++] = inst.value(tfidfIndex);
                newInst[pos++] = inst.value(firstOccurIndex);
                newInst[pos++] = inst.value(lastOccurIndex);
                newInst[pos++] = inst.value(spreadOccurIndex);
                newInst[pos++] = inst.value(domainKeyphIndex);
                newInst[pos++] = inst.value(lengthIndex);
                newInst[pos++] = inst.value(generalityIndex);
                newInst[pos++] = inst.value(nodeDegreeIndex);
                newInst[pos++] = inst.value(semRelIndex);
                newInst[pos++] = inst.value(wikipKeyphrIndex);
                newInst[pos++] = inst.value(invWikipFreqIndex);
                newInst[pos++] = inst.value(totalWikipKeyphrIndex);

                // Add probability
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == keyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }

        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);

        if (inst.classValue() == 0) {
            countNeg++;
        } else {
            countPos++;
        }
    }
    if (debugMode) {
        System.err.println(countPos + " positive; " + countNeg + " negative instances");
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and
        // continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current
        // phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }

    return vector;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Sets the class prior probabilities.
 *
 * @param train the training instances used to determine
 * the prior probabilities
 * @exception Exception if the class attribute of the instances is not
 * set
 */
public void setPriors(Instances train) throws Exception {

    if (!m_ClassIsNominal) {

        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;

        for (int i = 0; i < train.numInstances(); i++) {
            Instance currentInst = train.instance(i);
            if (!currentInst.classIsMissing()) {
                addNumericTrainClass(currentInst.classValue(), currentInst.weight());
            }
        }

    } else {
        for (int i = 0; i < m_NumClasses; i++) {
            m_ClassPriors[i] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;
        for (int i = 0; i < train.numInstances(); i++) {
            if (!train.instance(i).classIsMissing()) {
                m_ClassPriors[(int) train.instance(i).classValue()] += train.instance(i).weight();
                m_ClassPriorsSum += train.instance(i).weight();
            }
        }
    }
}