Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

On this page you can find example usage for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Document

Sets the reference to the dataset.
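
The snippet below is a minimal, self-contained sketch of the typical pattern (class name, relation name and attribute names are illustrative, not taken from the examples under Usage): build an Instances header, create a DenseInstance, and call setDataset so the instance can resolve attribute metadata against that header.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetSketch {
    public static void main(String[] args) {
        // Build a tiny dataset header with two numeric attributes.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));
        atts.add(new Attribute("weight"));
        Instances dataset = new Instances("demo", atts, 0);

        // A freshly created instance has no backing dataset, so attribute
        // names, types and the class index cannot be resolved yet.
        Instance inst = new DenseInstance(dataset.numAttributes());
        inst.setValue(0, 4.2);
        inst.setValue(1, 0.9);

        // setDataset only sets a reference; it does not add the instance
        // to the dataset, and it does not check compatibility of the values.
        inst.setDataset(dataset);
        dataset.add(inst);

        System.out.println(inst);
    }
}

Note that Instances.add stores a shallow copy, so calling setDataset on inst itself is what lets you pass inst directly to a classifier afterwards.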

Usage

From source file:DocClassifier.java

public Instances createInstances(File[] files) {
    Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);
    // The last attribute in attrList is the class attribute.
    instances.setClass((Attribute) attrList.lastElement());
    for (File file : files) {
        Instance inst = createInstance(file);
        // Associate the instance with the dataset before adding it.
        inst.setDataset(instances);
        instances.add(inst);
    }
    return instances;
}

From source file:CopiaSeg3.java

public static void main(String[] args) throws Exception {

    BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff");

    Instances data = new Instances(datafile);
    data.setClassIndex(data.numAttributes() - 1);

    // Choose the number of partitions for validation (4 = 75% train, 25% test)
    Instances[] split = split(data, 4);

    // Separate the folds into training and testing sets
    Instances trainingSplits = split[0];
    Instances testingSplits = split[1];

    // Choose a set of classifiers
    Classifier[] models = { new MultilayerPerceptron()
            //, new J48 
            //, ...
    };

    FastVector fvWekaAttributes = new FastVector(9);

    // Run each classifier
    for (int j = 0; j < models.length; j++) {

        // Collect every group of predictions for current model in a FastVector
        FastVector predictions = new FastVector();

        // For each training-testing split pair, train and test the classifier
        Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits);
        predictions.appendElements(validation.predictions());

        // Print the summary for each training-testing pair.
        System.out.println(models[j].toString());

        // Calculate overall accuracy of current classifier on all splits
        double accuracy = calculateAccuracy(predictions);

        // Print the current classifier's name and accuracy.
        System.out.println(models[j].getClass().getSimpleName() + " Accuracy: "
                + String.format("%.2f%%", accuracy) + "\n=====================");

        // Step 4: use the classifier.
        // For real-world applications, actually using the classifier is the ultimate goal.
        // Build an instance (iUse) as explained in step 2 and associate it with the
        // training set so that it inherits the dataset description.
        Instance iUse = new DenseInstance(trainingSplits.numAttributes());
        iUse.setValue((Attribute) predictions.elementAt(0), 4);
        iUse.setValue((Attribute) predictions.elementAt(1), 8);
        iUse.setValue((Attribute) predictions.elementAt(2), 8);
        iUse.setValue((Attribute) predictions.elementAt(3), 5);
        iUse.setValue((Attribute) predictions.elementAt(4), 4);
        iUse.setValue((Attribute) predictions.elementAt(5), 5);
        iUse.setValue((Attribute) predictions.elementAt(6), 10);
        iUse.setValue((Attribute) predictions.elementAt(7), 4);
        iUse.setValue((Attribute) predictions.elementAt(8), 1);

        iUse.setDataset(trainingSplits);
        // Get the likelihood of each class:
        // fDistribution[0] is the probability of the "positive" class,
        // fDistribution[1] the probability of the "negative" class.
        double[] fDistribution = models[j].distributionForInstance(iUse);

        System.out.println("Probabilidad positivo: " + fDistribution[0]);
        System.out.println("Probabilidad negativo: " + fDistribution[1]);
    }

}

From source file:WekaRegressor.java

License:Open Source License

@Override
public double regress(DataPoint data) {
    try {
        // Convert the data point and attach the training header before predicting.
        Instance instance = InstanceHandler.dataPointToInstance(data);
        instance.setDataset(wekaDataSet);
        return wekaClassifier.classifyInstance(instance);
    } catch (Exception ex) {
        return Double.NaN;
    }
}

From source file:PredictMention.java

protected void setTestData(String title, String description, String keywords) {
    // Copy the structure of the training data, then drop its instances.
    testData = new Instances(trainedData);
    testData.clear();
    Instance inst = new DenseInstance(4);
    inst.setDataset(testData);
    inst.setValue(0, title);
    inst.setValue(1, description);
    inst.setValue(2, keywords);
    inst.setMissing(3);
    testData.add(inst);

}

From source file:CJWeka.java

License:Open Source License

/** Convert a string of floats separated by spaces into an Instance
 */
private Instance floatstringToInst(String floatvalues, Instances ii, boolean hasClass) {
    String[] flostr = floatvalues.split(" ");
    int nvals = flostr.length;
    Instance i = new DenseInstance(nvals);
    int j;

    if (hasClass)
        nvals--;

    for (j = 0; j < nvals; j++) {
        if (!flostr[j].equals("")) {
            i.setValue(j, Float.parseFloat(flostr[j]));
        }
    }

    i.setDataset(ii);

    if (hasClass) {
        Attribute clsAttrib = ii.classAttribute();
        //clsAttrib.addStringValue(flostr[j]);
        i.setValue(clsAttrib, flostr[j]);
    }

    return i;
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Returns the individual predictions of the base classifiers
 * for an instance. Used by StackedMultiClassClassifier.
 * Returns the probability for the second "class" predicted
 * by each base classifier.
 *
 * @param inst the instance to get the prediction for
 * @return the individual predictions
 * @throws Exception if the predictions can't be computed successfully
 */
public double[] individualPredictions(Instance inst) throws Exception {

    double[] result = null;

    if (m_Classifiers.length == 1) {
        result = new double[1];
        result[0] = m_Classifiers[0].distributionForInstance(inst)[1];
    } else {
        result = new double[m_ClassFilters.length];
        for (int i = 0; i < m_ClassFilters.length; i++) {
            if (m_Classifiers[i] != null) {
                if (m_Method == METHOD_1_AGAINST_1) {
                    Instance tempInst = (Instance) inst.copy();
                    tempInst.setDataset(m_TwoClassDataset);
                    result[i] = m_Classifiers[i].distributionForInstance(tempInst)[1];
                } else {
                    m_ClassFilters[i].input(inst);
                    m_ClassFilters[i].batchFinished();
                    result[i] = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output())[1];
                }
            }
        }
    }
    return result;
}

From source file:MultiClassClassifier.java

License:Open Source License

/**
 * Returns the distribution for an instance.
 *
 * @param inst the instance to get the distribution for
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance inst) throws Exception {

    if (m_Classifiers.length == 1) {
        return m_Classifiers[0].distributionForInstance(inst);
    }

    double[] probs = new double[inst.numClasses()];

    if (m_Method == METHOD_1_AGAINST_1) {
        double[][] r = new double[inst.numClasses()][inst.numClasses()];
        double[][] n = new double[inst.numClasses()][inst.numClasses()];

        for (int i = 0; i < m_ClassFilters.length; i++) {
            if (m_Classifiers[i] != null) {
                Instance tempInst = (Instance) inst.copy();
                tempInst.setDataset(m_TwoClassDataset);
                double[] current = m_Classifiers[i].distributionForInstance(tempInst);
                Range range = new Range(((RemoveWithValues) m_ClassFilters[i]).getNominalIndices());
                range.setUpper(m_ClassAttribute.numValues());
                int[] pair = range.getSelection();
                if (m_pairwiseCoupling && inst.numClasses() > 2) {
                    r[pair[0]][pair[1]] = current[0];
                    n[pair[0]][pair[1]] = m_SumOfWeights[i];
                } else {
                    if (current[0] > current[1]) {
                        probs[pair[0]] += 1.0;
                    } else {
                        probs[pair[1]] += 1.0;
                    }
                }
            }
        }
        if (m_pairwiseCoupling && inst.numClasses() > 2) {
            return pairwiseCoupling(n, r);
        }
    } else {
        // error correcting style methods
        for (int i = 0; i < m_ClassFilters.length; i++) {
            m_ClassFilters[i].input(inst);
            m_ClassFilters[i].batchFinished();
            double[] current = m_Classifiers[i].distributionForInstance(m_ClassFilters[i].output());
            // Accumulate the binary classifier scores over the class values they cover

            for (int j = 0; j < m_ClassAttribute.numValues(); j++) {
                if (((MakeIndicator) m_ClassFilters[i]).getValueRange().isInRange(j)) {
                    probs[j] += current[1];
                } else {
                    probs[j] += current[0];
                }
            }
        }
    }

    if (Utils.gr(Utils.sum(probs), 0)) {
        Utils.normalize(probs);
        return probs;
    } else {
        return m_ZeroR.distributionForInstance(inst);
    }
}

From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java

License:Open Source License

/**
 * Matches the input instance against the header.
 *
 * @param input   the Instance to align to the header
 * @return      the aligned Instance
 */
protected Instance match(Instance input) {
    Instance result;
    double[] values;
    int i;

    values = new double[m_Dataset.numAttributes()];
    for (i = 0; i < m_Dataset.numAttributes(); i++) {
        values[i] = Utils.missingValue();
        switch (m_Dataset.attribute(i).type()) {
        case Attribute.NUMERIC:
        case Attribute.DATE:
            values[i] = input.value(i);
            break;
        case Attribute.NOMINAL:
            if (m_Dataset.attribute(i).indexOfValue(input.stringValue(i)) != -1)
                values[i] = m_Dataset.attribute(i).indexOfValue(input.stringValue(i));
            break;
        case Attribute.STRING:
            values[i] = m_Dataset.attribute(i).addStringValue(input.stringValue(i));
            break;
        case Attribute.RELATIONAL:
            values[i] = m_Dataset.attribute(i).addRelation(input.relationalValue(i));
            break;
        default:
            throw new IllegalStateException(
                    "Unhandled attribute type: " + Attribute.typeToString(m_Dataset.attribute(i).type()));
        }
    }

    if (input instanceof SparseInstance)
        result = new SparseInstance(input.weight(), values);
    else
        result = new DenseInstance(input.weight(), values);
    result.setDataset(m_Dataset);

    // fix class index, if necessary
    if ((input.classIndex() != m_Dataset.classIndex()) && (m_Dataset.classIndex() < 0))
        m_Dataset.setClassIndex(input.classIndex());

    return result;
}

From source file:adams.data.conversion.ReportToWekaInstance.java

License:Open Source License

/**
 * Performs the actual conversion.
 *
 * @return      the converted data
 * @throws Exception   if something goes wrong with the conversion
 */
protected Object doConvert() throws Exception {
    Report report;
    Instance result;
    ArrayList<Attribute> atts;
    ArrayList<String> attValues;
    int i;
    double[] values;

    report = (Report) m_Input;

    // generate header
    if (m_Header == null) {
        atts = new ArrayList<>();
        for (i = 0; i < m_Fields.length; i++) {
            switch (m_Fields[i].getDataType()) {
            case NUMERIC:
                atts.add(new Attribute(m_Fields[i].getName()));
                break;
            case BOOLEAN:
                attValues = new ArrayList<>();
                attValues.add("false");
                attValues.add("true");
                atts.add(new Attribute(m_Fields[i].getName(), attValues));
                break;
            default:
                atts.add(new Attribute(m_Fields[i].getName(), (List<String>) null));
                break;
            }
        }
        m_Header = new Instances(getClass().getName(), atts, 0);
    }

    // generate instance
    values = new double[m_Header.numAttributes()];
    for (i = 0; i < m_Fields.length; i++) {
        if (report.hasValue(m_Fields[i])) {
            switch (m_Fields[i].getDataType()) {
            case NUMERIC:
                values[i] = report.getDoubleValue(m_Fields[i]);
                break;
            case BOOLEAN:
                if (report.getBooleanValue(m_Fields[i]))
                    values[i] = 1;
                else
                    values[i] = 0;
                break;
            default:
                values[i] = m_Header.attribute(i).addStringValue("" + report.getValue(m_Fields[i]));
                break;
            }
        } else {
            values[i] = weka.core.Utils.missingValue();
        }
    }
    result = new DenseInstance(1.0, values);
    result.setDataset(m_Header);

    return result;
}

From source file:adams.data.featureconverter.Weka.java

License:Open Source License

/**
 * Performs the actual generation of a row from the raw data.
 *
 * @param data   the data of the row, elements can be null (= missing)
 * @return      the generated row
 */
@Override
protected Instance doGenerateRow(List<Object> data) {
    Instance result;
    int i;
    Object obj;
    double[] values;

    values = new double[m_Header.numAttributes()];

    for (i = 0; i < data.size(); i++) {
        obj = data.get(i);
        if (obj == null) {
            values[i] = Utils.missingValue();
            continue;
        }
        switch (m_HeaderDefinition.getType(i)) {
        case BOOLEAN:
            values[i] = ((Boolean) obj) ? 0.0 : 1.0;
            break;
        case NUMERIC:
            values[i] = ((Number) obj).doubleValue();
            break;
        case STRING:
        case UNKNOWN:
            values[i] = m_Header.attribute(i).addStringValue(obj.toString());
            break;
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(m_Header);

    return result;
}