Example usage for the weka.core.DenseInstance constructor DenseInstance(int numAttributes)

List of usage examples for the weka.core.DenseInstance constructor DenseInstance(int numAttributes)

Introduction

This page collects usage examples for the weka.core.DenseInstance constructor DenseInstance(int numAttributes).

Prototype



public DenseInstance(int numAttributes) 

Source Link

Document

Constructor of an instance that sets weight to one, all values to be missing, and the reference to the dataset to null.

Usage

From source file:CopiaSeg3.java

/**
 * Loads the breast-cancer ARFF file, splits it into a training and a testing
 * set, evaluates every configured classifier on that split, and finally asks
 * each classifier for the class distribution of one hand-built sample.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the data cannot be read or a classifier fails
 */
public static void main(String[] args) throws Exception {

    // Read the dataset and release the reader as soon as parsing is done.
    BufferedReader datafile = readDataFile("breast-cancer-wisconsin.arff");
    Instances data;
    try {
        data = new Instances(datafile);
    } finally {
        if (datafile != null) {
            datafile.close();
        }
    }
    data.setClassIndex(data.numAttributes() - 1);

    // Choose the number of partitions for validation (4 = 75% train, 25% test)
    Instances[] split = split(data, 4);

    // Separate the partitions into the training and testing sets
    Instances trainingSplits = split[0];
    Instances testingSplits = split[1];

    // Choose a set of classifiers
    Classifier[] models = { new MultilayerPerceptron()
            //, new J48
            //, ...
    };

    // Feature values of the sample to classify, in attribute order.
    double[] sampleValues = { 4, 8, 8, 5, 4, 5, 10, 4, 1 };

    // Run every classifier
    for (int j = 0; j < models.length; j++) {

        // Collect every group of predictions for current model in a FastVector
        FastVector predictions = new FastVector();

        // Train and test the classifier on the training-testing split pair
        Evaluation validation = simpleClassify(models[j], trainingSplits, testingSplits);
        predictions.appendElements(validation.predictions());

        // Print the trained model.
        System.out.println(models[j].toString());

        // Calculate overall accuracy of current classifier on all splits
        double accuracy = calculateAccuracy(predictions);

        // Print current classifier's name and accuracy.
        System.out.println(models[j].getClass().getSimpleName() + " Accuracy: "
                + String.format("%.2f%%", accuracy) + "\n=====================");

        // Build the instance to classify. It is sized from the training header
        // (so the class slot exists and stays missing) and attached to the
        // training set first, so that it inherits the attribute description.
        // The values are addressed by attribute index: the prediction list
        // holds predictions, not attributes, so it cannot be used to look
        // attributes up.
        Instance iUse = new DenseInstance(trainingSplits.numAttributes());
        iUse.setDataset(trainingSplits);
        for (int k = 0; k < sampleValues.length; k++) {
            iUse.setValue(k, sampleValues[k]);
        }

        // Get the likelihood of each class
        // fDistribution[0] is the probability of being positive?
        // fDistribution[1] is the probability of being negative?
        double[] fDistribution = models[j].distributionForInstance(iUse);

        System.out.println("Probabilidad positivo: " + fDistribution[0]);
        System.out.println("Probabilidad negativo: " + fDistribution[1]);
    }

}

From source file:PredictMention.java

/**
 * Rebuilds {@code testData} so that it contains exactly one instance made of
 * the given text fields, with the fourth attribute left missing.
 *
 * @param title       value for attribute 0
 * @param description value for attribute 1
 * @param keywords    value for attribute 2
 */
protected void setTestData(String title, String description, String keywords) {
    // Copy only the header of the trained data: an empty dataset with the
    // same attributes, without copying and then discarding every instance.
    testData = new Instances(trainedData, 0);

    Instance inst = new DenseInstance(4);
    // The dataset must be attached before the String-valued setters are used.
    inst.setDataset(testData);
    inst.setValue(0, title);
    inst.setValue(1, description);
    inst.setValue(2, keywords);
    inst.setMissing(3);
    testData.add(inst);

}

From source file:CJWeka.java

License:Open Source License

/**
 * Converts a string of floats separated by single spaces into an Instance.
 *
 * @param floatvalues the space-separated values; when {@code hasClass} is
 *                    true the last token is the class label
 * @param ii          the dataset the new instance is attached to
 * @param hasClass    whether the last token is a class label rather than a number
 * @return the new instance; values for empty tokens are left missing
 */
private Instance floatstringToInst(String floatvalues, Instances ii, boolean hasClass) {
    String[] tokens = floatvalues.split(" ");
    Instance result = new DenseInstance(tokens.length);

    // With a class label present, the last token is not numeric and is handled below.
    int numNumeric = hasClass ? tokens.length - 1 : tokens.length;

    for (int j = 0; j < numNumeric; j++) {
        // Empty tokens (e.g. from consecutive spaces) are skipped.
        if (!tokens[j].equals("")) {
            // Parsed as float and widened, which keeps the float precision of
            // the values while avoiding the deprecated boxing constructor.
            result.setValue(j, Float.parseFloat(tokens[j]));
        }
    }

    // The dataset is needed before the class label can be set by name.
    result.setDataset(ii);

    if (hasClass) {
        Attribute clsAttrib = ii.classAttribute();
        result.setValue(clsAttrib, tokens[numNumeric]);
    }

    return result;
}

From source file:PCADetector.java

License:Apache License

/**
 * Converts the original data matrix into a Weka dataset. The matrix is
 * indexed as {@code m_oriDataMatrix.get(attribute).get(instance)}; each outer
 * entry becomes one attribute named after its position.
 *
 * @return the dataset named "MetricInstances", or {@code null} if the matrix
 *         has no attributes or its first attribute has no values
 */
public Instances getInstances() {
    int numAtts = m_oriDataMatrix.size();
    // size() is never negative, so the empty matrix has to be rejected with
    // "<= 0"; otherwise get(0) below fails on an empty matrix.
    if (numAtts <= 0) {
        return null;
    }
    ArrayList<Attribute> atts = new ArrayList<Attribute>(numAtts);
    for (int att = 0; att < numAtts; att++) {
        atts.add(new Attribute(Integer.toString(att), att));
    }
    // The first attribute's length determines the number of instances.
    int numInstances = m_oriDataMatrix.get(0).size();
    if (numInstances <= 0) {
        return null;
    }
    Instances dataset = new Instances("MetricInstances", atts, numInstances);
    for (int inst = 0; inst < numInstances; inst++) {
        Instance newInst = new DenseInstance(numAtts);
        for (int att = 0; att < numAtts; att++) {
            newInst.setValue(att, m_oriDataMatrix.get(att).get(inst));
        }
        dataset.add(newInst);
    }
    return dataset;
}

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig   the original datasets/*www  .j a  v a 2 s.  com*/
 * @param inst   the processed datasets to merge into one
 * @param ids      the IDs for identifying the rows
 * @return      the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
    Instances result;
    ArrayList<Attribute> atts;
    int i;
    int n;
    int m;
    int index;
    String relation;
    List sortedIDs;
    Attribute att;
    int[] indexStart;
    double value;
    double[] values;
    HashMap<Integer, Integer> hashmap;
    HashSet<Instance> hs;

    // create header
    if (isLoggingEnabled())
        getLogger().info("Creating merged header...");
    atts = new ArrayList<>();
    relation = "";
    indexStart = new int[inst.length];
    for (i = 0; i < inst.length; i++) {
        indexStart[i] = atts.size();
        for (n = 0; n < inst[i].numAttributes(); n++)
            atts.add((Attribute) inst[i].attribute(n).copy());
        // assemble relation name
        if (i > 0)
            relation += "_";
        relation += inst[i].relationName();
    }
    result = new Instances(relation, atts, ids.size());

    // fill with missing values
    if (isLoggingEnabled())
        getLogger().info("Filling with missing values...");
    for (i = 0; i < ids.size(); i++) {
        if (isStopped())
            return null;
        // progress
        if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
            getLogger().info("" + (i + 1));
        result.add(new DenseInstance(result.numAttributes()));
    }

    // sort IDs
    if (isLoggingEnabled())
        getLogger().info("Sorting indices...");
    sortedIDs = new ArrayList(ids);
    Collections.sort(sortedIDs);

    // generate rows
    hashmap = new HashMap<>();
    for (i = 0; i < inst.length; i++) {
        if (isStopped())
            return null;
        if (isLoggingEnabled())
            getLogger().info("Adding file #" + (i + 1));
        att = orig[i].attribute(m_UniqueID);
        for (n = 0; n < inst[i].numInstances(); n++) {
            // progress
            if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
                getLogger().info("" + (n + 1));

            // determine index of row
            if (m_AttType == Attribute.NUMERIC)
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
            else
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
            if (index < 0)
                throw new IllegalStateException(
                        "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");

            if (!hashmap.containsKey(index))
                hashmap.put(index, 0);
            hashmap.put(index, hashmap.get(index) + 1);

            // use internal representation for faster access
            values = result.instance(index).toDoubleArray();

            // add attribute values
            for (m = 0; m < inst[i].numAttributes(); m++) {
                // missing value?
                if (inst[i].instance(n).isMissing(m))
                    continue;

                switch (inst[i].attribute(m).type()) {
                case Attribute.NUMERIC:
                case Attribute.DATE:
                case Attribute.NOMINAL:
                    values[indexStart[i] + m] = inst[i].instance(n).value(m);
                    break;

                case Attribute.STRING:
                    value = result.attribute(indexStart[i] + m)
                            .addStringValue(inst[i].instance(n).stringValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                case Attribute.RELATIONAL:
                    value = result.attribute(indexStart[i] + m)
                            .addRelation(inst[i].instance(n).relationalValue(m));
                    values[indexStart[i] + m] = value;
                    break;

                default:
                    throw new IllegalStateException("Unhandled attribute type: " + inst[i].attribute(m).type());
                }
            }

            // update row
            result.set(index, new DenseInstance(1.0, values));
        }
    }

    if (getRemove()) {
        hs = new HashSet<>();
        for (Integer x : hashmap.keySet()) {
            if (hashmap.get(x) != inst.length)
                hs.add(result.get(x));
        }
        result.removeAll(hs);
    }

    return result;
}

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Appends a row to the spreadsheet./*from   ww w  .j  av a2s .c o  m*/
 *
 * @return      the created row
 */
@Override
public DataRow addRow() {
    DenseInstance inst;

    inst = new DenseInstance(getColumnCount());
    inst.setDataset(m_Data);
    m_Data.add(inst);

    return new InstanceView(this, inst);
}

From source file:adams.ml.data.InstancesView.java

License:Open Source License

/**
 * Inserts a row at the specified location.
 *
 * @param index   the index where to insert the row
 * @return      the created row/*  w  w w .j  ava  2  s. co  m*/
 */
@Override
public DataRow insertRow(int index) {
    DenseInstance inst;

    inst = new DenseInstance(getColumnCount());
    inst.setDataset(m_Data);
    m_Data.add(index, inst);

    return new InstanceView(this, inst);
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

/**
 * Generates a Gaussian data set with K clusters and m dimensions.
 * <p>
 * Every point is drawn per dimension as
 * {@code center + gaussian * sigma}; clusters are generated one after the
 * other, so without randomization the instances are grouped by cluster.
 *
 * @param centers
 *            K x m matrix of cluster centers
 * @param sigmas
 *            K x m matrix of per-dimension scale factors
 * @param pointsPerCluster
 *            number of points per cluster
 * @param seed
 *            for the RNG
 * @param randomize
 *            should the order of the instances be randomized?
 * @param supervised
 *            should class label be present? if true, the class is the m+1
 *            attribute
 *
 * @return the generated dataset with K * pointsPerCluster instances
 */
public static Instances genGaussianDataset(double[][] centers, double[][] sigmas, int pointsPerCluster,
        long seed, boolean randomize, boolean supervised) {
    final Random rng = new Random(seed);
    final int numClusters = centers.length;
    final int numDims = centers[0].length;

    // numeric attributes "at0" .. "at(m-1)"
    FastVector attributes = new FastVector(numDims);
    for (int d = 0; d < numDims; d++) {
        attributes.addElement(new Attribute("at" + d));
    }

    // optional nominal class attribute with one label per cluster
    if (supervised) {
        FastVector labels = new FastVector(numClusters);
        for (int c = 0; c < numClusters; c++) {
            labels.addElement("Gauss-" + c);
        }
        attributes.addElement(new Attribute("Class", labels));
    }

    String relationName = numClusters + (supervised ? "-Gaussians-supervised" : "-Gaussians");
    Instances data = new Instances(relationName, attributes, numClusters * pointsPerCluster);
    if (supervised) {
        data.setClassIndex(numDims);
    }

    // one extra slot for the class label in the supervised case
    final int width = supervised ? numDims + 1 : numDims;

    for (int c = 0; c < numClusters; c++) {
        for (int p = 0; p < pointsPerCluster; p++) {
            Instance point = new DenseInstance(width);
            point.setDataset(data);
            for (int d = 0; d < numDims; d++) {
                point.setValue(d, centers[c][d] + (rng.nextGaussian() * sigmas[c][d]));
            }
            if (supervised) {
                point.setValue(numDims, "Gauss-" + c);
            }
            data.add(point);
        }
    }

    // shuffle with the same RNG if requested
    if (randomize) {
        data.randomize(rng);
    }

    return data;
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

/**
 * Builds a Weka dataset from a bag of tuples. The width of the first tuple
 * fixes the attribute count: all fields but the last become numeric
 * attributes "att0", "att1", ... and the last field becomes the nominal
 * class attribute "ClassNames", whose labels are the distinct class strings
 * found in the bag.
 *
 * @param objData the input, cast to a {@code DataBag}
 * @return the dataset with the class index set to the last attribute, or
 *         {@code null} if any exception occurs while processing the input
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(Object objData) {

    try {
        DataBag bag = (DataBag) objData;

        // N_Features + 1 Class, taken from the first tuple
        int numAttributes = bag.iterator().next().size();
        int classPos = numAttributes - 1;

        // First pass: count the samples in the bag
        int bagSize = 0;
        Iterator<Tuple> counting = bag.iterator();
        while (counting.hasNext()) {
            counting.next();
            bagSize = bagSize + 1;
        }

        // Second pass: read the class name of every sample
        String[] inputClass = new String[bagSize];
        Iterator<Tuple> labelled = bag.iterator();
        for (int row = 0; labelled.hasNext(); row++) {
            inputClass[row] = DataType.toString(labelled.next().get(classPos));
        }

        // Distinct class names become the labels of the nominal class attribute
        HashSet classSet = new HashSet(Arrays.asList(inputClass));
        String[] classValue = (String[]) classSet.toArray(new String[0]);
        FastVector classNames = new FastVector();
        for (String label : classValue) {
            classNames.addElement(label);
        }

        // Header: the numeric features first, then the class attribute appended
        FastVector atts = new FastVector();
        for (int i = 0; i < classPos; i++) {
            atts.addElement(new Attribute("att" + i));
        }
        Instances dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), classPos);

        // One scratch instance is refilled for every tuple; the class label is
        // then set on the row held by the dataset at the current position.
        Instance tmpData = new DenseInstance(numAttributes);
        Iterator<Tuple> rows = bag.iterator();
        for (int row = 0; rows.hasNext(); row++) {
            Tuple tuple = rows.next();
            for (int i = 0; i < classPos; i++) {
                Attribute feature = (weka.core.Attribute) atts.elementAt(i);
                tmpData.setValue(feature, DataType.toDouble(tuple.get(i)));
            }
            dataInstance.add(tmpData);
            dataInstance.instance(row).setValue(classPos, DataType.toString(tuple.get(classPos)));
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}

From source file:br.puc_rio.ele.lvc.interimage.datamining.DataParser.java

License:Apache License

/**
 * Builds a Weka dataset from comma-separated text. Empty lines are skipped.
 * The first non-empty line fixes the attribute count: all fields but the
 * last become numeric attributes "att0", "att1", ... and the last field
 * becomes the nominal class attribute "ClassNames", whose labels are the
 * distinct last fields of all lines.
 *
 * @param buff the reader supplying the comma-separated lines
 * @return the dataset with the class index set to the last attribute, or
 *         {@code null} if any exception occurs while processing the input
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public Instances parseData(BufferedReader buff) {

    try {
        // N_Features + 1 Class; stays 0 until the first non-empty line is seen
        int numAttributes = 0;
        List<String> inputClass = new ArrayList<String>();
        List<String[]> dataset = new ArrayList<String[]>();

        // Read all samples, remembering each line's fields and its class name
        for (String line = buff.readLine(); line != null; line = buff.readLine()) {
            if (line.isEmpty()) {
                continue;
            }
            String[] fields = line.split(",");
            if (numAttributes == 0) {
                numAttributes = fields.length;
            }
            inputClass.add(fields[fields.length - 1]);
            dataset.add(fields);
        }
        int classPos = numAttributes - 1;

        // Distinct class names become the labels of the nominal class attribute
        HashSet classSet = new HashSet(inputClass);
        String[] classValue = (String[]) classSet.toArray(new String[0]);
        FastVector classNames = new FastVector();
        for (String label : classValue) {
            classNames.addElement(label);
        }

        // Header: the numeric features first, then the class attribute appended
        FastVector atts = new FastVector();
        for (int i = 0; i < classPos; i++) {
            atts.addElement(new Attribute("att" + i));
        }
        Instances dataInstance = new Instances("MyRelation", atts, numAttributes);
        dataInstance.insertAttributeAt(new Attribute("ClassNames", classNames), classPos);

        // One scratch instance is refilled for every line; the class label is
        // then set on the row held by the dataset at the current position.
        Instance tmpData = new DenseInstance(numAttributes);
        for (int row = 0; row < dataset.size(); row++) {
            String[] fields = dataset.get(row);
            for (int i = 0; i < classPos; i++) {
                Attribute feature = (weka.core.Attribute) atts.elementAt(i);
                tmpData.setValue(feature, DataType.toDouble(fields[i]));
            }
            dataInstance.add(tmpData);
            dataInstance.instance(row).setValue(classPos, DataType.toString(fields[classPos]));
        }

        // Setting the class index
        dataInstance.setClassIndex(dataInstance.numAttributes() - 1);

        return dataInstance;
    } catch (Exception e) {
        System.err.println("Failed to process input; error - " + e.getMessage());
        return null;
    }
}