Example usage for weka.core Instances Instances

List of usage examples for weka.core Instances Instances

Introduction

On this page you can find example usages of the weka.core.Instances copy constructor, Instances(Instances).

Prototype

public Instances(Instances dataset) 

Source Link

Document

Constructor copying all instances and references to the header information from the given set of instances.

Usage

From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java

License:Open Source License

/**
 * Recomputes the coordinates of a centroid from the instances assigned to
 * its cluster, using the rule that matches the active distance function.
 *
 * <p>Nominal attributes, and every attribute under Euclidean distance, use
 * {@code meanOrMode}. Otherwise, under Manhattan or custom pair-wise
 * distance, the median of the attribute values is used.
 *
 * @param centroidIndex index of the centroid whose coordinates are computed
 * @param members the instances assigned to the cluster of this centroid
 * @param updateClusterInfo whether the per-cluster count arrays and the
 *          centroid list are updated as well
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    final int attributeCount = members.numAttributes();
    final double[] coords = new double[attributeCount];

    final boolean meanBased = m_DistanceFunction instanceof EuclideanDistance;
    final boolean medianBased = m_DistanceFunction instanceof ManhattanDistance
            || m_DistanceFunction instanceof CustomPairWiseDistance;

    // The following three values are only needed for the median computation.
    Instances medianSource = null;
    int lowerMiddle = 0;
    boolean evenCount = false;
    if (medianBased) {
        final int memberCount = members.numInstances();
        lowerMiddle = (memberCount - 1) / 2;
        evenCount = (memberCount % 2) == 0;
        medianSource = m_PreserveOrder ? members : new Instances(members);
    }

    for (int att = 0; att < attributeCount; att++) {
        if (meanBased || members.attribute(att).isNominal()) {
            // mean for numeric attributes, mode for nominal ones
            coords[att] = members.meanOrMode(att);
        } else if (medianBased) {
            if (members.numInstances() == 1) {
                // a single member is its own median
                coords[att] = members.instance(0).value(att);
            } else {
                double median = medianSource.kthSmallestValue(att, lowerMiddle + 1);
                if (evenCount) {
                    // even number of members: average the two middle values
                    median = (median + medianSource.kthSmallestValue(att, lowerMiddle + 2)) / 2;
                }
                coords[att] = median;
            }
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][att] = members.attributeStats(att).missingCount;
            m_ClusterNominalCounts[centroidIndex][att] = members.attributeStats(att).nominalCounts;

            final boolean markMissing;
            if (members.attribute(att).isNominal()) {
                // missing values outnumber the most frequent nominal value
                final int modeIndex = Utils.maxIndex(m_ClusterNominalCounts[centroidIndex][att]);
                markMissing = m_ClusterMissingCounts[centroidIndex][att]
                        > m_ClusterNominalCounts[centroidIndex][att][modeIndex];
            } else {
                // every member is missing this attribute
                markMissing = m_ClusterMissingCounts[centroidIndex][att] == members.numInstances();
            }
            if (markMissing) {
                coords[att] = Instance.missingValue();
            }
        }
    }

    if (updateClusterInfo) {
        m_ClusterCentroids.add(new Instance(1.0, coords));
    }
    return coords;
}

From source file:decisiontree.MyC45.java

/**
 * Builds the tree from the given training data.
 *
 * @param instances the training data
 * @throws Exception if the capability check or any later step fails
 */
public void buildClassifier(Instances instances) throws Exception {
    // Reject data this classifier cannot handle.
    getCapabilities().testWithFail(instances);

    // Instances without a class value are removed from a copy of the data.
    Instances training = new Instances(instances);
    training.deleteWithMissingClass();

    // Missing values are handled before the tree is grown.
    makeTree(handleMissingValues(training));
}

From source file:decisiontree.MyID3.java

/**
 * Builds the tree from the given training data.
 *
 * @param data the training data
 * @throws Exception if the capability check or tree construction fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    // Instances without a class value are removed from a copy of the data.
    final Instances training = new Instances(data);
    training.deleteWithMissingClass();

    makeTree(training);
}

From source file:decisiontreeclassifier.ITree2.java

/********************************************************************
 * Adds children nodes with all the necessary data to the Node 
 * parameter/* w w  w . j  a  v  a  2s . c  o m*/
 ********************************************************************/
public void addTreeNode(int attIndex, Node theNode) {
    double range = findRange(attIndex, theNode);
    double increment = range / binNum;
    double lowest = findLowest(attIndex, theNode);
    int numInstances = theNode.dataSet.numInstances();

    for (int i = 0; i < binNum; i++) {
        Node child;
        Instances InstCopy = new Instances(theNode.dataSet);
        ArrayList UFcopy = (ArrayList<Integer>) theNode.usedFeatures.clone();
        if (i == 0) {
            child = new Node(InstCopy, UFcopy, 0, Double.NEGATIVE_INFINITY, (lowest + increment), attIndex,
                    iTree.root);
        } else if (i == (binNum - 1)) {
            child = new Node(InstCopy, UFcopy, i, (lowest + (i * increment)), Double.POSITIVE_INFINITY,
                    attIndex, iTree.root);
        } else {
            child = new Node(InstCopy, UFcopy, i, (lowest + (increment * i)), (lowest + (increment * (i + 1))),
                    attIndex, iTree.root);
        }
        theNode.addChild(child);
    }

    filterInstances(theNode, attIndex);
}

From source file:detplagiasi.EMClustering.java

/**
 * Loads the ARFF file returned by {@code getArffFile()}, evaluates an EM
 * clusterer on it and appends the results to {@code output.txt} under the
 * configured address.
 *
 * <p>The cluster of the first test instance, the total number of clusters
 * and the cluster of every training instance are also stored in fields.
 * Failures are reported on the console or through the logger and do not
 * propagate out of the constructor.
 */
EMClustering() {
    addd = ct.getAddress();

    try {
        File he = getArffFile();

        // Load the data set and close the reader as soon as it has been read.
        Instances data;
        try (BufferedReader arffReader = new BufferedReader(new FileReader(he))) {
            data = new Instances(arffReader);
        }
        System.out.println("-----EM Clustering-----");
        // normal
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            String[] options = new String[2];
            options[0] = "-t";
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new EM(), options) + "\r\n");
            out.write("\r\n");

            // manual call
            out.write("\n--> manual\r\n");
            DensityBasedClusterer cl = new EM();
            out.write("\r\n");
            cl.buildClusterer(data);
            getDataUji();
            getDataTraining();
            System.out.println("jumlah kluster = " + cl.numberOfClusters());
            // Cluster the first test instance once and reuse the result.
            int testCluster = cl.clusterInstance(dataUji.instance(0));
            noClusterUji = testCluster;
            totalCluster = cl.numberOfClusters();
            System.out.println("kluster = " + testCluster);
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                array1[b] = td.fileName[b];
                // Store the cluster of each training instance so it can be
                // handed over to the GUI; computed once per instance.
                int trainingCluster = cl.clusterInstance(dataTraining.instance(b));
                array2[b] = trainingCluster;

                System.out.println(trainingCluster);
            }

            out.write("\r\n");

            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.out.println("error2 em cluster");
        }

    } catch (IOException ex) {
        Logger.getLogger(EMClustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null em");
    }
}

From source file:detplagiasi.KMeansClustering.java

/**
 * Loads the ARFF file returned by {@code getArffFile()}, evaluates a
 * SimpleKMeans clusterer with four clusters on it and appends the results
 * to {@code output.txt} under the configured address.
 *
 * <p>The cluster of the first test instance, the total number of clusters
 * and the cluster of every training instance are also stored in fields.
 * Failures are reported on the console or through the logger and do not
 * propagate out of the constructor.
 */
KMeansClustering() {
    addd = Container.getAddress();
    try {
        File he = getArffFile();

        // Load the data set and close the reader as soon as it has been read.
        Instances data;
        try (BufferedReader arffReader = new BufferedReader(new FileReader(he))) {
            data = new Instances(arffReader);
        }
        System.out.println("-----KMeans Clustering-----");
        // normal
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            String[] options = new String[2];
            options[0] = "-t";
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n");
            out.write("\r\n");

            // manual call
            out.write("\n--> manual\r\n");
            SimpleKMeans cl = new SimpleKMeans();
            cl.setNumClusters(4);
            out.write("\r\n");
            cl.buildClusterer(data);
            getDataUji();
            System.out.println("jumlah kluster = " + cl.numberOfClusters());
            // Cluster the first test instance once and reuse the result.
            int testCluster = cl.clusterInstance(dataUji.instance(0));
            System.out.println("kluster = " + testCluster);
            noClusterUji = testCluster;
            totalCluster = cl.numberOfClusters();
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                // Store the cluster of each training instance so it can be
                // handed over to the GUI; computed once per instance.
                int trainingCluster = cl.clusterInstance(dataTraining.instance(b));
                System.out.println(trainingCluster);
                array1[b] = td.fileName[b];
                array2[b] = trainingCluster;
            }

            out.write("\r\n");

            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            System.err.println(e.getMessage());
            System.out.println("error2 kmeans cluster");
        }

    } catch (IOException ex) {
        // Log under this class's own name.
        Logger.getLogger(KMeansClustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null kmeans");
    }
}

From source file:development.CrossValidateShapelets.java

/**
 * Splits a training set into {@code folds} pairs of train/test partitions.
 *
 * <p>The instances are cut into {@code folds} contiguous ranges in their
 * existing order. For fold {@code i}, {@code testFolds[i]} receives the
 * instances of range {@code i} and {@code trainFolds[i]} receives all other
 * instances. When the size is not divisible by {@code folds}, the surplus
 * first goes to the last range and is then handed out one instance at a
 * time to the earlier ranges.
 *
 * @param train the data to split; it is only read
 * @param trainFolds output array, filled at indices {@code 0..folds-1}
 * @param testFolds output array, filled at indices {@code 0..folds-1}
 * @param folds number of folds, at least 1
 * @throws IllegalArgumentException if {@code folds} is smaller than 1
 */
public static void splitTrainData(Instances train, Instances[] trainFolds, Instances[] testFolds, int folds) {
    if (folds < 1) {
        throw new IllegalArgumentException("folds must be at least 1, but was " + folds);
    }

    int size = train.numInstances();
    int foldSize = size / folds;
    int[] foldCV = new int[folds];
    for (int i = 0; i < foldCV.length; i++) {
        foldCV[i] = foldSize;
    }
    if (size % folds != 0) {
        // Put the remainder into the last fold for now.
        foldCV[folds - 1] = size - foldSize * (folds - 1);
    }

    // Hand surplus elements of the last fold to the other folds, round-robin,
    // until the last fold is no larger than the fold that was just increased.
    int diff = foldCV[folds - 1] - foldSize;
    int c = 0;
    while (diff > 0) {
        foldCV[c % (folds - 1)]++;
        foldCV[folds - 1]--;
        diff = foldCV[folds - 1] - foldCV[c % (folds - 1)];
        c++;
    }

    int start = 0;
    for (int i = 0; i < folds; i++) {
        // Empty sets that share the header of train; copying the whole
        // data set first is not needed for that.
        trainFolds[i] = new Instances(train, 0);
        testFolds[i] = new Instances(train, 0);

        int end = start + foldCV[i];
        for (int j = 0; j < size; j++) {
            if (j < start || j >= end) {
                trainFolds[i].add(train.instance(j));
            } else {
                testFolds[i].add(train.instance(j));
            }
        }
        start = end;
    }
}

From source file:distributions.ClassdistributionNominal.java

/**
Konstruktor /*  ww  w.  ja  va 2 s. com*/
     * @param inst
     * @param classID
     * @param kID
*/
public ClassdistributionNominal(Instances inst, int classID, int kID) {
    this.inst = new Instances(inst);
    this.classID = classID;
    this.kID = kID;
    conditionalprobs = new double[inst.attribute(kID).numValues()];
    attindexen = new int[inst.attribute(kID).numValues()];
    for (int i = 0; i < attindexen.length; i++) {
        attindexen[i] = i;
    }

}

From source file:distributions.ClassdistributionNumeric.java

/**
 *
 * @param inst//from w  w w  . j  a va  2  s.c  om
 * @param classID
 * @param kID
 */
public ClassdistributionNumeric(Instances inst, int classID, int kID) {

    this.inst = new Instances(inst);

    this.classID = classID;
    this.kID = kID;

    attwerten = new double[inst.numInstances()];
    for (int k = 0; k < inst.numInstances(); k++) {
        attwerten[k] = inst.instance(k).value(kID);
    }

    attwerten = super.EliminiereDopelt(attwerten);
    /*for(int j=0;j<attwerten.length;j++){
     for (int i = 0; i < inst.numInstances(); i++) {
               
            
     attwerten[(int) inst.instance(i).value(kID)] = inst.instance(i).value(kID);
     }
            
     }*/

    probs = new double[attwerten.length];

}

From source file:distributions.NominalDistribution.java

public NominalDistribution(Instances inst, int kID) {
    this.inst = new Instances(inst);
    this.kID = kID;
    probs = new double[inst.attribute(kID).numValues()];
    attwerten1 = new int[inst.attribute(kID).numValues()];
    for (int i = 0; i < attwerten1.length; i++) {
        attwerten1[i] = i;/*from   www. ja v  a2 s .co  m*/
    }

}