List of usage examples for the weka.core.Instances copy constructor
public Instances(Instances dataset)
From source file:de.unimannheim.dws.algorithms.CustomSimpleKMedian.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid * coordinates based on it's members (objects assigned to the cluster of the * centroid) and the distance function being used. * //from w w w. ja v a 2s.c o m * @param centroidIndex index of the centroid which the coordinates will be * computed * @param members the objects that are assigned to the cluster of this * centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster * arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; // used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance || m_DistanceFunction instanceof CustomPairWiseDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { // in case of Euclidian distance the centroid is the mean point // in case of Manhattan distance the centroid is the median point // in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance || m_DistanceFunction instanceof CustomPairWiseDistance) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = 
members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Instance.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Instance.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) { m_ClusterCentroids.add(new Instance(1.0, vals)); } return vals; }
From source file:decisiontree.MyC45.java
public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // handle instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass();/* w ww. java 2 s . c o m*/ // handle missing values Instances processedInstances = handleMissingValues(instances); makeTree(processedInstances); }
From source file:decisiontree.MyID3.java
/**
 * Builds the classifier from the given training data.
 *
 * @param data the training data; it is copied before any instance is removed,
 *          so the caller's data set is not modified by this method
 * @throws Exception if the capability check fails or tree construction fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Reject data this classifier cannot handle.
    getCapabilities().testWithFail(data);

    // Work on a private copy and drop instances whose class value is missing.
    Instances trainingData = new Instances(data);
    trainingData.deleteWithMissingClass();

    makeTree(trainingData);
}
From source file:decisiontreeclassifier.ITree2.java
/******************************************************************** * Adds children nodes with all the necessary data to the Node * parameter/* w w w . j a v a 2s . c o m*/ ********************************************************************/ public void addTreeNode(int attIndex, Node theNode) { double range = findRange(attIndex, theNode); double increment = range / binNum; double lowest = findLowest(attIndex, theNode); int numInstances = theNode.dataSet.numInstances(); for (int i = 0; i < binNum; i++) { Node child; Instances InstCopy = new Instances(theNode.dataSet); ArrayList UFcopy = (ArrayList<Integer>) theNode.usedFeatures.clone(); if (i == 0) { child = new Node(InstCopy, UFcopy, 0, Double.NEGATIVE_INFINITY, (lowest + increment), attIndex, iTree.root); } else if (i == (binNum - 1)) { child = new Node(InstCopy, UFcopy, i, (lowest + (i * increment)), Double.POSITIVE_INFINITY, attIndex, iTree.root); } else { child = new Node(InstCopy, UFcopy, i, (lowest + (increment * i)), (lowest + (increment * (i + 1))), attIndex, iTree.root); } theNode.addChild(child); } filterInstances(theNode, attIndex); }
From source file:detplagiasi.EMClustering.java
EMClustering() { addd = ct.getAddress();/*from ww w . j ava 2 s . c o m*/ try { ClusterEvaluation eval; Instances data; String[] options; DensityBasedClusterer cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----EM Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new EM(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new EM(); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); getDataTraining(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); System.out.println(cl.clusterInstance(dataTraining.instance(b))); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 em cluster"); } } catch (IOException ex) { Logger.getLogger(EMClustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null em"); } }
From source file:detplagiasi.KMeansClustering.java
KMeansClustering() { addd = Container.getAddress(); try {/*from ww w .j a v a2s .c o m*/ ClusterEvaluation eval; Instances data; String[] options; SimpleKMeans cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----KMeans Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new SimpleKMeans(); cl.setNumClusters(4); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); System.out.println(cl.clusterInstance(dataTraining.instance(b))); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 kmeans cluster"); } } catch (IOException ex) { Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null kmeans"); } }
From source file:development.CrossValidateShapelets.java
public static void splitTrainData(Instances train, Instances[] trainFolds, Instances[] testFolds, int folds) { int size = train.numInstances(); int foldSize = size / folds; int[] foldCV = new int[folds]; for (int i = 0; i < foldCV.length; i++) foldCV[i] = foldSize;/*from w w w . j av a 2 s . co m*/ if (size % folds != 0) //Adjust the last fold size accordingly foldCV[folds - 1] = size - foldSize * (folds - 1); int diff = foldCV[folds - 1] - foldSize; int c = 0; while (diff > 0) { //Reassign elements to other folds foldCV[c % (folds - 1)]++; foldCV[folds - 1]--; diff = foldCV[folds - 1] - foldCV[c % (folds - 1)]; c++; } Instances copy = new Instances(train); int start = 0; for (int i = 0; i < folds; i++) { trainFolds[i] = new Instances(copy, 0); testFolds[i] = new Instances(copy, 0); for (int j = 0; j < train.numInstances(); j++) { if (j < start || j >= start + foldCV[i]) trainFolds[i].add(train.instance(j)); else testFolds[i].add(train.instance(j)); } start += foldCV[i]; } }
From source file:distributions.ClassdistributionNominal.java
/** Konstruktor /* ww w. ja va 2 s. com*/ * @param inst * @param classID * @param kID */ public ClassdistributionNominal(Instances inst, int classID, int kID) { this.inst = new Instances(inst); this.classID = classID; this.kID = kID; conditionalprobs = new double[inst.attribute(kID).numValues()]; attindexen = new int[inst.attribute(kID).numValues()]; for (int i = 0; i < attindexen.length; i++) { attindexen[i] = i; } }
From source file:distributions.ClassdistributionNumeric.java
/**
 * Constructor. Stores a copy of the data set together with the two indices,
 * collects the values of the attribute at position kID from every instance,
 * passes them through EliminiereDopelt (presumably removing duplicate values
 * - confirm against the superclass) and allocates one probability slot per
 * remaining value.
 *
 * @param inst the data set; a copy of it is kept
 * @param classID stored as given
 * @param kID index of the attribute whose values are collected
 */
public ClassdistributionNumeric(Instances inst, int classID, int kID) {
    this.inst = new Instances(inst);
    this.classID = classID;
    this.kID = kID;

    // One entry per instance, in instance order.
    attwerten = inst.attributeToDoubleArray(kID);
    attwerten = super.EliminiereDopelt(attwerten);

    probs = new double[attwerten.length];
}
From source file:distributions.NominalDistribution.java
public NominalDistribution(Instances inst, int kID) { this.inst = new Instances(inst); this.kID = kID; probs = new double[inst.attribute(kID).numValues()]; attwerten1 = new int[inst.attribute(kID).numValues()]; for (int i = 0; i < attwerten1.length; i++) { attwerten1[i] = i;/*from www. ja v a2 s .co m*/ } }