List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:gr.iti.mklab.visual.quantization.SimpleKMeansWithOutput.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid coordinates based on it's members * (objects assigned to the cluster of the centroid) and the distance function being used. * //from w ww . j a v a2s. com * @param centroidIndex * index of the centroid which the coordinates will be computed * @param members * the objects that are assigned to the cluster of this centroid * @param updateClusterInfo * if the method is supposed to update the m_Cluster arrays * @param addToCentroidInstances * true if the method is to add the computed coordinates to the Instances holding the centroids * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo, boolean addToCentroidInstances) { double[] vals = new double[members.numAttributes()]; // used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { // in case of Euclidian distance the centroid is the mean point // in case of Manhattan distance the centroid is the median point // in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance) { // singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { vals[j] = sortedMembers.kthSmallestValue(j, middle + 1); if (dataIsEven) { vals[j] = (vals[j] + sortedMembers.kthSmallestValue(j, middle + 2)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = 
members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Utils.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Utils.missingValue(); // mark mean as missing } } } } if (addToCentroidInstances) { m_ClusterCentroids.add(new DenseInstance(1.0, vals)); } return vals; }
From source file:Graphs.Graph.java
public Graph(Instances inst) { this.inst = new Instances(inst); dieknoten = new Knoten[inst.numAttributes()]; for (int i = 0; i < dieknoten.length; i++) { dieknoten[i] = new Knoten(inst.attribute(i).name(), inst.attribute(i).name()); dieknoten[i].setMaxKinder(inst.numAttributes() - 1); }//from w w w .j a va 2 s . com adjMatrix = new int[inst.numAttributes()][inst.numAttributes()]; matrix = new int[inst.numAttributes()][inst.numAttributes()]; linearerepresentation = new int[inst.numAttributes() * inst.numAttributes()]; }
From source file:Graphs.GraphMitAngabeVernetzungsgrad.java
public GraphMitAngabeVernetzungsgrad(Instances inst, int prozent) { this.inst = new Instances(inst); dieknoten = new Knoten[inst.numAttributes()]; for (int i = 0; i < dieknoten.length; i++) { dieknoten[i] = new Knoten(inst.attribute(i).name(), inst.attribute(i).name()); }/*from www .j a va 2 s . co m*/ this.prozent = prozent; adjMatrix = new int[inst.numAttributes()][inst.numAttributes()]; matrix = new int[inst.numAttributes()][inst.numAttributes()]; maxanzahkkanten = (int) Math.ceil(binomischeFormel(inst.numAttributes(), 2) * 2) / 2; for (int i = 0; i < adjMatrix.length; i++) { for (int j = 0; j < adjMatrix[i].length; j++) { if (i == j) { adjMatrix[i][j] = 0; adjMatrix[j][i] = 0; } else { adjMatrix[i][j] = 1; } } } linearerepresentation = new int[inst.numAttributes() * inst.numAttributes()]; }
From source file:guineu.modules.dataanalysis.clustering.ClusteringTask.java
License:Open Source License
/** * Creates the weka data set for clustering of samples * @param rawData Data extracted from selected Raw data files and rows. * @return Weka library data set/*from w w w. j a v a2 s. c om*/ */ private Instances createSampleWekaDataset(double[][] rawData) { FastVector attributes = new FastVector(); for (int i = 0; i < rawData[0].length; i++) { String varName = "Var" + i; Attribute var = new Attribute(varName); attributes.addElement(var); } if (clusteringAlgorithm.toString().equals("Hierarchical Clusterer")) { Attribute name = new Attribute("name", (FastVector) null); attributes.addElement(name); } Instances data = new Instances("Dataset", attributes, 0); for (int i = 0; i < rawData.length; i++) { double[] values = new double[data.numAttributes()]; System.arraycopy(rawData[i], 0, values, 0, rawData[0].length); if (clusteringAlgorithm.toString().equals("Hierarchical Clusterer")) { values[data.numAttributes() - 1] = data.attribute("name") .addStringValue("\"" + this.selectedRawDataFiles[i] + "\""); } Instance inst = new SparseInstance(1.0, values); data.add(inst); } return data; }
From source file:guineu.modules.dataanalysis.clustering.ClusteringTask.java
License:Open Source License
/**
 * Creates the Weka data set for clustering of variables (metabolites).
 *
 * <p>One attribute named {@code "Var" + i} is created per entry of
 * {@code selectedRawDataFiles}. When the clustering algorithm's string form is
 * "Hierarchical Clusterer", an additional string attribute "name" is appended and filled
 * with the name of the corresponding entry of {@code selectedRows}.
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
    FastVector attributeList = new FastVector();

    final int fileCount = this.selectedRawDataFiles.length;
    for (int col = 0; col < fileCount; col++) {
        attributeList.addElement(new Attribute("Var" + col));
    }

    final boolean hierarchical = clusteringAlgorithm.toString().equals("Hierarchical Clusterer");
    if (hierarchical) {
        // The cast selects the constructor overload that takes a FastVector.
        attributeList.addElement(new Attribute("name", (FastVector) null));
    }

    Instances data = new Instances("Dataset", attributeList, 0);

    for (int row = 0; row < selectedRows.length; row++) {
        double[] values = new double[data.numAttributes()];
        // The number of copied values is taken from the first row of rawData.
        System.arraycopy(rawData[row], 0, values, 0, rawData[0].length);

        if (hierarchical) {
            String rowName = selectedRows[row].getName();
            // The last slot holds the index returned by addStringValue.
            values[values.length - 1] = data.attribute("name").addStringValue(rowName);
        }

        data.add(new SparseInstance(1.0, values));
    }
    return data;
}
From source file:gyc.OverBoostM1.java
License:Open Source License
/** * //from ww w. ja va 2 s . com * nMajnMin * @param data * @param i * @return */ protected Instances randomSampling(Instances copia, int majC, int minC, int nMaj, int nMin, Random simplingRandom) { int[] majExamples = new int[copia.numInstances()]; int[] minExamples = new int[copia.numInstances()]; int majCount = 0, minCount = 0; // First, we copy the examples from the minority class and save the indexes of the majority // the new data-set contains samples_min + samples_min * N / 100 int size = nMaj + nMin; //selected = new int[size]; // we store the selected examples indexes String majClassName = copia.attribute(copia.classIndex()).value(majC); Instances myDataset = new Instances(copia, 0); int nData = 0; for (int i = 0; i < copia.numInstances(); i++) { if (copia.instance(i).stringValue(copia.classIndex()).equalsIgnoreCase(majClassName)) { // save index majExamples[majCount] = i; majCount++; } else { minExamples[minCount] = i; minCount++; } } if (minCount <= 0) return copia; /* random undersampling of the majority */ //boolean[] taken = new boolean[copia.numInstances()]; int r; if (nMaj == majCount) { //System.out.println("#equal"); for (int i = 0; i < nMaj; i++) { myDataset.add(copia.instance(majExamples[i])); } } else { for (int i = 0; i < nMaj; i++) { r = simplingRandom.nextInt(majCount); //selected[nData] = majExamples[r]; myDataset.add(copia.instance(majExamples[r])); //taken[majExamples[r]] = true; } } for (int i = 0; i < nMin; i++) { r = simplingRandom.nextInt(minCount); //System.out.print("_"+r); //selected[nData] = minExamples[r]; myDataset.add(copia.instance(minExamples[r])); //taken[minExamples[r]] = true; } //System.out.println(); //System.out.println("minC="+minCount+"; majC="+majCount); myDataset.randomize(simplingRandom); return myDataset; }
From source file:gyc.SMOTEBagging.java
License:Open Source License
/** * /*from w w w . j av a2 s . c o m*/ * 100%majminSMOTE (k, a). * @param data * @param i * @return */ protected Instances randomSampling(Instances copia, int majC, int minC, int a, Random simplingRandom) { int[] majExamples = new int[copia.numInstances()]; int[] minExamples = new int[copia.numInstances()]; int majCount = 0, minCount = 0; // First, we copy the examples from the minority class and save the indexes of the majority // resample min at rate (Nmaj/Nmin)*a% int size = copia.attributeStats(copia.classIndex()).nominalCounts[majC] * a / 100; // class name String majClassName = copia.attribute(copia.classIndex()).value(majC); for (int i = 0; i < copia.numInstances(); i++) { if (copia.instance(i).stringValue(copia.classIndex()).equalsIgnoreCase(majClassName)) { // save index majExamples[majCount] = i; majCount++; } else { minExamples[minCount] = i; minCount++; } } /* random undersampling of the majority */ Instances myDataset = new Instances(copia, 0); int r; //100%majC for (int i = 0; i < majCount; i++) { myDataset.add(copia.instance(majExamples[i])); } if (minCount == 0) return myDataset; //(Nmaj/Nmin)*a% minC for (int i = 0; i < size; i++) { r = simplingRandom.nextInt(minCount); myDataset.add(copia.instance(minExamples[r])); } myDataset.randomize(simplingRandom); if (size == 1) { try { //neighbor Resample filter = new Resample(); filter.setInputFormat(myDataset); filter.setBiasToUniformClass(1.0); filter.setRandomSeed(simplingRandom.nextInt()); myDataset = Filter.useFilter(myDataset, filter); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (size > 1) { try { SMOTE filter = new SMOTE(); filter.setInputFormat(myDataset); // filter capabilities are checked here //data. 
double value = 100.0 * majCount / size - 100; //Percentage filter.setPercentage(value); //if (nMin<5) filter.setNearestNeighbors(nMin); filter.setRandomSeed(simplingRandom.nextInt()); //filterSMOTESMOTE myDataset = Filter.useFilter(myDataset, filter); //t.stop(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } return myDataset; }
From source file:gyc.UnderOverBoostM1.java
License:Open Source License
/** * /* w ww .j av a 2 s . c o m*/ * nMajnMin * @param data * @param i * @return */ protected Instances randomSampling(Instances copia, int majC, int minC, int a, Random simplingRandom) { int[] majExamples = new int[copia.numInstances()]; int[] minExamples = new int[copia.numInstances()]; int majCount = 0, minCount = 0; // First, we copy the examples from the minority class and save the indexes of the majority // the new data-set contains samples_min + samples_min * N / 100 int size = copia.attributeStats(copia.classIndex()).nominalCounts[majC] * a / 100 * 2; // class name String majClassName = copia.attribute(copia.classIndex()).value(majC); for (int i = 0; i < copia.numInstances(); i++) { if (copia.instance(i).stringValue(copia.classIndex()).equalsIgnoreCase(majClassName)) { // save index majExamples[majCount] = i; majCount++; } else { minExamples[minCount] = i; minCount++; } } /* random undersampling of the majority */ Instances myDataset = new Instances(copia, 0); int r; for (int i = 0; i < size / 2; i++) { r = simplingRandom.nextInt(majCount); myDataset.add(copia.instance(majExamples[r])); if (minCount > 0) { r = simplingRandom.nextInt(minCount); myDataset.add(copia.instance(minExamples[r])); } } myDataset.randomize(simplingRandom); return myDataset; }
From source file:Helper.CustomFilter.java
public Instances convertNumericRange(Instances structure) throws Exception { for (int i = 0; i < structure.numAttributes() - 1; i++) { if (structure.attribute(i).typeToString(structure.attribute(i)).equals("numeric")) { structure.sort(i);//from ww w .j a v a2 s . c o m structure = toRange(structure, i); } } return structure; }
From source file:Helper.CustomFilter.java
/**
 * Overwrites the values of one attribute so that every run of consecutive instances with the
 * same class label carries the value found at the start of that run.
 *
 * <p>The instances are visited in their current order. The last attribute of the data set
 * is used as the class label. Whenever the label differs from the label of the current run,
 * a new run starts and its first value becomes the new threshold; every visited instance
 * then gets the current threshold written into the attribute at {@code index}.
 *
 * @param structure data set to modify in place
 * @param index index of the attribute whose values are replaced
 * @return the same {@code structure} object; an empty data set is returned unchanged
 * @throws Exception declared by the original signature; nothing in this method throws a
 *         checked exception explicitly
 */
private Instances toRange(Instances structure, int index) throws Exception {
    Attribute attr = structure.attribute(index);
    Attribute classlabel = structure.attribute(structure.numAttributes() - 1);

    // Without instances there is no first label/value to start from.
    if (structure.numInstances() == 0) {
        return structure;
    }

    String label = structure.instance(0).stringValue(classlabel);
    double threshold = structure.instance(0).value(index);

    for (int i = 0; i < structure.numInstances(); i++) {
        if (!structure.instance(i).stringValue(classlabel).equals(label)) {
            // The class label changed: start a new run with this instance's value.
            label = structure.instance(i).stringValue(classlabel);
            threshold = structure.instance(i).value(index);
        }
        structure.instance(i).setValue(attr, threshold);
    }
    return structure;
}