List of usage examples for weka.core Instances numAttributes
public int numAttributes()
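Before the examples from third-party projects, here is a minimal self-contained sketch (not from any of the files below; the ARFF path is a placeholder) of the typical call pattern: size a loop or array by numAttributes() and address attributes by index.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
  public static void main(String[] args) throws Exception {
    // "data.arff" is a placeholder path
    Instances data = DataSource.read("data.arff");
    // numAttributes() counts all attributes, including any class attribute
    for (int i = 0; i < data.numAttributes(); i++) {
      System.out.println(i + ": " + data.attribute(i).name());
    }
  }
}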
From source file:adams.ml.data.WekaConverter.java
License:Open Source License
/**
 * Turns an ADAMS dataset row into a Weka Instance.
 *
 * @param data the dataset to use as template
 * @param row the row to convert
 * @return the generated instance
 * @throws Exception if conversion fails
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
  Instance result;
  double[] values;
  int i;
  Cell cell;
  Attribute att;

  values = new double[data.numAttributes()];
  for (i = 0; i < data.numAttributes(); i++) {
    values[i] = Utils.missingValue();
    if (!row.hasCell(i))
      continue;
    cell = row.getCell(i);
    if (cell.isMissing())
      continue;
    att = data.attribute(i);
    switch (att.type()) {
    case Attribute.NUMERIC:
      values[i] = cell.toDouble();
      break;
    case Attribute.DATE:
      values[i] = cell.toAnyDateType().getTime();
      break;
    case Attribute.NOMINAL:
      values[i] = att.indexOfValue(cell.getContent());
      break;
    case Attribute.STRING:
      values[i] = att.addStringValue(cell.getContent());
      break;
    default:
      throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
    }
  }

  result = new DenseInstance(1.0, values);
  result.setDataset(data);
  return result;
}
From source file:adams.opt.cso.HermioneSimple.java
License:Open Source License
/**
 * For testing only.
 *
 * @param args the dataset to use
 * @throws Exception if something fails
 */
public static void main(String[] args) throws Exception {
  Environment.setEnvironmentClass(Environment.class);
  Instances data = DataSource.read(args[0]);
  if (data.classIndex() == -1)
    data.setClassIndex(data.numAttributes() - 1);
  MaxIterationsWithoutImprovement stopping = new MaxIterationsWithoutImprovement();
  stopping.setNumIterations(2);
  stopping.setMinimumImprovement(0.001);
  stopping.setLoggingLevel(LoggingLevel.INFO);
  HermioneSimple simple = new HermioneSimple();
  simple.setEvalParallel(true);
  simple.setMeasure(Measure.CC);
  simple.setStopping(stopping);
  simple.setLoggingLevel(LoggingLevel.INFO);
  simple.setInstances(data);
  /*
  simple.setClassifier(new GPD());
  simple.setHandlers(new AbstractCatSwarmOptimizationDiscoveryHandler[]{
    new GPDGamma(),
    new GPDNoise(),
  });
  */
  LinearRegressionJ cls = new LinearRegressionJ();
  cls.setEliminateColinearAttributes(false);
  cls.setAttributeSelectionMethod(
      new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));
  simple.setClassifier(new LinearRegressionJ());
  GenericDouble ridge = new GenericDouble();
  ridge.setClassname(new BaseClassname(cls.getClass()));
  ridge.setProperty("ridge");
  ridge.setMinimum(1e-8);
  ridge.setMaximum(1);
  simple.setHandlers(new AbstractCatSwarmOptimizationDiscoveryHandler[] { ridge, });
  DoubleMatrix best = simple.run();
  System.out.println(best);
}
From source file:adaptedClusteringAlgorithms.MyFarthestFirst.java
License:Open Source License
protected void initMinMax(Instances data) {
  m_Min = new double[data.numAttributes()];
  m_Max = new double[data.numAttributes()];
  for (int i = 0; i < data.numAttributes(); i++) {
    m_Min[i] = m_Max[i] = Double.NaN;
  }
  for (int i = 0; i < data.numInstances(); i++) {
    updateMinMax(data.instance(i));
  }
}
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
  if (!SESAME.SESAME_GUI)
    MyFirstClusterer.weka_gui = true;

  // can clusterer handle the data?
  getCapabilities().testWithFail(data);

  m_Iterations = 0;

  m_ReplaceMissingFilter = new ReplaceMissingValues();
  Instances instances = new Instances(data);

  instances.setClassIndex(-1);
  if (!m_dontReplaceMissing) {
    m_ReplaceMissingFilter.setInputFormat(instances);
    instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
  }

  m_FullMissingCounts = new int[instances.numAttributes()];
  if (m_displayStdDevs) {
    m_FullStdDevs = new double[instances.numAttributes()];
  }
  m_FullNominalCounts = new int[instances.numAttributes()][0];

  m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
  for (int i = 0; i < instances.numAttributes(); i++) {
    m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
    if (instances.attribute(i).isNumeric()) {
      if (m_displayStdDevs) {
        m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
      }
      if (m_FullMissingCounts[i] == instances.numInstances()) {
        m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
      }
    } else {
      m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
      if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
        m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
      }
    }
  }

  m_ClusterCentroids = new Instances(instances, m_NumClusters);
  int[] clusterAssignments = new int[instances.numInstances()];

  if (m_PreserveOrder) {
    m_Assignments = clusterAssignments;
  }

  m_DistanceFunction.setInstances(instances);

  Random RandomO = new Random(getSeed());
  int instIndex;
  HashMap initC = new HashMap();
  DecisionTableHashKey hk = null;

  Instances initInstances = null;
  if (m_PreserveOrder) {
    initInstances = new Instances(instances);
  } else {
    initInstances = instances;
  }

  for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
    instIndex = RandomO.nextInt(j + 1);
    hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
    if (!initC.containsKey(hk)) {
      m_ClusterCentroids.add(initInstances.instance(instIndex));
      initC.put(hk, null);
    }
    initInstances.swap(j, instIndex);

    if (m_ClusterCentroids.numInstances() == m_NumClusters) {
      break;
    }
  }

  m_NumClusters = m_ClusterCentroids.numInstances();

  // removing reference
  initInstances = null;

  int i;
  boolean converged = false;
  int emptyClusterCount;
  Instances[] tempI = new Instances[m_NumClusters];
  m_squaredErrors = new double[m_NumClusters];
  m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
  m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
  while (!converged) {
    emptyClusterCount = 0;
    m_Iterations++;
    converged = true;
    for (i = 0; i < instances.numInstances(); i++) {
      Instance toCluster = instances.instance(i);
      int newC = clusterProcessedInstance(toCluster, true);
      if (newC != clusterAssignments[i]) {
        converged = false;
      }
      clusterAssignments[i] = newC;
    }

    // update centroids
    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    for (i = 0; i < m_NumClusters; i++) {
      tempI[i] = new Instances(instances, 0);
    }
    for (i = 0; i < instances.numInstances(); i++) {
      tempI[clusterAssignments[i]].add(instances.instance(i));
    }
    for (i = 0; i < m_NumClusters; i++) {
      if (tempI[i].numInstances() == 0) {
        // empty cluster
        emptyClusterCount++;
      } else {
        moveCentroid(i, tempI[i], true);
      }
    }

    if (m_Iterations == m_MaxIterations) {
      converged = true;
    }

    if (emptyClusterCount > 0) {
      m_NumClusters -= emptyClusterCount;
      if (converged) {
        Instances[] t = new Instances[m_NumClusters];
        int index = 0;
        for (int k = 0; k < tempI.length; k++) {
          if (tempI[k].numInstances() > 0) {
            t[index] = tempI[k];
            for (i = 0; i < tempI[k].numAttributes(); i++) {
              m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i];
            }
            index++;
          }
        }
        tempI = t;
      } else {
        tempI = new Instances[m_NumClusters];
      }
    }

    if (!converged) {
      m_squaredErrors = new double[m_NumClusters];
      m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    }
  }

  if (m_displayStdDevs) {
    m_ClusterStdDevs = new Instances(instances, m_NumClusters);
  }
  m_ClusterSizes = new int[m_NumClusters];
  for (i = 0; i < m_NumClusters; i++) {
    if (m_displayStdDevs) {
      double[] vals2 = new double[instances.numAttributes()];
      for (int j = 0; j < instances.numAttributes(); j++) {
        if (instances.attribute(j).isNumeric()) {
          vals2[j] = Math.sqrt(tempI[i].variance(j));
        } else {
          vals2[j] = Instance.missingValue();
        }
      }
      m_ClusterStdDevs.add(new Instance(1.0, vals2));
    }
    m_ClusterSizes[i] = tempI[i].numInstances();
  }

  // Save memory!!
  m_DistanceFunction.clean();

  if (!SESAME.SESAME_GUI)
    MyFirstClusterer.weka_gui = true;
}
From source file:adaptedClusteringAlgorithms.MySimpleKMeans.java
License:Open Source License
/**
 * Move the centroid to its new coordinates. Generate the centroid
 * coordinates based on its members (objects assigned to the cluster of the
 * centroid) and the distance function being used.
 *
 * @param centroidIndex index of the centroid for which the coordinates will be computed
 * @param members the objects that are assigned to the cluster of this centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
  double[] vals = new double[members.numAttributes()];

  for (int j = 0; j < members.numAttributes(); j++) {
    // The centroid is the mean point. If the attribute is nominal, the centroid is the mode
    if (m_DistanceFunction instanceof ChEBIInd || m_DistanceFunction instanceof ChEBIDir
        || m_DistanceFunction instanceof GOInd || m_DistanceFunction instanceof GODir
        || m_DistanceFunction instanceof GOChEBIInd || m_DistanceFunction instanceof GOChEBIDir
        || m_DistanceFunction instanceof CalculusInd || m_DistanceFunction instanceof CalculusDir
        || members.attribute(j).isNominal()) {
      vals[j] = members.meanOrMode(j);
    }

    if (updateClusterInfo) {
      m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
      m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
      if (members.attribute(j).isNominal()) {
        if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
            .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
          vals[j] = Instance.missingValue(); // mark mode as missing
        }
      } else {
        if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
          vals[j] = Instance.missingValue(); // mark mean as missing
        }
      }
    }
  }

  if (updateClusterInfo) {
    m_ClusterCentroids.add(new Instance(1.0, vals));
  }

  return vals;
}
From source file:affective.core.ArffLexiconEvaluator.java
License:Open Source License
/**
 * Processes all the dictionary files.
 *
 * @throws IOException an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
  BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
  Instances lexInstances = new Instances(reader);

  // set upper value for word index
  lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

  List<Attribute> numericAttributes = new ArrayList<Attribute>();
  List<Attribute> nominalAttributes = new ArrayList<Attribute>();

  // checks all numeric and nominal attributes and discards the word attribute
  for (int i = 0; i < lexInstances.numAttributes(); i++) {
    if (i != this.lexiconWordIndex.getIndex()) {
      if (lexInstances.attribute(i).isNumeric()) {
        numericAttributes.add(lexInstances.attribute(i));
        // adds the attribute name to the message-level features to be calculated
        this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name());
      } else if (lexInstances.attribute(i).isNominal()) {
        nominalAttributes.add(lexInstances.attribute(i));
        // adds the attribute name together with the nominal value to the message-level features to be calculated
        int numValues = lexInstances.attribute(i).numValues();
        for (int j = 0; j < numValues; j++)
          this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name() + "-"
              + lexInstances.attribute(i).value(j));
      }
    }
  }

  // Maps all words with their affective scores discarding missing values
  for (Instance inst : lexInstances) {
    if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
      String word = inst.stringValue(this.lexiconWordIndex.getIndex());
      // stems the word
      word = this.m_stemmer.stem(word);

      // map numeric scores
      if (!numericAttributes.isEmpty()) {
        Map<String, Double> wordVals = new HashMap<String, Double>();
        for (Attribute na : numericAttributes) {
          if (!weka.core.Utils.isMissingValue(inst.value(na)))
            wordVals.put(na.name(), inst.value(na));
        }
        this.numDict.put(word, wordVals);
      }

      // map nominal associations
      if (!nominalAttributes.isEmpty()) {
        Map<String, String> wordCounts = new HashMap<String, String>();
        for (Attribute no : nominalAttributes) {
          if (!weka.core.Utils.isMissingValue(inst.value(no))) {
            wordCounts.put(no.name(), no.value((int) inst.value(no)));
          }
          this.nomDict.put(word, wordCounts);
        }
      }
    }
  }
}
From source file:affective.core.ArffLexiconWordLabeller.java
License:Open Source License
/**
 * Processes all the dictionary files.
 *
 * @throws IOException an IOException will be raised if an invalid file is supplied
 */
public void processDict() throws IOException {
  BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile));
  Instances lexInstances = new Instances(reader);

  // set upper value for word index
  lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1);

  // checks all numeric and nominal attributes and discards the word attribute
  for (int i = 0; i < lexInstances.numAttributes(); i++) {
    if (i != this.lexiconWordIndex.getIndex()) {
      if (lexInstances.attribute(i).isNumeric() || lexInstances.attribute(i).isNominal()) {
        this.attributes.add(lexInstances.attribute(i));
      }
    }
  }

  // Maps all words with their affective scores discarding missing values
  for (Instance inst : lexInstances) {
    if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) {
      String word = inst.stringValue(this.lexiconWordIndex.getIndex());
      // stems the word
      word = this.m_stemmer.stem(word);

      // map numeric scores
      if (!attributes.isEmpty()) {
        Map<Attribute, Double> wordVals = new HashMap<Attribute, Double>();
        for (Attribute na : attributes) {
          wordVals.put(na, inst.value(na));
        }
        this.attValMap.put(word, wordVals);
      }
    }
  }
}
From source file:agnes.AgnesMain.java
public static Instances loadData(String filePath) {
  BufferedReader reader;
  Instances data = null;
  try {
    reader = new BufferedReader(new FileReader(filePath));
    data = new Instances(reader);
    reader.close();
    data.setClassIndex(data.numAttributes() - 1);
  } catch (Exception e) {
    // exception swallowed: the method returns null if loading fails
  }
  return data;
}
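As a point of comparison, a minimal sketch (not part of AgnesMain) of the same load using Weka's ConverterUtils.DataSource, which selects a loader by file extension and propagates errors instead of swallowing them:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public static Instances loadData(String filePath) throws Exception {
  // DataSource picks a loader from the file extension (ARFF, CSV, ...)
  Instances data = DataSource.read(filePath);
  // assume the last attribute is the class, as in the example above
  data.setClassIndex(data.numAttributes() - 1);
  return data;
}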
From source file:ai.BalancedRandomForest.java
License:GNU General Public License
/**
 * Build Balanced Random Forest
 */
public void buildClassifier(final Instances data) throws Exception {
  // If number of features is 0 then set it to log2 of M (number of attributes)
  if (numFeatures < 1)
    numFeatures = (int) Utils.log2(data.numAttributes()) + 1;
  // Check maximum number of random features
  if (numFeatures >= data.numAttributes())
    numFeatures = data.numAttributes() - 1;

  // Initialize array of trees
  tree = new BalancedRandomTree[numTrees];

  // total number of instances
  final int numInstances = data.numInstances();
  // total number of classes
  final int numClasses = data.numClasses();

  final ArrayList<Integer>[] indexSample = new ArrayList[numClasses];
  for (int i = 0; i < numClasses; i++)
    indexSample[i] = new ArrayList<Integer>();

  //System.out.println("numClasses = " + numClasses);

  // fill indexSample with the indices of each class
  for (int i = 0; i < numInstances; i++) {
    //System.out.println("data.get("+i+").classValue() = " + data.get(i).classValue());
    indexSample[(int) data.get(i).classValue()].add(i);
  }

  final Random random = new Random(seed);

  // Executor service to run concurrent trees
  final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
  List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees);

  final boolean[][] inBag = new boolean[numTrees][numInstances];

  try {
    for (int i = 0; i < numTrees; i++) {
      final ArrayList<Integer> bagIndices = new ArrayList<Integer>();

      // Randomly select the indices in a balanced way
      for (int j = 0; j < numInstances; j++) {
        // Select first the class
        final int randomClass = random.nextInt(numClasses);
        // Select then a random sample of that class
        final int randomSample = random.nextInt(indexSample[randomClass].size());
        bagIndices.add(indexSample[randomClass].get(randomSample));
        inBag[i][indexSample[randomClass].get(randomSample)] = true;
      }

      // Create random tree
      final Splitter splitter = new Splitter(
          new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt())));

      futures.add(exe.submit(new Callable<BalancedRandomTree>() {
        public BalancedRandomTree call() {
          return new BalancedRandomTree(data, bagIndices, splitter);
        }
      }));
    }

    // Grab all trained trees before proceeding
    for (int treeIdx = 0; treeIdx < numTrees; treeIdx++)
      tree[treeIdx] = futures.get(treeIdx).get();

    // Calculate out of bag error
    final boolean numeric = data.classAttribute().isNumeric();

    List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances());
    for (int i = 0; i < data.numInstances(); i++) {
      VotesCollector aCollector = new VotesCollector(tree, i, data, inBag);
      votes.add(exe.submit(aCollector));
    }

    double outOfBagCount = 0.0;
    double errorSum = 0.0;

    for (int i = 0; i < data.numInstances(); i++) {
      double vote = votes.get(i).get();

      // error for instance
      outOfBagCount += data.instance(i).weight();
      if (numeric) {
        errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
      } else {
        if (vote != data.instance(i).classValue())
          errorSum += data.instance(i).weight();
      }
    }

    outOfBagError = errorSum / outOfBagCount;
  } catch (Exception ex) {
    ex.printStackTrace();
  } finally {
    exe.shutdownNow();
  }
}
From source file:ai.GiniFunction.java
License:GNU General Public License
/**
 * Create split function based on Gini coefficient
 *
 * @param data original data
 * @param indices indices of the samples to use
 */
public void init(Instances data, ArrayList<Integer> indices) {
  if (indices.size() == 0) {
    this.index = 0;
    this.threshold = 0;
    this.allSame = true;
    return;
  }

  final int len = data.numAttributes();
  final int numElements = indices.size();
  final int numClasses = data.numClasses();
  final int classIndex = data.classIndex();

  /** Attribute-class pair comparator (by attribute value) */
  final Comparator<AttributeClassPair> comp = new Comparator<AttributeClassPair>() {
    public int compare(AttributeClassPair o1, AttributeClassPair o2) {
      final double diff = o2.attributeValue - o1.attributeValue;
      if (diff < 0)
        return 1;
      else if (diff == 0)
        return 0;
      else
        return -1;
    }

    public boolean equals(Object o) {
      return false;
    }
  };

  // Create and shuffle indices of features to use
  ArrayList<Integer> allIndices = new ArrayList<Integer>();
  for (int i = 0; i < len; i++)
    if (i != classIndex)
      allIndices.add(i);

  double minimumGini = Double.MAX_VALUE;

  for (int i = 0; i < numOfFeatures; i++) {
    // Select the random feature
    final int index = random.nextInt(allIndices.size());
    final int featureToUse = allIndices.get(index);
    allIndices.remove(index); // remove that element to prevent repetitions

    // Get the smallest Gini coefficient

    // Create list with pairs attribute-class
    final ArrayList<AttributeClassPair> list = new ArrayList<AttributeClassPair>();
    for (int j = 0; j < numElements; j++) {
      final Instance ins = data.get(indices.get(j));
      list.add(new AttributeClassPair(ins.value(featureToUse), (int) ins.value(classIndex)));
    }

    // Sort pairs in increasing order
    Collections.sort(list, comp);

    final double[] probLeft = new double[numClasses];
    final double[] probRight = new double[numClasses];
    // initial probabilities (all samples on the right)
    for (int n = 0; n < list.size(); n++)
      probRight[list.get(n).classValue]++;

    // Try all splitting points, from position 0 to the end
    for (int splitPoint = 0; splitPoint < numElements; splitPoint++) {
      // Calculate Gini coefficient
      double giniLeft = 0;
      double giniRight = 0;
      final int rightNumElements = numElements - splitPoint;

      for (int nClass = 0; nClass < numClasses; nClass++) {
        // left set
        double prob = probLeft[nClass];
        // Divide by the number of elements to get probabilities
        if (splitPoint != 0)
          prob /= (double) splitPoint;
        giniLeft += prob * prob;

        // right set
        prob = probRight[nClass];
        // Divide by the number of elements to get probabilities
        if (rightNumElements != 0)
          prob /= (double) rightNumElements;
        giniRight += prob * prob;
      }

      // Total Gini value
      final double gini = ((1.0 - giniLeft) * splitPoint + (1.0 - giniRight) * rightNumElements)
          / (double) numElements;

      // Save values of minimum Gini coefficient
      if (gini < minimumGini) {
        minimumGini = gini;
        this.index = featureToUse;
        this.threshold = list.get(splitPoint).attributeValue;
      }

      // update probabilities for next iteration
      probLeft[list.get(splitPoint).classValue]++;
      probRight[list.get(splitPoint).classValue]--;
    }
  }

  // free list of possible indices to help garbage collector
  //allIndices.clear();
  //allIndices = null;
}
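For reference, the split score minimized in the inner loop above is the size-weighted Gini impurity of the two children:

$G = \frac{n_L}{n}\Bigl(1 - \sum_c p_{L,c}^2\Bigr) + \frac{n_R}{n}\Bigl(1 - \sum_c p_{R,c}^2\Bigr)$

where $n_L$ and $n_R$ are the number of samples left and right of the candidate split point ($n = n_L + n_R$) and $p_{L,c}$, $p_{R,c}$ are the class-$c$ proportions on each side; in the code, $\sum_c p^2$ appears as giniLeft and giniRight.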