List of usage examples for weka.core Instances get
@Override
public Instance get(int index)
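Before the examples, a minimal self-contained sketch of the basic call (the file name is a placeholder; any ARFF/CSV file readable by Weka works):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesGetDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = DataSource.read("data.arff");
        // get(index) returns the Instance at the given position, like List.get
        for (int i = 0; i < data.size(); i++) {
            Instance row = data.get(i);
            System.out.println(i + ": " + row);
        }
    }
}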
From source file:AaronTest.ShapeletTransformExperiments.java
public static boolean AreInstancesEqual(Instances a, Instances b) {
    // Datasets of different sizes cannot be equal
    if (a.size() != b.size()) {
        return false;
    }
    // Note: only the values of the first attribute (index 0) are compared
    for (int i = 0; i < a.size(); i++) {
        double distance = a.get(i).value(0) - b.get(i).value(0);
        if (distance != 0) {
            return false;
        }
    }
    return true;
}
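A hypothetical driver for the helper above, written as if it lived in the same class (the dataset paths are placeholders; both files need a numeric attribute at index 0):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public static void main(String[] args) throws Exception {
    Instances a = DataSource.read("setA.arff"); // placeholder path
    Instances b = DataSource.read("setB.arff"); // placeholder path
    System.out.println("Equal on attribute 0: " + AreInstancesEqual(a, b));
}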
From source file:activeSegmentation.feature.FeatureExtraction.java
License:Open Source License
/**
 * Adds training samples from a rectangular ROI: one instance per pixel of the slice.
 *
 * @param sliceNum    number of the 2D slice being processed
 * @param classLabels class labels for the header
 * @param classes     number of classes
 * @return the set of instances that was generated
 */
private Instances addRectangleRoiInstances(int sliceNum, List<String> classLabels, int classes) {
    Instances testingData;
    ArrayList<Attribute> attributes = createFeatureHeader();
    attributes.add(new Attribute(Common.CLASS, addClasstoHeader(classes, classLabels)));
    System.out.println(attributes.toString());

    // create initial set of instances
    testingData = new Instances(Common.INSTANCE_NAME, attributes, 1);
    // Set the index of the class attribute
    testingData.setClassIndex(filterManager.getNumOfFeatures());

    // add one instance per pixel of the slice
    for (int x = 0; x < originalImage.getWidth(); x++) {
        for (int y = 0; y < originalImage.getHeight(); y++) {
            testingData.add(filterManager.createInstance(x, y, 0, sliceNum));
        }
    }

    System.out.println("Size: " + testingData.size());
    System.out.println(testingData.get(1).toString());
    return testingData;
}
From source file:adams.flow.template.MyTransformer.java
License:Open Source License
/**
 * Counts the number of missing values in each row.
 *
 * @param data the dataset to analyze
 * @return array with the number of missing values in each row
 */
private int[] getMissingValuesByRow(Instances data) {
    int numRows = data.size();
    int numColumns = data.numAttributes();
    int[] missingValues = new int[numRows];
    for (int i = 0; i < numRows; i++)
        for (int j = 0; j < numColumns; j++)
            if (data.get(i).isMissing(j))
                missingValues[i]++;
    return missingValues;
}
From source file:adams.flow.template.MyTransformer.java
License:Open Source License
/**
 * Counts the number of missing values in each column.
 *
 * @param data the dataset to analyze
 * @return array with the number of missing values in each column
 */
private int[] getMissingValuesByColumn(Instances data) {
    int numRows = data.size();
    int numColumns = data.numAttributes();
    int[] missingValues = new int[numColumns];
    for (int i = 0; i < numRows; i++)
        for (int j = 0; j < numColumns; j++)
            if (data.get(i).isMissing(j))
                missingValues[j]++;
    return missingValues;
}
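A hedged illustration of driving both helpers above from within the same class; the method name reportMissingValues and the dataset path are hypothetical:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

private void reportMissingValues() throws Exception {
    Instances data = DataSource.read("dataset.arff"); // placeholder path
    int[] byRow = getMissingValuesByRow(data);
    int[] byColumn = getMissingValuesByColumn(data);
    // per-attribute summary
    for (int j = 0; j < data.numAttributes(); j++)
        System.out.println(data.attribute(j).name() + ": " + byColumn[j] + " missing");
    // rows that are fully populated
    long completeRows = java.util.Arrays.stream(byRow).filter(c -> c == 0).count();
    System.out.println("Rows with no missing values: " + completeRows);
}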
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig the original datasets
 * @param inst the processed datasets to merge into one
 * @param ids  the IDs for identifying the rows
 * @return the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
    Instances result;
    ArrayList<Attribute> atts;
    int i;
    int n;
    int m;
    int index;
    String relation;
    List sortedIDs;
    Attribute att;
    int[] indexStart;
    double value;
    double[] values;
    HashMap<Integer, Integer> hashmap;
    HashSet<Instance> hs;

    // create header
    if (isLoggingEnabled())
        getLogger().info("Creating merged header...");
    atts = new ArrayList<>();
    relation = "";
    indexStart = new int[inst.length];
    for (i = 0; i < inst.length; i++) {
        indexStart[i] = atts.size();
        for (n = 0; n < inst[i].numAttributes(); n++)
            atts.add((Attribute) inst[i].attribute(n).copy());
        // assemble relation name
        if (i > 0)
            relation += "_";
        relation += inst[i].relationName();
    }
    result = new Instances(relation, atts, ids.size());

    // fill with missing values
    if (isLoggingEnabled())
        getLogger().info("Filling with missing values...");
    for (i = 0; i < ids.size(); i++) {
        if (isStopped())
            return null;
        // progress
        if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
            getLogger().info("" + (i + 1));
        result.add(new DenseInstance(result.numAttributes()));
    }

    // sort IDs
    if (isLoggingEnabled())
        getLogger().info("Sorting indices...");
    sortedIDs = new ArrayList(ids);
    Collections.sort(sortedIDs);

    // generate rows
    hashmap = new HashMap<>();
    for (i = 0; i < inst.length; i++) {
        if (isStopped())
            return null;
        if (isLoggingEnabled())
            getLogger().info("Adding file #" + (i + 1));
        att = orig[i].attribute(m_UniqueID);
        for (n = 0; n < inst[i].numInstances(); n++) {
            // progress
            if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
                getLogger().info("" + (n + 1));
            // determine index of row
            if (m_AttType == Attribute.NUMERIC)
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
            else
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
            if (index < 0)
                throw new IllegalStateException(
                        "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");
            if (!hashmap.containsKey(index))
                hashmap.put(index, 0);
            hashmap.put(index, hashmap.get(index) + 1);
            // use internal representation for faster access
            values = result.instance(index).toDoubleArray();
            // add attribute values
            for (m = 0; m < inst[i].numAttributes(); m++) {
                // missing value?
                if (inst[i].instance(n).isMissing(m))
                    continue;
                switch (inst[i].attribute(m).type()) {
                case Attribute.NUMERIC:
                case Attribute.DATE:
                case Attribute.NOMINAL:
                    values[indexStart[i] + m] = inst[i].instance(n).value(m);
                    break;
                case Attribute.STRING:
                    value = result.attribute(indexStart[i] + m)
                            .addStringValue(inst[i].instance(n).stringValue(m));
                    values[indexStart[i] + m] = value;
                    break;
                case Attribute.RELATIONAL:
                    value = result.attribute(indexStart[i] + m)
                            .addRelation(inst[i].instance(n).relationalValue(m));
                    values[indexStart[i] + m] = value;
                    break;
                default:
                    throw new IllegalStateException(
                            "Unhandled attribute type: " + inst[i].attribute(m).type());
                }
            }
            // update row
            result.set(index, new DenseInstance(1.0, values));
        }
    }

    // optionally remove rows that did not appear in every dataset
    if (getRemove()) {
        hs = new HashSet<>();
        for (Integer x : hashmap.keySet()) {
            if (hashmap.get(x) != inst.length)
                hs.add(result.get(x));
        }
        result.removeAll(hs);
    }

    return result;
}
From source file:ai.BalancedRandomForest.java
License:GNU General Public License
/**
 * Build Balanced Random Forest
 */
public void buildClassifier(final Instances data) throws Exception {
    // If number of features is 0 then set it to log2 of M (number of attributes)
    if (numFeatures < 1)
        numFeatures = (int) Utils.log2(data.numAttributes()) + 1;
    // Check maximum number of random features
    if (numFeatures >= data.numAttributes())
        numFeatures = data.numAttributes() - 1;

    // Initialize array of trees
    tree = new BalancedRandomTree[numTrees];

    // total number of instances
    final int numInstances = data.numInstances();
    // total number of classes
    final int numClasses = data.numClasses();

    final ArrayList<Integer>[] indexSample = new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++)
        indexSample[i] = new ArrayList<Integer>();

    // fill indexSample with the indices of each class
    for (int i = 0; i < numInstances; i++)
        indexSample[(int) data.get(i).classValue()].add(i);

    final Random random = new Random(seed);

    // Executor service to run concurrent trees
    final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees);

    final boolean[][] inBag = new boolean[numTrees][numInstances];

    try {
        for (int i = 0; i < numTrees; i++) {
            final ArrayList<Integer> bagIndices = new ArrayList<Integer>();
            // Randomly select the indices in a balanced way
            for (int j = 0; j < numInstances; j++) {
                // Select first the class
                final int randomClass = random.nextInt(numClasses);
                // Select then a random sample of that class
                final int randomSample = random.nextInt(indexSample[randomClass].size());
                bagIndices.add(indexSample[randomClass].get(randomSample));
                inBag[i][indexSample[randomClass].get(randomSample)] = true;
            }
            // Create random tree
            final Splitter splitter = new Splitter(
                    new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt())));
            futures.add(exe.submit(new Callable<BalancedRandomTree>() {
                public BalancedRandomTree call() {
                    return new BalancedRandomTree(data, bagIndices, splitter);
                }
            }));
        }

        // Grab all trained trees before proceeding
        for (int treeIdx = 0; treeIdx < numTrees; treeIdx++)
            tree[treeIdx] = futures.get(treeIdx).get();

        // Calculate out of bag error
        final boolean numeric = data.classAttribute().isNumeric();
        List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances());
        for (int i = 0; i < data.numInstances(); i++) {
            VotesCollector aCollector = new VotesCollector(tree, i, data, inBag);
            votes.add(exe.submit(aCollector));
        }
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        for (int i = 0; i < data.numInstances(); i++) {
            double vote = votes.get(i).get();
            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }
        outOfBagError = errorSum / outOfBagCount;
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        exe.shutdownNow();
    }
}
From source file:ai.BalancedRandomTree.java
License:GNU General Public License
/**
 * Create random tree (non-recursively)
 *
 * @param data original data
 * @param indices indices of the samples to use
 * @param depth starting depth
 * @param splitFnProducer split function producer
 * @return root node
 */
private InteriorNode createTree(final Instances data, final ArrayList<Integer> indices, final int depth,
        final Splitter splitFnProducer) {
    int maxDepth = depth;
    // Create root node
    InteriorNode root = new InteriorNode(depth, splitFnProducer.getSplitFunction(data, indices));
    // Create list of nodes to process and add the root to it
    final LinkedList<InteriorNode> remainingNodes = new LinkedList<InteriorNode>();
    remainingNodes.add(root);
    // Create list of indices to process (it must match the node list at all times)
    final LinkedList<ArrayList<Integer>> remainingIndices = new LinkedList<ArrayList<Integer>>();
    remainingIndices.add(indices);
    // While there are still nodes to process
    while (!remainingNodes.isEmpty()) {
        final InteriorNode currentNode = remainingNodes.removeLast();
        final ArrayList<Integer> currentIndices = remainingIndices.removeLast();
        // new arrays of indices for the left and right sons
        final ArrayList<Integer> leftArray = new ArrayList<Integer>();
        final ArrayList<Integer> rightArray = new ArrayList<Integer>();
        // split data
        for (final Integer it : currentIndices) {
            if (currentNode.splitFn.evaluate(data.get(it.intValue()))) {
                leftArray.add(it);
            } else {
                rightArray.add(it);
            }
        }
        // Update maximum depth (for the record)
        if (currentNode.depth > maxDepth)
            maxDepth = currentNode.depth;
        if (leftArray.isEmpty()) {
            // degenerate split: all samples went right, collapse into a leaf
            currentNode.left = new LeafNode(data, rightArray);
        } else if (rightArray.isEmpty()) {
            // degenerate split: all samples went left, collapse into a leaf
            currentNode.left = new LeafNode(data, leftArray);
        } else {
            currentNode.left = new InteriorNode(currentNode.depth + 1,
                    splitFnProducer.getSplitFunction(data, leftArray));
            remainingNodes.add((InteriorNode) currentNode.left);
            remainingIndices.add(leftArray);
            currentNode.right = new InteriorNode(currentNode.depth + 1,
                    splitFnProducer.getSplitFunction(data, rightArray));
            remainingNodes.add((InteriorNode) currentNode.right);
            remainingIndices.add(rightArray);
        }
    }
    System.out.println("Max depth = " + maxDepth);
    return root;
}
From source file:ai.GiniFunction.java
License:GNU General Public License
/**
 * Create split function based on Gini coefficient
 *
 * @param data original data
 * @param indices indices of the samples to use
 */
public void init(Instances data, ArrayList<Integer> indices) {
    if (indices.size() == 0) {
        this.index = 0;
        this.threshold = 0;
        this.allSame = true;
        return;
    }
    final int len = data.numAttributes();
    final int numElements = indices.size();
    final int numClasses = data.numClasses();
    final int classIndex = data.classIndex();

    /** Attribute-class pair comparator (by attribute value) */
    final Comparator<AttributeClassPair> comp = new Comparator<AttributeClassPair>() {
        public int compare(AttributeClassPair o1, AttributeClassPair o2) {
            final double diff = o2.attributeValue - o1.attributeValue;
            if (diff < 0)
                return 1;
            else if (diff == 0)
                return 0;
            else
                return -1;
        }

        public boolean equals(Object o) {
            return false;
        }
    };

    // Create and shuffle indices of features to use
    ArrayList<Integer> allIndices = new ArrayList<Integer>();
    for (int i = 0; i < len; i++)
        if (i != classIndex)
            allIndices.add(i);

    double minimumGini = Double.MAX_VALUE;
    for (int i = 0; i < numOfFeatures; i++) {
        // Select the random feature
        final int index = random.nextInt(allIndices.size());
        final int featureToUse = allIndices.get(index);
        allIndices.remove(index); // remove that element to prevent repetitions

        // Get the smallest Gini coefficient:
        // create list with attribute-class pairs
        final ArrayList<AttributeClassPair> list = new ArrayList<AttributeClassPair>();
        for (int j = 0; j < numElements; j++) {
            final Instance ins = data.get(indices.get(j));
            list.add(new AttributeClassPair(ins.value(featureToUse), (int) ins.value(classIndex)));
        }
        // Sort pairs in increasing order
        Collections.sort(list, comp);

        final double[] probLeft = new double[numClasses];
        final double[] probRight = new double[numClasses];
        // initial probabilities (all samples on the right)
        for (int n = 0; n < list.size(); n++)
            probRight[list.get(n).classValue]++;

        // Try all splitting points, from position 0 to the end
        for (int splitPoint = 0; splitPoint < numElements; splitPoint++) {
            // Calculate Gini coefficient
            double giniLeft = 0;
            double giniRight = 0;
            final int rightNumElements = numElements - splitPoint;
            for (int nClass = 0; nClass < numClasses; nClass++) {
                // left set
                double prob = probLeft[nClass];
                // Divide by the number of elements to get probabilities
                if (splitPoint != 0)
                    prob /= (double) splitPoint;
                giniLeft += prob * prob;
                // right set
                prob = probRight[nClass];
                if (rightNumElements != 0)
                    prob /= (double) rightNumElements;
                giniRight += prob * prob;
            }
            // Total Gini value
            final double gini = ((1.0 - giniLeft) * splitPoint + (1.0 - giniRight) * rightNumElements)
                    / (double) numElements;
            // Save values of minimum Gini coefficient
            if (gini < minimumGini) {
                minimumGini = gini;
                this.index = featureToUse;
                this.threshold = list.get(splitPoint).attributeValue;
            }
            // update probabilities for next iteration
            probLeft[list.get(splitPoint).classValue]++;
            probRight[list.get(splitPoint).classValue]--;
        }
    }
}
From source file:ANN.MultilayerPerceptron.java
@Override
public void buildClassifier(Instances i) {
    int cnt = 0;
    while (true) { // repeat iterations until the error is small enough
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // build the input list (bias term first)
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0);
            for (int idx = 0; idx < i.numAttributes() - 1; idx++) {
                listInput.add(i.get(idxInstance).value(idx));
            }
            // compute the hidden layer outputs (bias term first)
            ArrayList<Double> hiddenOutput = new ArrayList<>();
            hiddenOutput.add(1.0);
            for (int idxOutput = 1; idxOutput < listHidden.size(); idxOutput++) {
                output(listHidden, listInput, idxOutput);
                hiddenOutput.add(listHidden.get(idxOutput).getValue());
            }
            // compute the output layer
            for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) {
                output(listOutput, hiddenOutput, idxOutput);
            }
            // compute the error
            calculateError(idxInstance);
            // update the weights
            updateWeight(listInput);
        }
        // accumulate the squared error over all instances
        double error = 0;
        for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) {
            for (int idx = 0; idx < listOutput.size(); idx++) {
                error += Math.pow(listOutput.get(idx).getError(), 2) / 2;
            }
        }
        // print the error every 1000 passes
        if (cnt == 1000) {
            System.out.println(error);
            System.out.println();
            cnt = 0;
        }
        cnt++;
        // stop once the total error is small enough
        if (error <= 0.3)
            break;
    }
}
From source file:ANN.MultiplePerceptron.java
@Override
public void buildClassifier(Instances i) {
    for (int itt = 0; itt < 5000; itt++) {
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // build the input list (bias term first)
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0);
            for (int idxInstanceVal = 0; idxInstanceVal < i.numAttributes() - 1; idxInstanceVal++) {
                listInput.add(i.get(idxInstance).value(idxInstanceVal));
            }
            // compute the hidden layer outputs (bias term first)
            ArrayList<Double> listOutputHidden = new ArrayList<>();
            listOutputHidden.add(1.0);
            for (int idxNodeHidden = 1; idxNodeHidden < listNodeHidden.size(); idxNodeHidden++) {
                double outputVal = listNodeHidden.get(idxNodeHidden).output(listInput);
                listNodeHidden.get(idxNodeHidden).setValue(outputVal);
                listOutputHidden.add(outputVal);
            }
            // compute the output layer
            for (int idxNodeOutput = 0; idxNodeOutput < listNodeOutput.size(); idxNodeOutput++) {
                double outputVal = listNodeOutput.get(idxNodeOutput).output(listOutputHidden);
                listNodeOutput.get(idxNodeOutput).setValue(outputVal);
            }
            // calculate error (back propagation)
            calculateError(idxInstance);
            // re-calculate weights
            calculateWeight(i.instance(idxInstance));
        }
    }
    // print the final state of the network
    for (int idx = 0; idx < listNodeHidden.size(); idx++) {
        System.out.println("Hidden value " + listNodeHidden.get(idx).getValue());
        System.out.println("Hidden error " + listNodeHidden.get(idx).getError());
        for (int idx2 = 0; idx2 < listNodeHidden.get(idx).getWeightSize(); idx2++)
            System.out.println("Hidden weight " + listNodeHidden.get(idx).getWeightFromList(idx2));
    }
    System.out.println();
    for (int idx = 0; idx < listNodeOutput.size(); idx++) {
        System.out.println("Output value " + listNodeOutput.get(idx).getValue());
        System.out.println("Output error " + listNodeOutput.get(idx).getError());
        for (int idx2 = 0; idx2 < listNodeOutput.get(idx).getWeightSize(); idx2++)
            System.out.println("Output weight " + listNodeOutput.get(idx).getWeightFromList(idx2));
    }
}