List of usage examples for weka.core Instances get
@Override
public Instance get(int index)
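Before the examples, a minimal self-contained sketch of the basic call (the file name is a placeholder; any ARFF/CSV file readable by Weka works):

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesGetDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = DataSource.read("data.arff");
        // get(index) returns the Instance at the given position, like List.get
        for (int i = 0; i < data.size(); i++) {
            Instance row = data.get(i);
            System.out.println(i + ": " + row);
        }
    }
}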
From source file:AaronTest.ShapeletTransformExperiments.java
public static boolean AreInstancesEqual(Instances a, Instances b) {
    // Datasets of different sizes cannot be equal
    if (a.size() != b.size()) {
        return false;
    }
    // Note: only the values of the first attribute (index 0) are compared
    for (int i = 0; i < a.size(); i++) {
        double distance = a.get(i).value(0) - b.get(i).value(0);
        if (distance != 0) {
            return false;
        }
    }
    return true;
}
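A hypothetical driver for the helper above, written as if it lived in the same class (the dataset paths are placeholders; both files need a numeric attribute at index 0):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public static void main(String[] args) throws Exception {
    Instances a = DataSource.read("setA.arff"); // placeholder path
    Instances b = DataSource.read("setB.arff"); // placeholder path
    System.out.println("Equal on attribute 0: " + AreInstancesEqual(a, b));
}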
From source file:activeSegmentation.feature.FeatureExtraction.java
License:Open Source License
/**
 * Adds training samples from a rectangular ROI: one instance per pixel of the slice.
 *
 * @param sliceNum    number of the 2D slice being processed
 * @param classLabels class labels for the header
 * @param classes     number of classes
 * @return the set of instances that was generated
 */
private Instances addRectangleRoiInstances(int sliceNum, List<String> classLabels, int classes) {
    Instances testingData;
    ArrayList<Attribute> attributes = createFeatureHeader();
    attributes.add(new Attribute(Common.CLASS, addClasstoHeader(classes, classLabels)));
    System.out.println(attributes.toString());

    // create initial set of instances
    testingData = new Instances(Common.INSTANCE_NAME, attributes, 1);
    // Set the index of the class attribute
    testingData.setClassIndex(filterManager.getNumOfFeatures());

    // add one instance per pixel of the slice
    for (int x = 0; x < originalImage.getWidth(); x++) {
        for (int y = 0; y < originalImage.getHeight(); y++) {
            testingData.add(filterManager.createInstance(x, y, 0, sliceNum));
        }
    }

    System.out.println("Size: " + testingData.size());
    System.out.println(testingData.get(1).toString());
    return testingData;
}
From source file:adams.flow.template.MyTransformer.java
License:Open Source License
/**
 * Counts the number of missing values in each row.
 *
 * @param data the dataset to analyze
 * @return array with the number of missing values in each row
 */
private int[] getMissingValuesByRow(Instances data) {
    int numRows = data.size();
    int numColumns = data.numAttributes();
    int[] missingValues = new int[numRows];
    for (int i = 0; i < numRows; i++)
        for (int j = 0; j < numColumns; j++)
            if (data.get(i).isMissing(j))
                missingValues[i]++;
    return missingValues;
}
From source file:adams.flow.template.MyTransformer.java
License:Open Source License
/**
 * Counts the number of missing values in each column.
 *
 * @param data the dataset to analyze
 * @return array with the number of missing values in each column
 */
private int[] getMissingValuesByColumn(Instances data) {
    int numRows = data.size();
    int numColumns = data.numAttributes();
    int[] missingValues = new int[numColumns];
    for (int i = 0; i < numRows; i++)
        for (int j = 0; j < numColumns; j++)
            if (data.get(i).isMissing(j))
                missingValues[j]++;
    return missingValues;
}
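A hedged illustration of driving both helpers above from within the same class; the method name reportMissingValues and the dataset path are hypothetical:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

private void reportMissingValues() throws Exception {
    Instances data = DataSource.read("dataset.arff"); // placeholder path
    int[] byRow = getMissingValuesByRow(data);
    int[] byColumn = getMissingValuesByColumn(data);
    // per-attribute summary
    for (int j = 0; j < data.numAttributes(); j++)
        System.out.println(data.attribute(j).name() + ": " + byColumn[j] + " missing");
    // rows that are fully populated
    long completeRows = java.util.Arrays.stream(byRow).filter(c -> c == 0).count();
    System.out.println("Rows with no missing values: " + completeRows);
}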
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Merges the datasets based on the collected IDs.
 *
 * @param orig the original datasets
 * @param inst the processed datasets to merge into one
 * @param ids  the IDs for identifying the rows
 * @return the merged dataset
 */
protected Instances merge(Instances[] orig, Instances[] inst, HashSet ids) {
    Instances result;
    ArrayList<Attribute> atts;
    int i;
    int n;
    int m;
    int index;
    String relation;
    List sortedIDs;
    Attribute att;
    int[] indexStart;
    double value;
    double[] values;
    HashMap<Integer, Integer> hashmap;
    HashSet<Instance> hs;

    // create header
    if (isLoggingEnabled())
        getLogger().info("Creating merged header...");
    atts = new ArrayList<>();
    relation = "";
    indexStart = new int[inst.length];
    for (i = 0; i < inst.length; i++) {
        indexStart[i] = atts.size();
        for (n = 0; n < inst[i].numAttributes(); n++)
            atts.add((Attribute) inst[i].attribute(n).copy());
        // assemble relation name
        if (i > 0)
            relation += "_";
        relation += inst[i].relationName();
    }
    result = new Instances(relation, atts, ids.size());

    // fill with missing values
    if (isLoggingEnabled())
        getLogger().info("Filling with missing values...");
    for (i = 0; i < ids.size(); i++) {
        if (isStopped())
            return null;
        // progress
        if (isLoggingEnabled() && ((i + 1) % 1000 == 0))
            getLogger().info("" + (i + 1));
        result.add(new DenseInstance(result.numAttributes()));
    }

    // sort IDs
    if (isLoggingEnabled())
        getLogger().info("Sorting indices...");
    sortedIDs = new ArrayList(ids);
    Collections.sort(sortedIDs);

    // generate rows
    hashmap = new HashMap<>();
    for (i = 0; i < inst.length; i++) {
        if (isStopped())
            return null;
        if (isLoggingEnabled())
            getLogger().info("Adding file #" + (i + 1));
        att = orig[i].attribute(m_UniqueID);
        for (n = 0; n < inst[i].numInstances(); n++) {
            // progress
            if (isLoggingEnabled() && ((n + 1) % 1000 == 0))
                getLogger().info("" + (n + 1));
            // determine index of row
            if (m_AttType == Attribute.NUMERIC)
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).value(att));
            else
                index = Collections.binarySearch(sortedIDs, inst[i].instance(n).stringValue(att));
            if (index < 0)
                throw new IllegalStateException(
                        "Failed to determine index for row #" + (n + 1) + " of dataset #" + (i + 1) + "!");
            if (!hashmap.containsKey(index))
                hashmap.put(index, 0);
            hashmap.put(index, hashmap.get(index) + 1);
            // use internal representation for faster access
            values = result.instance(index).toDoubleArray();
            // add attribute values
            for (m = 0; m < inst[i].numAttributes(); m++) {
                // missing value?
                if (inst[i].instance(n).isMissing(m))
                    continue;
                switch (inst[i].attribute(m).type()) {
                case Attribute.NUMERIC:
                case Attribute.DATE:
                case Attribute.NOMINAL:
                    values[indexStart[i] + m] = inst[i].instance(n).value(m);
                    break;
                case Attribute.STRING:
                    value = result.attribute(indexStart[i] + m)
                            .addStringValue(inst[i].instance(n).stringValue(m));
                    values[indexStart[i] + m] = value;
                    break;
                case Attribute.RELATIONAL:
                    value = result.attribute(indexStart[i] + m)
                            .addRelation(inst[i].instance(n).relationalValue(m));
                    values[indexStart[i] + m] = value;
                    break;
                default:
                    throw new IllegalStateException(
                            "Unhandled attribute type: " + inst[i].attribute(m).type());
                }
            }
            // update row
            result.set(index, new DenseInstance(1.0, values));
        }
    }

    // optionally remove rows that did not appear in every dataset
    if (getRemove()) {
        hs = new HashSet<>();
        for (Integer x : hashmap.keySet()) {
            if (hashmap.get(x) != inst.length)
                hs.add(result.get(x));
        }
        result.removeAll(hs);
    }

    return result;
}
From source file:ai.BalancedRandomForest.java
License:GNU General Public License
/**
 * Build Balanced Random Forest
 */
public void buildClassifier(final Instances data) throws Exception {
    // If number of features is 0 then set it to log2 of M (number of attributes)
    if (numFeatures < 1)
        numFeatures = (int) Utils.log2(data.numAttributes()) + 1;
    // Check maximum number of random features
    if (numFeatures >= data.numAttributes())
        numFeatures = data.numAttributes() - 1;

    // Initialize array of trees
    tree = new BalancedRandomTree[numTrees];

    // total number of instances
    final int numInstances = data.numInstances();
    // total number of classes
    final int numClasses = data.numClasses();

    final ArrayList<Integer>[] indexSample = new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++)
        indexSample[i] = new ArrayList<Integer>();

    // fill indexSample with the indices of each class
    for (int i = 0; i < numInstances; i++)
        indexSample[(int) data.get(i).classValue()].add(i);

    final Random random = new Random(seed);

    // Executor service to run concurrent trees
    final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees);

    final boolean[][] inBag = new boolean[numTrees][numInstances];

    try {
        for (int i = 0; i < numTrees; i++) {
            final ArrayList<Integer> bagIndices = new ArrayList<Integer>();
            // Randomly select the indices in a balanced way
            for (int j = 0; j < numInstances; j++) {
                // Select first the class
                final int randomClass = random.nextInt(numClasses);
                // Select then a random sample of that class
                final int randomSample = random.nextInt(indexSample[randomClass].size());
                bagIndices.add(indexSample[randomClass].get(randomSample));
                inBag[i][indexSample[randomClass].get(randomSample)] = true;
            }
            // Create random tree
            final Splitter splitter = new Splitter(
                    new GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt())));
            futures.add(exe.submit(new Callable<BalancedRandomTree>() {
                public BalancedRandomTree call() {
                    return new BalancedRandomTree(data, bagIndices, splitter);
                }
            }));
        }

        // Grab all trained trees before proceeding
        for (int treeIdx = 0; treeIdx < numTrees; treeIdx++)
            tree[treeIdx] = futures.get(treeIdx).get();

        // Calculate out of bag error
        final boolean numeric = data.classAttribute().isNumeric();
        List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances());
        for (int i = 0; i < data.numInstances(); i++) {
            VotesCollector aCollector = new VotesCollector(tree, i, data, inBag);
            votes.add(exe.submit(aCollector));
        }
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        for (int i = 0; i < data.numInstances(); i++) {
            double vote = votes.get(i).get();
            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }
        outOfBagError = errorSum / outOfBagCount;
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        exe.shutdownNow();
    }
}
From source file:ai.BalancedRandomTree.java
License:GNU General Public License
/**
 * Create random tree (non-recursively)
 *
 * @param data original data
 * @param indices indices of the samples to use
 * @param depth starting depth
 * @param splitFnProducer split function producer
 * @return root node
 */
private InteriorNode createTree(final Instances data, final ArrayList<Integer> indices, final int depth,
        final Splitter splitFnProducer) {
    int maxDepth = depth;
    // Create root node
    InteriorNode root = new InteriorNode(depth, splitFnProducer.getSplitFunction(data, indices));
    // Create list of nodes to process and add the root to it
    final LinkedList<InteriorNode> remainingNodes = new LinkedList<InteriorNode>();
    remainingNodes.add(root);
    // Create list of indices to process (it must match the node list at all times)
    final LinkedList<ArrayList<Integer>> remainingIndices = new LinkedList<ArrayList<Integer>>();
    remainingIndices.add(indices);
    // While there are still nodes to process
    while (!remainingNodes.isEmpty()) {
        final InteriorNode currentNode = remainingNodes.removeLast();
        final ArrayList<Integer> currentIndices = remainingIndices.removeLast();
        // new arrays of indices for the left and right sons
        final ArrayList<Integer> leftArray = new ArrayList<Integer>();
        final ArrayList<Integer> rightArray = new ArrayList<Integer>();
        // split data
        for (final Integer it : currentIndices) {
            if (currentNode.splitFn.evaluate(data.get(it.intValue()))) {
                leftArray.add(it);
            } else {
                rightArray.add(it);
            }
        }
        // Update maximum depth (for the record)
        if (currentNode.depth > maxDepth)
            maxDepth = currentNode.depth;
        if (leftArray.isEmpty()) {
            // degenerate split: all samples went right, collapse into a leaf
            currentNode.left = new LeafNode(data, rightArray);
        } else if (rightArray.isEmpty()) {
            // degenerate split: all samples went left, collapse into a leaf
            currentNode.left = new LeafNode(data, leftArray);
        } else {
            currentNode.left = new InteriorNode(currentNode.depth + 1,
                    splitFnProducer.getSplitFunction(data, leftArray));
            remainingNodes.add((InteriorNode) currentNode.left);
            remainingIndices.add(leftArray);
            currentNode.right = new InteriorNode(currentNode.depth + 1,
                    splitFnProducer.getSplitFunction(data, rightArray));
            remainingNodes.add((InteriorNode) currentNode.right);
            remainingIndices.add(rightArray);
        }
    }
    System.out.println("Max depth = " + maxDepth);
    return root;
}
From source file:ai.GiniFunction.java
License:GNU General Public License
/**
 * Create split function based on Gini coefficient
 *
 * @param data original data
 * @param indices indices of the samples to use
 */
public void init(Instances data, ArrayList<Integer> indices) {
    if (indices.size() == 0) {
        this.index = 0;
        this.threshold = 0;
        this.allSame = true;
        return;
    }
    final int len = data.numAttributes();
    final int numElements = indices.size();
    final int numClasses = data.numClasses();
    final int classIndex = data.classIndex();

    /** Attribute-class pair comparator (by attribute value) */
    final Comparator<AttributeClassPair> comp = new Comparator<AttributeClassPair>() {
        public int compare(AttributeClassPair o1, AttributeClassPair o2) {
            final double diff = o2.attributeValue - o1.attributeValue;
            if (diff < 0)
                return 1;
            else if (diff == 0)
                return 0;
            else
                return -1;
        }

        public boolean equals(Object o) {
            return false;
        }
    };

    // Create and shuffle indices of features to use
    ArrayList<Integer> allIndices = new ArrayList<Integer>();
    for (int i = 0; i < len; i++)
        if (i != classIndex)
            allIndices.add(i);

    double minimumGini = Double.MAX_VALUE;
    for (int i = 0; i < numOfFeatures; i++) {
        // Select the random feature
        final int index = random.nextInt(allIndices.size());
        final int featureToUse = allIndices.get(index);
        allIndices.remove(index); // remove that element to prevent repetitions

        // Get the smallest Gini coefficient:
        // create list with attribute-class pairs
        final ArrayList<AttributeClassPair> list = new ArrayList<AttributeClassPair>();
        for (int j = 0; j < numElements; j++) {
            final Instance ins = data.get(indices.get(j));
            list.add(new AttributeClassPair(ins.value(featureToUse), (int) ins.value(classIndex)));
        }
        // Sort pairs in increasing order
        Collections.sort(list, comp);

        final double[] probLeft = new double[numClasses];
        final double[] probRight = new double[numClasses];
        // initial probabilities (all samples on the right)
        for (int n = 0; n < list.size(); n++)
            probRight[list.get(n).classValue]++;

        // Try all splitting points, from position 0 to the end
        for (int splitPoint = 0; splitPoint < numElements; splitPoint++) {
            // Calculate Gini coefficient
            double giniLeft = 0;
            double giniRight = 0;
            final int rightNumElements = numElements - splitPoint;
            for (int nClass = 0; nClass < numClasses; nClass++) {
                // left set
                double prob = probLeft[nClass];
                // Divide by the number of elements to get probabilities
                if (splitPoint != 0)
                    prob /= (double) splitPoint;
                giniLeft += prob * prob;
                // right set
                prob = probRight[nClass];
                if (rightNumElements != 0)
                    prob /= (double) rightNumElements;
                giniRight += prob * prob;
            }
            // Total Gini value
            final double gini = ((1.0 - giniLeft) * splitPoint + (1.0 - giniRight) * rightNumElements)
                    / (double) numElements;
            // Save values of minimum Gini coefficient
            if (gini < minimumGini) {
                minimumGini = gini;
                this.index = featureToUse;
                this.threshold = list.get(splitPoint).attributeValue;
            }
            // update probabilities for next iteration
            probLeft[list.get(splitPoint).classValue]++;
            probRight[list.get(splitPoint).classValue]--;
        }
    }
}
From source file:ANN.MultilayerPerceptron.java
@Override
public void buildClassifier(Instances i) {
    int cnt = 0;
    while (true) { // repeat iterations until the error is small enough
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // build the input list (bias term first)
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0);
            for (int idx = 0; idx < i.numAttributes() - 1; idx++) {
                listInput.add(i.get(idxInstance).value(idx));
            }
            // compute the hidden layer outputs (bias term first)
            ArrayList<Double> hiddenOutput = new ArrayList<>();
            hiddenOutput.add(1.0);
            for (int idxOutput = 1; idxOutput < listHidden.size(); idxOutput++) {
                output(listHidden, listInput, idxOutput);
                hiddenOutput.add(listHidden.get(idxOutput).getValue());
            }
            // compute the output layer
            for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) {
                output(listOutput, hiddenOutput, idxOutput);
            }
            // compute the error
            calculateError(idxInstance);
            // update the weights
            updateWeight(listInput);
        }
        // accumulate the squared error over all instances
        double error = 0;
        for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) {
            for (int idx = 0; idx < listOutput.size(); idx++) {
                error += Math.pow(listOutput.get(idx).getError(), 2) / 2;
            }
        }
        // print the error every 1000 passes
        if (cnt == 1000) {
            System.out.println(error);
            System.out.println();
            cnt = 0;
        }
        cnt++;
        // stop once the total error is small enough
        if (error <= 0.3)
            break;
    }
}
From source file:ANN.MultiplePerceptron.java
@Override
public void buildClassifier(Instances i) {
    for (int itt = 0; itt < 5000; itt++) {
        for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) {
            // build the input list (bias term first)
            ArrayList<Double> listInput = new ArrayList<>();
            listInput.add(1.0);
            for (int idxInstanceVal = 0; idxInstanceVal < i.numAttributes() - 1; idxInstanceVal++) {
                listInput.add(i.get(idxInstance).value(idxInstanceVal));
            }
            // compute the hidden layer outputs (bias term first)
            ArrayList<Double> listOutputHidden = new ArrayList<>();
            listOutputHidden.add(1.0);
            for (int idxNodeHidden = 1; idxNodeHidden < listNodeHidden.size(); idxNodeHidden++) {
                double outputVal = listNodeHidden.get(idxNodeHidden).output(listInput);
                listNodeHidden.get(idxNodeHidden).setValue(outputVal);
                listOutputHidden.add(outputVal);
            }
            // compute the output layer
            for (int idxNodeOutput = 0; idxNodeOutput < listNodeOutput.size(); idxNodeOutput++) {
                double outputVal = listNodeOutput.get(idxNodeOutput).output(listOutputHidden);
                listNodeOutput.get(idxNodeOutput).setValue(outputVal);
            }
            // calculate error (back propagation)
            calculateError(idxInstance);
            // re-calculate weights
            calculateWeight(i.instance(idxInstance));
        }
    }
    // print the final state of the network
    for (int idx = 0; idx < listNodeHidden.size(); idx++) {
        System.out.println("Hidden value " + listNodeHidden.get(idx).getValue());
        System.out.println("Hidden error " + listNodeHidden.get(idx).getError());
        for (int idx2 = 0; idx2 < listNodeHidden.get(idx).getWeightSize(); idx2++)
            System.out.println("Hidden weight " + listNodeHidden.get(idx).getWeightFromList(idx2));
    }
    System.out.println();
    for (int idx = 0; idx < listNodeOutput.size(); idx++) {
        System.out.println("Output value " + listNodeOutput.get(idx).getValue());
        System.out.println("Output error " + listNodeOutput.get(idx).getError());
        for (int idx2 = 0; idx2 < listNodeOutput.get(idx).getWeightSize(); idx2++)
            System.out.println("Output weight " + listNodeOutput.get(idx).getWeightFromList(idx2));
    }
}