Example usage for weka.core Instances insertAttributeAt

List of usage examples for weka.core Instances insertAttributeAt

Introduction

In this page you can find the example usage for weka.core Instances insertAttributeAt.

Prototype



public void insertAttributeAt(Attribute att, int position) 

Source Link

Document

Inserts an attribute at the given position (0 to numAttributes()) and sets all values to be missing.

Usage

From source file:meka.classifiers.multitarget.NSR.java

License:Open Source License

public Instances convertInstances(Instances D, int L) throws Exception {

    //Gather combinations
    HashMap<String, Integer> distinctCombinations = MLUtils.classCombinationCounts(D);
    if (getDebug())
        System.out.println("Found " + distinctCombinations.size() + " unique combinations");

    //Prune combinations
    MLUtils.pruneCountHashMap(distinctCombinations, m_P);
    if (getDebug())
        System.out.println("Pruned to " + distinctCombinations.size() + " with P=" + m_P);

    // Remove all class attributes
    Instances D_ = MLUtils.deleteAttributesAt(new Instances(D), MLUtils.gen_indices(L));
    // Add a new class attribute
    D_.insertAttributeAt(new Attribute("CLASS", new ArrayList(distinctCombinations.keySet())), 0); // create the class attribute
    D_.setClassIndex(0);/*w  ww.  ja  v  a2 s .  c om*/

    //Add class values
    for (int i = 0; i < D.numInstances(); i++) {
        String y = MLUtils.encodeValue(MLUtils.toIntArray(D.instance(i), L));
        // add it
        if (distinctCombinations.containsKey(y)) //if its class value exists
            D_.instance(i).setClassValue(y);
        // decomp
        else if (m_N > 0) {
            String d_subsets[] = SuperLabelUtils.getTopNSubsets(y, distinctCombinations, m_N);
            for (String s : d_subsets) {
                int w = distinctCombinations.get(s);
                Instance copy = (Instance) (D_.instance(i)).copy();
                copy.setClassValue(s);
                copy.setWeight(1.0 / d_subsets.length);
                D_.add(copy);
            }
        }
    }

    // remove with missing class
    D_.deleteWithMissingClass();

    // keep the header of new dataset for classification
    m_InstancesTemplate = new Instances(D_, 0);

    if (getDebug())
        System.out.println("" + D_);

    return D_;
}

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * InsertZintoD - Insert data Z[][] to Instances D (e.g., as labels).
 * NOTE: Assumes binary labels!/*  ww  w . j  a  v  a2s.  c om*/
 * @see #addZtoD(Instances, double[][], int)
 */
private static Instances insertZintoD(Instances D, double Z[][]) {

    int L = Z[0].length;

    // add attributes
    for (int j = 0; j < L; j++) {
        D.insertAttributeAt(new Attribute("c" + j, Arrays.asList(new String[] { "0", "1" })), j);
    }

    // add values Z[0]...Z[N] to D
    // (note that if D.numInstances() < Z.length, only some are added)
    for (int j = 0; j < L; j++) {
        for (int i = 0; i < D.numInstances(); i++) {
            D.instance(i).setValue(j, Z[i][j] > 0.5 ? 1.0 : 0.0);
        }
    }

    D.setClassIndex(L);
    return D;
}

From source file:meka.core.MLUtils.java

License:Open Source License

/**
 * AddZtoD - Add attribute space Z[N][H] (N rows of H columns) to Instances D, which should have N rows also.
 * @param   D    dataset (of N instances)
 * @param   Z   attribute space (of N rows, H columns)
 * @param   L   column to add Z from in D
 *//*from w  w  w .  j av  a  2 s .c om*/
private static Instances addZtoD(Instances D, double Z[][], int L) {

    int H = Z[0].length;
    int N = D.numInstances();

    // add attributes
    for (int a = 0; a < H; a++) {
        D.insertAttributeAt(new Attribute("A" + a), L + a);
    }

    // add values Z[0]...Z[N] to D
    for (int a = 0; a < H; a++) {
        for (int i = 0; i < N; i++) {
            D.instance(i).setValue(L + a, Z[i][a]);
        }
    }

    D.setClassIndex(L);
    return D;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * @param D         original dataset// w  ww  .  j ava  2  s. co m
 * @param L         number of labels in the original dataset
 * @param cname      class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) {
    D = new Instances(D);

    // Gather combinations
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2
    if (distinctCombinations.size() <= 1 && p > 0) {
        // ... or try again if not ...
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels) 
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    //Add class values
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();

        // add it
        if (ClassValues.contains(y_string)) //if its class value exists
            x.setClassValue(y_string);
        // decomp
        else if (n > 0) {
            //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n);
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations);
            if (d_subsets.length > 0) {
                // fast
                x.setClassValue(d_subsets[0].toString());
                // additional
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                x.setClassMissing();
            }
        }
    }

    // remove with missing class
    D.deleteWithMissingClass();

    try {
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}

From source file:meka.core.PSUtils.java

License:Open Source License

/**
 * Transform instances into a multi-class representation.
 * @param D         original dataset//from   w  w  w  . ja  v  a 2  s.  c om
 * @param L         number of labels in that dataset
 * @param cname      class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods)
 * @param p         pruning value
 * @param n         restoration value
 * @return transformed dataset
 */
public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) {
    D = new Instances(D);

    // Gather combinations
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2
    if (distinctCombinations.size() <= 1 && p > 0) {
        // ... or try again if not ...
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels)
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    //Add class values
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();

        // add it
        if (ClassValues.contains(y_string)) //if its class value exists
            x.setClassValue(y_string);
        // decomp
        else if (n > 0) {
            //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n);
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations);
            if (d_subsets.length > 0) {
                // fast
                x.setClassValue(d_subsets[0].toString());
                // additional
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                x.setClassMissing();
            }
        }
    }

    // remove with missing class
    D.deleteWithMissingClass();

    try {
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}

From source file:meka.core.SuperLabelUtils.java

License:Open Source License

/**
 * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes.
 * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'.
 * @see PSUtils.PSTransformation// w  w  w .  j a  v a 2s. com
 * @param indices   m by k: m super variables, each relating to k original variables
 * @param    D   either multi-label or multi-target dataset
 * @param    p   pruning value
 * @param    n   subset relpacement value
 * @return       a multi-target dataset
 */
public static Instances SLTransformation(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    // F.removeLabels(D_,L);
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y =
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'.
 * @param    D   assume attributes in D labeled by original index
 * @return       Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 *///from   www.  j a v  a2 s  . c om
public static Instances mergeLabels(Instances D, int indices[][], int p, int n) {

    int L = D.classIndex();
    int K = indices.length;
    ArrayList<String> values[] = new ArrayList[K];
    HashMap<String, Integer> counts[] = new HashMap[K];

    // create D_
    Instances D_ = new Instances(D);

    // clear D_
    for (int j = 0; j < L; j++) {
        D_.deleteAttributeAt(0);
    }

    // create atts
    for (int j = 0; j < K; j++) {
        int att[] = indices[j];
        //int values[] = new int[2]; //getValues(indices,D,p);
        counts[j] = getCounts(D, att, p);
        Set<String> vals = counts[j].keySet(); //getValues(D,att,p);
        values[j] = new ArrayList(vals);
        D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j);
    }

    // copy over values
    ArrayList<Integer> deleteList = new ArrayList<Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        for (int j = 0; j < K; j++) {
            String y = encodeValue(x, indices[j]);
            try {
                D_.instance(i).setValue(j, y); // y = 
            } catch (Exception e) {
                // value not allowed
                deleteList.add(i); // mark it for deletion
                String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets
                for (int m = 0; m < y_close.length; m++) {
                    //System.out.println("add "+y_close[m]+" "+counts[j]);
                    Instance x_copy = (Instance) D_.instance(i).copy();
                    x_copy.setValue(j, y_close[m]);
                    x_copy.setWeight(1.0 / y_close.length);
                    D_.add(x_copy);
                }
            }
        }
    }
    // clean up
    Collections.sort(deleteList, Collections.reverseOrder());
    //System.out.println("Deleting "+deleteList.size()+" defunct instances.");
    for (int i : deleteList) {
        D_.delete(i);
    }
    // set class
    D_.setClassIndex(K);
    // done!
    D = null;
    return D_;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels./*from  w w w.  ja v  a2s. co  m*/
 *
 * @param   j    index 1 (assume that <code>j &lt; k</code>)
 * @param   k   index 2 (assume that <code>j &lt; k</code>)
 * @param   D   iInstances, with attributes in labeled by original index
 * @return       Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int j, int k, int p) {
    int L = D.classIndex();

    HashMap<String, Integer> count = new HashMap<String, Integer>();

    Set<String> values = new HashSet<String>();
    for (int i = 0; i < D.numInstances(); i++) {
        String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k));
        String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k);
        //System.out.println("w = "+w);
        count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1);
        values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)));
    }
    //System.out.println("("+j+","+k+")"+values);
    System.out.print("pruned from " + count.size() + " to ");
    MLUtils.pruneCountHashMap(count, p);
    String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future
    System.out.println("" + count.size() + " with p = " + p);
    System.out.println("" + count);
    values = count.keySet();

    // Create and insert the new attribute
    D.insertAttributeAt(
            new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L);

    // Set values for the new attribute
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        String y_jk = encodeValue(x.stringValue(j), x.stringValue(k));
        try {
            x.setValue(L, y_jk); // y_jk = 
        } catch (Exception e) {
            //x.setMissing(L);
            //D.delete(i);
            //i--;
            String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG
            //System.out.println("OK, that value ("+y_jk+") didn't exist ... set the closests ones ...: "+Arrays.toString(y_close));
            int max_c = 0;
            for (String y_ : y_close) {
                int c = count.get(y_);
                if (c > max_c) {
                    max_c = c;
                    y_max = y_;
                }
            }
            //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ...");
            x.setValue(L, y_max);
            // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one)
        }
    }

    // Delete separate attributes
    D.deleteAttributeAt(k > j ? k : j);
    D.deleteAttributeAt(k > j ? j : k);

    // Set class index
    D.setClassIndex(L - 1);
    return D;
}

From source file:milk.classifiers.MIRBFNetwork.java

License:Open Source License

public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);/*from w ww  . ja  v a 2  s .  c o m*/
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter and discard result
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars and add them to new dataset
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    //System.err.println("Finished transforming");
    //System.err.println(newExs);
    return newExs;
}

From source file:milk.classifiers.MIRBFNetwork.java

License:Open Source License

public Exemplar transform(Exemplar test) throws Exception {

    Instances temp = Filter.useFilter(test.getInstances(), m_clm);
    temp.insertAttributeAt(test.getInstances().attribute(0), 0);
    for (int j = 0; j < temp.numInstances(); j++) {
        temp.instance(j).setValue(0, test.idValue());
        //System.err.println(temp.instance(j));
    }//from w  ww . j  a v  a 2 s . c  o  m
    return new Exemplar(temp);
}