Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find example usages of the weka.core Instances.add method.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:milk.classifiers.MIBoost.java

License:Open Source License

/**
  * Builds the boosted multi-instance classifier. Each bag's instances are
  * flattened into one weighted single-instance dataset, then AdaBoost-style
  * rounds fit base models and reweight bags by their bag-level error.
  *
  * @param exps the training exemplars (bags) to be used for generating the
  * boosted classifier.
  * @exception Exception if the classifier could not be built successfully
  */
 public void buildClassifier(Exemplars exps) throws Exception {

     // Work on a copy so the caller's exemplars (and weights) are untouched.
     Exemplars train = new Exemplars(exps);

     if (train.classAttribute().type() != Attribute.NOMINAL) {
         throw new Exception("Class attribute must be nominal.");
     }
     if (train.checkForStringAttributes()) {
         throw new Exception("Can't handle string attributes!");
     }

     m_ClassIndex = train.classIndex();
     m_IdIndex = train.idIndex();
     m_NumClasses = train.numClasses();
     m_NumIterations = m_MaxIterations;

     // Only binary class problems are supported.
     if (m_NumClasses > 2) {
         throw new Exception("Not yet prepared to deal with multiple classes!");
     }

     if (m_Classifier == null)
         throw new Exception("A base classifier has not been specified!");
     // Boosting works through instance weights, so the base learner must honour them.
     if (!(m_Classifier instanceof WeightedInstancesHandler))
         throw new Exception("Base classifier cannot handle weighted instances!");

     m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
     if (m_Debug)
         System.err.println("Base classifier: " + m_Classifier.getClass().getName());

     m_Beta = new double[m_NumIterations];
     m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

     double N = (double) train.numExemplars(), sumNi = 0;
     Instances data = new Instances(m_Attributes, 0); // Flattened data to learn a model from
     data.deleteAttributeAt(m_IdIndex); // Bag-ID attribute carries no class information
     Instances dataset = new Instances(data, 0); // Header-only reference dataset

     // sumNi = total number of instances over all bags; every bag starts
     // with weight sumNi/N, spread evenly over its instances.
     for (int i = 0; i < N; i++)
         sumNi += train.exemplar(i).getInstances().numInstances();

     for (int i = 0; i < N; i++) {
         Exemplar exi = train.exemplar(i);
         exi.setWeight(sumNi / N);
         Instances insts = exi.getInstances();
         double ni = (double) insts.numInstances();
         for (int j = 0; j < ni; j++) {
             Instance ins = new Instance(insts.instance(j)); // Copy so the bag's own data stays intact
             //insts.instance(j).setWeight(1.0);

             ins.deleteAttributeAt(m_IdIndex);
             ins.setDataset(dataset);
             ins.setWeight(exi.weight() / ni);
             data.add(ins);
         }
     }

     // Assumes the order of the instances is preserved by the Discretize filter,
     // since dataIdx below maps flattened instances back to their bags by position.
     if (m_DiscretizeBin > 0) {
         m_Filter = new Discretize();
         m_Filter.setInputFormat(new Instances(data, 0));
         m_Filter.setBins(m_DiscretizeBin);
         data = Filter.useFilter(data, m_Filter);
     }

     // Main boosting loop
     int dataIdx;
     iterations: for (int m = 0; m < m_MaxIterations; m++) {
         if (m_Debug)
             System.err.println("\nIteration " + m);
         // Build a model on the current weighting
         m_Models[m].buildClassifier(data);

         // Prediction of each bag: err[n] is the fraction of instances in
         // bag n that the freshly built model misclassifies.
         double[] err = new double[(int) N], weights = new double[(int) N];
         boolean perfect = true, tooWrong = true;
         dataIdx = 0;
         for (int n = 0; n < N; n++) {
             Exemplar exn = train.exemplar(n);
             // Prediction of each instance and the predicted class distribution
             // of the bag
             double nn = (double) exn.getInstances().numInstances();
             for (int p = 0; p < nn; p++) {
                 Instance testIns = data.instance(dataIdx++);
                 if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue()) // Weighted instance-wise 0-1 errors
                     err[n]++;
             }
             weights[n] = exn.weight();
             err[n] /= nn;
             // perfect stays true only while every bag error is <= 0.5;
             // tooWrong stays true only while every bag error is >= 0.5.
             if (err[n] > 0.5)
                 perfect = false;
             if (err[n] < 0.5)
                 tooWrong = false;
         }

         if (perfect || tooWrong) { // All bag errors on one side of 0.5: beta cannot be fitted
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("No errors");
             break iterations;
         }

         // One-dimensional unconstrained search for the boosting weight c.
         double[] x = new double[1];
         x[0] = 0;
         double[][] b = new double[2][x.length];
         b[0][0] = Double.NaN; // NaN bounds mean the search is unbounded
         b[1][0] = Double.NaN;

         OptEng opt = new OptEng();
         opt.setWeights(weights);
         opt.setErrs(err);
         //opt.setDebug(m_Debug);
         if (m_Debug)
             System.out.println("Start searching for c... ");
         x = opt.findArgmin(x, b);
         while (x == null) {
             // findArgmin returns null when it ran out of iterations:
             // restart from the best point found so far.
             x = opt.getVarbValues();
             if (m_Debug)
                 System.out.println("200 iterations finished, not enough!");
             x = opt.findArgmin(x, b);
         }
         if (m_Debug)
             System.out.println("Finished.");
         m_Beta[m] = x[0];

         if (m_Debug)
             System.err.println("c = " + m_Beta[m]);

         // Stop if error too small or error too big and ignore this model
         if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
             if (m == 0)
                 m_Beta[m] = 1.0;
             else
                 m_Beta[m] = 0;
             m_NumIterations = m + 1;
             if (m_Debug)
                 System.err.println("Errors out of range!");
             break iterations;
         }

         // Update bag weights: bags with error above 0.5 gain weight,
         // bags with error below 0.5 lose weight.
         dataIdx = 0;
         double totWeights = 0;
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
             totWeights += exr.weight();
         }

         if (m_Debug)
             System.err.println("Total weights = " + totWeights);

         // Renormalize so the weights still sum to sumNi, then push each
         // bag's weight down evenly onto its instances.
         for (int r = 0; r < N; r++) {
             Exemplar exr = train.exemplar(r);
             double num = (double) exr.getInstances().numInstances();
             exr.setWeight(sumNi * exr.weight() / totWeights);
             //if(m_Debug)
             //    System.err.print("\nExemplar "+r+"="+exr.weight()+": \t");
             for (int s = 0; s < num; s++) {
                 Instance inss = data.instance(dataIdx);
                 inss.setWeight(exr.weight() / num);
                 //    if(m_Debug)
                 //  System.err.print("instance "+s+"="+inss.weight()+
                 //          "|ew*iw*sumNi="+data.instance(dataIdx).weight()+"\t");
                 if (Double.isNaN(inss.weight()))
                     throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                 dataIdx++;
             }
             //if(m_Debug)
             //    System.err.println();
         }
     }
 }

From source file:milk.classifiers.MIRBFNetwork.java

License:Open Source License

/**
 * Transforms multi-instance exemplars into a cluster-membership feature space.
 * A density-based clusterer is trained on the pooled instances of all bags,
 * then each bag is mapped through the resulting cluster-membership filter.
 *
 * @param ex the exemplars (bags) to transform
 * @return a new set of exemplars in the cluster-membership space
 * @exception Exception if filtering or clustering fails
 */
public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together into one flat dataset, weighting each
    // instance by 1/bagSize so every bag contributes equally to clustering.
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }
    // Rescale so the weights sum to the total number of instances.
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    // Train the cluster-membership filter (m_clm) on the pooled data.
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    // The bag-ID attribute must not influence clustering (index is 1-based).
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter and discard result: this pass only trains the filter; we
    // keep just the header of the transformed space.
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    // Re-insert the bag-ID attribute (attribute 0) into the new header.
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars, filter each bag separately and add the result
    // to the new multi-instance dataset.
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        // Restore this bag's ID value on every transformed instance.
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    //System.err.println("Finished transforming");
    //System.err.println(newExs);
    return newExs;
}

From source file:milk.classifiers.MIWrapper.java

License:Open Source License

/**
 * Flattens multi-instance exemplars into a single-instance dataset.
 * The bag-ID attribute is removed and each instance receives the weight
 * sumNi / (N * ni), so that every bag contributes equal total weight.
 *
 * @param train the exemplars (bags) to flatten
 * @return the flattened, weighted single-instance dataset
 * @exception Exception if the transformation fails
 */
public Instances transform(Exemplars train) throws Exception {

    // Output view: same attributes as the bags, minus the bag-ID column.
    Instances flattened = new Instances(m_Attributes);
    flattened.deleteAttributeAt(m_IdIndex);
    Instances header = new Instances(flattened, 0);

    int numBags = train.numExemplars();

    // Total number of instances over all bags.
    double totalInstances = 0;
    for (int bag = 0; bag < numBags; bag++)
        totalInstances += train.exemplar(bag).getInstances().numInstances();

    // Copy every instance, weighting it so all bags contribute equally.
    for (int bag = 0; bag < numBags; bag++) {
        Instances bagInsts = train.exemplar(bag).getInstances();
        double bagSize = (double) bagInsts.numInstances();
        for (int k = 0; k < bagSize; k++) {
            Instance copy = new Instance(bagInsts.instance(k));
            copy.deleteAttributeAt(m_IdIndex);
            copy.setDataset(header);
            copy.setWeight(totalInstances / (numBags * bagSize));
            flattened.add(copy);
        }
    }

    return flattened;
}

From source file:milk.classifiers.SimpleMI.java

License:Open Source License

/**
 * Summarizes each exemplar (bag) into a single instance: every non-ID,
 * non-class attribute is replaced either by its mean/mode over the bag
 * (m_TransformMethod == 1) or by the midpoint of its min/max range.
 *
 * @param train the exemplars (bags) to summarize
 * @return a dataset with one summary instance per bag
 * @exception Exception if the transformation fails
 */
public Instances transform(Exemplars train) throws Exception {

    // Output dataset: same attributes minus the bag-ID column.
    Instances summarized = new Instances(m_Attributes);
    summarized.deleteAttributeAt(m_IdIndex);
    Instances header = new Instances(summarized, 0);

    // Prototype used to stamp out one summary row per exemplar.
    Instance prototype = new Instance(header.numAttributes());
    prototype.setDataset(header);

    int numBags = train.numExemplars();
    for (int bag = 0; bag < numBags; bag++) {
        Exemplar current = train.exemplar(bag);
        Instances bagInsts = current.getInstances();

        Instance row = new Instance(prototype);
        row.setDataset(header);

        int target = 0;
        for (int att = 0; att < bagInsts.numAttributes(); att++) {
            // The bag-ID and class columns are not summarized.
            if (att == m_IdIndex || att == m_ClassIndex)
                continue;
            double summary;
            if (m_TransformMethod == 1) {
                summary = bagInsts.meanOrMode(att);
            } else {
                double[] range = minimax(bagInsts, att);
                summary = (range[0] + range[1]) / 2.0;
            }
            row.setValue(target++, summary);
        }
        row.setClassValue(current.classValue());
        summarized.add(row);
    }

    return summarized;
}

From source file:milk.experiment.MIInstanceQuery.java

License:Open Source License

/**
 * Makes a database query and converts the result table into a set of
 * instances. JDBC column types are mapped onto weka attribute types
 * (nominal, numeric, string or date) and SQL NULLs become missing values.
 * The database connection is deliberately left open afterwards so that
 * further queries can be made.
 *
 * @param query the query to convert to instances
 * @return the instances contained in the result of the query
 * @exception Exception if an error occurs
 */
public Instances retrieveInstances(String query) throws Exception {

    System.err.println("Executing query: " + query);
    connectToDatabase();
    if (execute(query) == false) {
        throw new Exception("Query didn't produce results");
    }
    ResultSet rs = getResultSet();
    try {
        System.err.println("Getting metadata...");
        ResultSetMetaData md = rs.getMetaData();

        // Determine the structure of the instances from the column metadata.
        int numAttributes = md.getColumnCount();
        int[] attributeTypes = new int[numAttributes];
        Hashtable[] nominalIndexes = new Hashtable[numAttributes];
        FastVector[] nominalStrings = new FastVector[numAttributes];
        for (int i = 1; i <= numAttributes; i++) {
            switch (md.getColumnType(i)) {
            case Types.CHAR:
            case Types.VARCHAR:
            case Types.LONGVARCHAR:
            case Types.BINARY:
            case Types.VARBINARY:
            case Types.LONGVARBINARY:
                // String-like columns become nominal; distinct values are
                // indexed as they are encountered while reading the rows.
                attributeTypes[i - 1] = Attribute.NOMINAL;
                nominalIndexes[i - 1] = new Hashtable();
                nominalStrings[i - 1] = new FastVector();
                break;
            case Types.BIT:
                // boolean --> nominal {false, true}
                attributeTypes[i - 1] = Attribute.NOMINAL;
                nominalIndexes[i - 1] = new Hashtable();
                nominalIndexes[i - 1].put("false", new Double(0));
                nominalIndexes[i - 1].put("true", new Double(1));
                nominalStrings[i - 1] = new FastVector();
                nominalStrings[i - 1].addElement("false");
                nominalStrings[i - 1].addElement("true");
                break;
            case Types.NUMERIC:
            case Types.DECIMAL:
            case Types.TINYINT:
            case Types.SMALLINT:
            case Types.INTEGER:
            case Types.BIGINT:
            case Types.REAL:
            case Types.FLOAT:
            case Types.DOUBLE:
                // All numeric SQL types map to a numeric attribute.
                attributeTypes[i - 1] = Attribute.NUMERIC;
                break;
            case Types.DATE:
            case Types.TIME:
            case Types.TIMESTAMP:
                attributeTypes[i - 1] = Attribute.DATE;
                break;
            default:
                // Unknown column types are stored as strings.
                attributeTypes[i - 1] = Attribute.STRING;
            }
        }

        // Step through the tuples.
        System.err.println("Creating instances...");
        FastVector instances = new FastVector();
        int rowCount = 0;
        while (rs.next()) {
            if (rowCount % 100 == 0) {
                System.err.print("read " + rowCount + " instances \r");
                System.err.flush();
            }
            double[] vals = new double[numAttributes];
            for (int i = 1; i <= numAttributes; i++) {
                switch (md.getColumnType(i)) {
                case Types.CHAR:
                case Types.VARCHAR:
                case Types.LONGVARCHAR:
                case Types.BINARY:
                case Types.VARBINARY:
                case Types.LONGVARBINARY:
                    String str = rs.getString(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        // Look up (or create) the nominal index of this value.
                        Double index = (Double) nominalIndexes[i - 1].get(str);
                        if (index == null) {
                            index = new Double(nominalStrings[i - 1].size());
                            nominalIndexes[i - 1].put(str, index);
                            nominalStrings[i - 1].addElement(str);
                        }
                        vals[i - 1] = index.doubleValue();
                    }
                    break;
                case Types.BIT:
                    boolean boo = rs.getBoolean(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (boo ? 1.0 : 0.0);
                    }
                    break;
                case Types.NUMERIC:
                case Types.DECIMAL:
                    double dd = rs.getDouble(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = dd;
                    }
                    break;
                case Types.TINYINT:
                    byte by = rs.getByte(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) by;
                    }
                    break;
                case Types.SMALLINT:
                    // FIX: was rs.getByte(i), which silently truncated SMALLINT
                    // values outside the byte range; SMALLINT must be read as short.
                    short sh = rs.getShort(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) sh;
                    }
                    break;
                case Types.INTEGER:
                    int in = rs.getInt(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) in;
                    }
                    break;
                case Types.BIGINT:
                    long lo = rs.getLong(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) lo;
                    }
                    break;
                case Types.REAL:
                    float fl = rs.getFloat(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) fl;
                    }
                    break;
                case Types.FLOAT:
                case Types.DOUBLE:
                    double dou = rs.getDouble(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        vals[i - 1] = (double) dou;
                    }
                    break;
                case Types.DATE:
                case Types.TIME:
                case Types.TIMESTAMP:
                    Date date = rs.getDate(i);
                    if (rs.wasNull()) {
                        vals[i - 1] = Instance.missingValue();
                    } else {
                        // Dates are stored as milliseconds since the epoch.
                        vals[i - 1] = (double) date.getTime();
                    }
                    break;
                default:
                    vals[i - 1] = Instance.missingValue();
                }
            }
            Instance newInst;
            if (m_CreateSparseData) {
                newInst = new SparseInstance(1.0, vals);
            } else {
                newInst = new Instance(1.0, vals);
            }
            instances.addElement(newInst);
            rowCount++;
        }

        // Create the header and add the instances to the dataset.
        System.err.println("Creating header...");
        FastVector attribInfo = new FastVector();
        for (int i = 0; i < numAttributes; i++) {
            String attribName = md.getColumnName(i + 1);
            switch (attributeTypes[i]) {
            case Attribute.NOMINAL:
                attribInfo.addElement(new Attribute(attribName, nominalStrings[i]));
                break;
            case Attribute.NUMERIC:
                attribInfo.addElement(new Attribute(attribName));
                break;
            case Attribute.STRING:
                attribInfo.addElement(new Attribute(attribName, (FastVector) null));
                break;
            case Attribute.DATE:
                attribInfo.addElement(new Attribute(attribName, (String) null));
                break;
            default:
                throw new Exception("Unknown attribute type");
            }
        }
        Instances result = new Instances("QueryResult", attribInfo, instances.size());
        for (int i = 0; i < instances.size(); i++) {
            result.add((Instance) instances.elementAt(i));
        }
        return result;
    } finally {
        // FIX: the result set previously leaked if any exception was thrown.
        // Always release it; the connection stays open for further queries.
        rs.close();
    }
}

From source file:milk.experiment.MIInstancesResultListener.java

License:Open Source License

/**
 * Performs any required post-processing. Calling this indicates that no
 * more results will be sent that need to be grouped together in any way.
 * The collected key/result values are assembled into a Weka dataset and
 * written out: header first, then one line per instance.
 *
 * @param rp the ResultProducer that generated the results
 * @exception Exception if an error occurs
 */
public void postProcess(MIResultProducer rp) throws Exception {

    if (m_RP != rp) {
        throw new Error("Unrecognized ResultProducer sending results!!");
    }

    String[] keyNames = m_RP.getKeyNames();
    String[] resultNames = m_RP.getResultNames();

    // One attribute per key column, followed by one per result column.
    FastVector attribInfo = new FastVector();
    for (int col = 0; col < m_AttributeTypes.length; col++) {
        String attribName = (col < keyNames.length) ? "Key_" + keyNames[col]
                : resultNames[col - keyNames.length];

        switch (m_AttributeTypes[col]) {
        case Attribute.NOMINAL:
            // Fall back to a string attribute when no values were observed.
            if (m_NominalStrings[col].size() > 0) {
                attribInfo.addElement(new Attribute(attribName, m_NominalStrings[col]));
            } else {
                attribInfo.addElement(new Attribute(attribName, (FastVector) null));
            }
            break;
        case Attribute.NUMERIC:
            attribInfo.addElement(new Attribute(attribName));
            break;
        case Attribute.STRING:
            attribInfo.addElement(new Attribute(attribName, (FastVector) null));
            break;
        default:
            throw new Exception("Unknown attribute type");
        }
    }

    Instances result = new Instances("InstanceResultListener", attribInfo, m_Instances.size());
    for (int row = 0; row < m_Instances.size(); row++) {
        result.add((Instance) m_Instances.elementAt(row));
    }

    // Emit the header followed by the data rows.
    m_Out.println(new Instances(result, 0));
    for (int row = 0; row < result.numInstances(); row++) {
        m_Out.println(result.instance(row));
    }

    // "-" denotes standard output, which must stay open; close real files.
    if (!(m_OutputFile == null) && !(m_OutputFile.getName().equals("-"))) {
        m_Out.close();
    }
}

From source file:ml.WekaBatteryPredictionExample.java

License:Open Source License

/**
 * Loads a two-column comma-separated training file
 * ("time_charged,battery_lasted_time") into a Weka dataset, with the
 * second attribute set as the class.
 *
 * @param txtFile path of the comma-separated text file to read
 * @return the populated training dataset
 * @throws IOException if the file cannot be read
 */
private static Instances loadDatasetFromTxt(String txtFile) throws IOException {
    ArrayList<Attribute> atts = new ArrayList<>(2);
    atts.add(new Attribute("time_charged", Attribute.NUMERIC));
    atts.add(new Attribute("battery_lasted_time", Attribute.NUMERIC));
    Instances data = new Instances("battery-prediction-training-set", atts, 0);
    data.setClassIndex(1);

    // FIX: try-with-resources guarantees the reader is closed even if reading
    // or parsing throws (the original leaked both streams on error).
    try (BufferedReader br = new BufferedReader(new FileReader(txtFile))) {
        String line;
        while ((line = br.readLine()) != null) {
            if (line.isEmpty()) {
                continue; // tolerate blank lines (e.g. a trailing newline)
            }
            String[] values = line.split(",");
            double[] newInst = new double[2];
            newInst[0] = Double.valueOf(values[0]);
            newInst[1] = Double.valueOf(values[1]);

            data.add(new DenseInstance(1.0, newInst));
        }
    }

    return data;
}

From source file:mlflex.learners.WekaLearner.java

License:Open Source License

/** Converts an ML-Flex prediction collection into a Weka evaluation dataset.
 *
 * @param predictions ML-Flex collection of predictions
 * @return Weka instances holding one row per prediction
 * @throws Exception
 */
private static Instances GetEvaluationInstances(Predictions predictions) throws Exception {
    FastVector attributeVector = GetAttributeVector(predictions);

    Instances evaluationSet = new Instances("DataSet", attributeVector, predictions.Size());
    // The class is the second attribute in the vector.
    evaluationSet.setClass((Attribute) attributeVector.elementAt(1));

    for (Prediction prediction : predictions.GetAll()) {
        evaluationSet.add(GetInstance(evaluationSet, attributeVector, prediction));
    }

    return evaluationSet;
}

From source file:mlflex.WekaInMemoryLearner.java

License:Open Source License

/** Creates Weka instances from ML-Flex collections.
 *
 *
 * @param dependentVariableInstances Dependent variable data instances
 * @param attVector Vector of Weka attributes
 * @param instances ML-Flex collection of instances
 * @return Weka instances/*from  w  w  w.  j  av a2 s  .c o m*/
 * @throws Exception
 */
public static Instances GetInstances(DataInstanceCollection dependentVariableInstances, FastVector attVector,
        DataInstanceCollection instances) throws Exception {
    Instances wekaInstances = new Instances("DataSet", attVector, instances.Size());

    if (dependentVariableInstances != null)
        wekaInstances.setClass((Attribute) attVector.elementAt(attVector.size() - 1));

    for (DataValues instance : instances)
        wekaInstances.add(GetInstance(wekaInstances, attVector, instance, dependentVariableInstances));

    return wekaInstances;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * This is not your grandpa's E-M algorithm... it has multiple mini-steps,
 * but "The e1-m1-e2-m2-e3-m3-Algorithm" is a mouthful, so we just call it *-Means Clustering
 * {Pronounced "Any-means (necessary) clustering"}
 *
 * Phases: (M1) assign non-outlier points to their nearest cluster, (E1)
 * recompute cluster statistics, drop empty clusters, (M2) split impure
 * clusters while fewer than maxK exist, (E2) recompute, (M3) try to
 * re-absorb outliers, (E3) final recompute.
 *
 * @param D the point/cluster pairs to cluster (cluster fields are updated in place)
 * @param subclusters the current set of clusters (modified in place)
 * @param maxK maximum number of clusters allowed
 * @return score at the end of the process (sum of recomputeAll() over all clusters)
 */
protected final double EMStep(List<ClusterPointPair> D, Collection<Riffle> subclusters, int maxK) {
    double ret = 0;
    // Clear the palette: reset every cluster's instance buffer and label tallies.
    for (Riffle c : subclusters) {
        if (c.instances == null) {
            c.instances = c.getHeader();
        }
        c.instances.clear();
        c.cleanTallies();
    }

    // Assign X's to nearest clusters (Maximization step 1)
    for (ClusterPointPair cxp : D) {
        if (this.potentialNovels.contains(cxp.x)) { // could also be if cxp.c == null, but this is safer
            continue; // ignore the outliers for a moment
        }
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, cxp.x);

        cxp.c = nearestClusters[0].getCluster();

        nearestClusters[0].getCluster().instances.add(cxp.x);
        // Only near-certainly labeled points (weight > 0.99) count toward tallies.
        if (cxp.x.weight() > 0.99) {
            nearestClusters[0].getCluster().addLabeling((int) cxp.x.classValue(), cxp.x.weight());
        }
    }

    // Find new radius (Expectation step)
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }

    // Remove empty clusters to make room for splits (Expectation-ish)
    Iterator<Riffle> cIter = subclusters.iterator();
    while (cIter.hasNext()) {
        Riffle rc = cIter.next();
        if (rc.instances.size() < 1) {
            cIter.remove();
        }
    }

    // Are we full?
    if (subclusters.size() < maxK) {
        // Fix bad clusters (Maximization step 2 - breaking up noisy clusters)
        Riffle sortedClusters[] = new Riffle[subclusters.size()];
        int tmpIdx = 0;
        for (Riffle tmpRfl : subclusters) {
            if (tmpIdx >= sortedClusters.length) {
                break;
            }
            sortedClusters[tmpIdx] = tmpRfl;
            tmpIdx++;
        }
        // Order clusters so the noisiest (large, impure, low-margin) come first.
        Arrays.sort(sortedClusters, new Comparator<Riffle>() {
            @Override
            public int compare(Riffle first, Riffle second) {
                if (first == null) {
                    return 1;
                }
                if (second == null) {
                    return -1;
                }
                double[] votes1 = first.getVotes().clone();
                double[] votes2 = second.getVotes().clone();
                double total1 = weka.core.Utils.sum(votes1);
                double total2 = weka.core.Utils.sum(votes2);
                Arrays.sort(votes1);
                Arrays.sort(votes2);
                // Second-largest vote mass (the "runner-up" label), floored at 1e-16.
                double pentultimate1 = 1e-16 + ((votes1.length > 1) ? votes1[votes1.length - 2] : 0);
                double pentultimate2 = 1e-16 + ((votes2.length > 1) ? votes2[votes2.length - 2] : 0);
                // this is equiv to purity - margin... yea... really... it's awesome... gotta love math...
                double score1 = (total1 > 0) ? first.size() * pentultimate1 / total1 : 0;
                double score2 = (total2 > 0) ? second.size() * pentultimate2 / total2 : 0;
                return Double.compare(score2, score1);
            }
        }); // end Anon sort
        for (int cIdx = 0; cIdx < sortedClusters.length && subclusters.size() < maxK; cIdx++) {
            Riffle splitMe = sortedClusters[cIdx];
            // Sufficiently pure clusters are left alone.
            if (splitMe.getPurity() > 0.9) {
                continue;
            }
            double[] votes = splitMe.getVotes();
            final double totalVotes = weka.core.Utils.sum(votes);
            final double critVotes = 1.0 / (votes.length * 2);
            if (totalVotes < 2) {
                continue;
            }
            // One candidate child cluster per label carrying enough vote mass.
            ArrayList<Riffle> splitSet = new ArrayList<>(votes.length);
            int numberOfNewClusters = 0;
            for (int lblIdx = 0; lblIdx < votes.length; ++lblIdx) {
                double labelVote = votes[lblIdx] / totalVotes;
                if (labelVote >= critVotes) {
                    splitSet.add(this.createNewCluster(splitMe.toInstance()));
                    numberOfNewClusters++;
                } else {
                    splitSet.add(null);
                }
            }
            if (numberOfNewClusters < 2) {
                continue;
            }
            // Route confidently labeled points to the child for their label;
            // everything else goes into "extras" and is placed by distance below.
            Instances extras = new Instances(splitMe.getHeader());
            for (Instance x : splitMe.instances) {
                if (x.weight() > 0.999) {
                    Riffle myHopefulCluster = splitSet.get((int) x.classValue());
                    if (myHopefulCluster != null) {
                        myHopefulCluster.instances.add(x);
                        myHopefulCluster.addLabeling((int) x.classValue(), x.weight());
                    } else {
                        extras.add(x);
                    }
                } else {
                    extras.add(x);
                }
            }
            LinkedList<Riffle> goodSet = new LinkedList<>();
            for (Riffle rfc : splitSet) {
                if (rfc == null) {
                    continue;
                }
                rfc.recomputeAll();
                goodSet.add(rfc);
                subclusters.add(rfc);
            }
            for (Instance x : extras) {
                final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(goodSet, x);
                nearestClusters[0].getCluster().instances.add(x);
            }
            subclusters.remove(splitMe);
        }
    }

    // The penultimate Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }

    // See if any outliers should actually be consumed by a cluster now... (Maximization step 3)
    Iterator<Instance> xIter = potentialNovels.iterator();
    while (xIter.hasNext()) {
        Instance xOut = xIter.next();
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, xOut);
        if (nearestClusters == null || nearestClusters.length < 1) {
            continue;
        }
        Riffle c = nearestClusters[0].getCluster();
        double d = nearestClusters[0].getDistance();
        // NOTE(review): absorbing the outlier when d > radius looks inverted --
        // d <= radius would mean the point lies inside the cluster. Confirm the
        // semantics of getDistance()/getRadius() before changing this.
        if (d > c.getRadius()) { // Welcome home wayward tuple!
            c.instances.add(xOut);
            xIter.remove();
        }
    }

    // And the final Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    // 
    return ret;
}
}