List of usage examples for the `weka.core.Instances.add(Instance)` method, collected from open-source projects.
@Override public boolean add(Instance instance)
From source file:meka.core.MLUtils.java
License:Open Source License
/** * Stack two Instances together row-wise. *///from w w w . ja v a 2 s . c om public static final Instances combineInstances(Instances D1, Instances D2) { Instances D = new Instances(D1); for (int i = 0; i < D2.numInstances(); i++) { D.add(D2.instance(i)); } return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * Pruned Sets (PS) transformation: turn a multi-label dataset into a
 * single-target multi-class dataset whose class values are label combinations.
 * Rare combinations are pruned; instances whose combination was pruned can be
 * "restored" as up to n copies labelled with frequent subsets of their labelset.
 *
 * @param D original multi-label dataset (a copy is made; caller's data untouched)
 * @param L number of labels in the original dataset
 * @param cname class name for the new dataset (may want to encode the list of
 *              indices here for RAkEL-like methods)
 * @param p pruning value: combinations counted fewer than p times are dropped
 * @param n restoration value: max number of frequent subsets used to re-introduce
 *          an instance whose combination was pruned (0 = discard such instances)
 * @return transformed dataset, class index set to 0
 */
public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) {
    // Work on a copy so the caller's dataset is not modified.
    D = new Instances(D);

    // Gather combinations: count each distinct labelset occurring in D.
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations occurring fewer than p times.
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2 ... or try again with a relaxed pruning value if not.
    if (distinctCombinations.size() <= 1 && p > 0) {
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute: one nominal value per surviving combination.
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels)
    // at position L and make it the class.
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    // Add class values. N is fixed up front so rows appended below (restored
    // copies) are not themselves re-processed by this loop.
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();
        if (ClassValues.contains(y_string)) // if its class value exists, just set it
            x.setClassValue(y_string);
        else if (n > 0) {
            // Decomposition: relabel with the most frequent subset, and append
            // up to n-1 extra copies labelled with the next-best subsets.
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            if (d_subsets.length > 0) {
                // fast path: reuse the existing row for the best subset
                x.setClassValue(d_subsets[0].toString());
                // additional copies for the remaining subsets
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                // No usable subset: mark missing so it is deleted below.
                x.setClassMissing();
            }
        }
        // NOTE(review): when n == 0 and the combination was pruned, the class is
        // left missing (insertAttributeAt default) and the row is deleted below.
    }

    // Remove rows with missing class (pruned and unrestorable instances).
    D.deleteWithMissingClass();

    try {
        // Strip the original L label attributes, leaving only the new class.
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset/*from w w w . j av a 2s.c om*/ * @param L number of labels in that dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); 
x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.Result.java
License:Open Source License
/**
 * Convert a list of Results (metric maps) into an Instances object.
 * The FIRST map defines the attribute set: only its Double-valued keys
 * become (numeric) attributes.
 *
 * @param metrics an ArrayList of metric-name-to-value maps; must be non-empty,
 *                and every map is expected to contain the same Double-valued keys
 * @return Instances named "Results" with one row per map
 */
public static Instances getResultsAsInstances(ArrayList<HashMap<String, Object>> metrics) {
    // The first map acts as the schema template.
    HashMap<String, Object> o_master = metrics.get(0);
    ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
    for (String key : o_master.keySet()) {
        if (o_master.get(key) instanceof Double) {
            attInfo.add(new Attribute(key));
        }
    }
    Instances resultInstances = new Instances("Results", attInfo, metrics.size());
    for (HashMap<String, Object> o : metrics) {
        Instance rx = new DenseInstance(attInfo.size());
        for (Attribute att : attInfo) {
            String name = att.name();
            // NOTE(review): assumes every map holds a Double for each attribute
            // taken from the first map — a missing key would NPE on unboxing here.
            rx.setValue(att, (double) o.get(name));
        }
        resultInstances.add(rx);
    }
    return resultInstances;
}
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes. * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'. * @see PSUtils.PSTransformation/*from w ww. j a va 2 s . com*/ * @param indices m by k: m super variables, each relating to k original variables * @param D either multi-label or multi-target dataset * @param p pruning value * @param n subset relpacement value * @return a multi-target dataset */ public static Instances SLTransformation(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ // F.removeLabels(D_,L); for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); 
//System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! return D_; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Turns the statistics into Instances.
 *
 * @param stats the statistics to convert
 * @return the generated data: one row per EvaluationStatistics, with string
 *         attributes for classifier/relation and numeric attributes otherwise
 */
protected Instances toInstances(List<EvaluationStatistics> stats) {
    Instances result;
    ArrayList<Attribute> atts;
    List<String> headers;
    Instance inst;
    double[] values;
    int i;

    // header: classifier and relation become string attributes, all other
    // headers become numeric attributes
    headers = EvaluationStatisticsUtils.headers(stats, true, true);
    atts = new ArrayList<>();
    for (String header : headers) {
        if (header.equals(EvaluationStatistics.KEY_CLASSIFIER)
                || header.equals(EvaluationStatistics.KEY_RELATION))
            atts.add(new Attribute(header, (List) null)); // (List) null => string attribute
        else
            atts.add(new Attribute(header));
    }
    result = new Instances("stats", atts, stats.size());

    // data
    for (EvaluationStatistics stat : stats) {
        values = new double[result.numAttributes()];
        for (i = 0; i < values.length; i++) {
            if (headers.get(i).equals(EvaluationStatistics.KEY_CLASSIFIER))
                // string attributes store an index into the attribute's string pool
                values[i] = result.attribute(i).addStringValue(stat.getCommandLine());
            else if (headers.get(i).equals(EvaluationStatistics.KEY_RELATION))
                values[i] = result.attribute(i).addStringValue(stat.getRelation());
            else if (stat.containsKey(headers.get(i)))
                values[i] = stat.get(headers.get(i)).doubleValue();
            else
                values[i] = Utils.missingValue(); // statistic absent for this entry
        }
        inst = new DenseInstance(1.0, values);
        result.add(inst);
    }

    return result;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'. * @param D assume attributes in D labeled by original index * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */// w w w . j a va2s . c om public static Instances mergeLabels(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! D = null; return D_; }
From source file:meka.gui.dataviewer.DataTableModel.java
License:Open Source License
/**
 * Sorts the instances via the given attribute.
 *
 * @param columnIndex the index of the column (column 0 is the row-number
 *                    column, so data columns start at 1)
 * @param ascending ascending if true, otherwise descending
 */
public void sortInstances(int columnIndex, boolean ascending) {
    if ((columnIndex > 0) && (columnIndex < getColumnCount())) {
        addUndoPoint();
        // stableSort keeps the relative order of rows with equal values
        m_Data.stableSort(columnIndex - 1);
        if (!ascending) {
            // Reverse the ascending order, but keep each run of EQUAL values
            // in its original (stable) order instead of reversing it too.
            Instances reversedData = new Instances(m_Data, m_Data.numInstances());
            int i = m_Data.numInstances();
            while (i > 0) {
                i--;
                // walk back to the start of the run of values equal to row i
                int equalCount = 1;
                while ((i > 0) && (m_Data.instance(i).value(columnIndex - 1) == m_Data.instance(i - 1)
                        .value(columnIndex - 1))) {
                    equalCount++;
                    i--;
                }
                // copy the whole run in forward order
                int j = 0;
                while (j < equalCount) {
                    reversedData.add(m_Data.instance(i + j));
                    j++;
                }
            }
            m_Data = reversedData;
        }
        notifyListener(new TableModelEvent(this));
    }
}
From source file:meka.gui.explorer.classify.PredictionsOnTestset.java
License:Open Source License
/**
 * Returns the action listener to use in the menu. Clicking the menu item runs
 * the selected model over the tab's current test set (on a background thread)
 * and shows the predictions in a data-viewer dialog.
 *
 * @param history the current history
 * @param index the selected history item
 * @return the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
    final MultiLabelClassifier classifier = (MultiLabelClassifier) getClassifier(history, index);
    final Instances header = getHeader(history, index);
    return new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            Runnable run = new Runnable() {
                @Override
                public void run() {
                    ClassifyTab owner = (ClassifyTab) getOwner();
                    Instances test;
                    owner.startBusy("Predictions on test...");
                    try {
                        MLUtils.prepareData(owner.getTestData());
                        test = new Instances(owner.getTestData());
                        test.setClassIndex(owner.getTestData().classIndex());
                        // the model's training header must match the test set
                        String msg = header.equalHeadersMsg(test);
                        if (msg != null)
                            throw new IllegalArgumentException(
                                    "Model's training set and current test set are not compatible:\n" + msg);
                        // collect predictions: copy each test row and overwrite
                        // its label columns with the predicted distribution
                        Instances predicted = new Instances(test, 0);
                        for (int i = 0; i < test.numInstances(); i++) {
                            double pred[] = classifier.distributionForInstance(test.instance(i));
                            // Cut off any [no-longer-needed] probabalistic information from MT classifiers.
                            if (classifier instanceof MultiTargetClassifier)
                                pred = Arrays.copyOf(pred, test.classIndex());
                            Instance predInst = (Instance) test.instance(i).copy();
                            for (int j = 0; j < pred.length; j++)
                                predInst.setValue(j, pred[j]);
                            predicted.add(predInst);
                            // progress feedback every 100 instances
                            if ((i + 1) % 100 == 0)
                                owner.showStatus(
                                        "Predictions on test (" + (i + 1) + "/" + test.numInstances() + ")...");
                        }
                        owner.finishBusy();
                        // display predictions in a modeless viewer dialog
                        DataViewerDialog dialog = new DataViewerDialog(GUIHelper.getParentFrame(owner),
                                ModalityType.MODELESS);
                        dialog.setDefaultCloseOperation(DataViewerDialog.DISPOSE_ON_CLOSE);
                        dialog.setInstances(predicted);
                        dialog.setSize(800, 600);
                        dialog.setLocationRelativeTo(owner);
                        dialog.setVisible(true);
                    } catch (Exception e) {
                        // surface the failure in status bar, log, and a dialog
                        owner.handleException("Predictions failed on test set:", e);
                        owner.finishBusy("Predictions failed: " + e);
                        JOptionPane.showMessageDialog(owner, "Predictions failed:\n" + e, "Error",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
            };
            ((ClassifyTab) getOwner()).start(run);
        }
    };
}
From source file:MetaBlocking.EnhancedMetaBlocking.FastImplementations.RedefinedCardinalityNodePruning.java
License:Open Source License
protected void verifyValidEntities(int entityId, Instances trainingInstances) { if (validEntities.isEmpty()) { return;//from w w w . ja v a 2 s. co m } topKEdges.clear(); minimumWeight = Double.MIN_VALUE; Iterator<Integer> it = validEntitiesNeighbor.iterator(); for (int neighborId : validEntities) { // System.out.println("comparison A" + entityId +" "+ neighborId); // if(entityId==2516) // System.out.println("2516 ---"); double weight = getWeight(entityId, neighborId); int blockId = it.next(); if (neighborId == 6792) System.out.println("ok"); if (weight < minimumWeight) { continue; } Comparison comparison = getComparison(entityId, neighborId); comparison.setUtilityMeasure(weight); comparison.blockId = blockId; topKEdges.add(comparison); if (threshold < topKEdges.size()) { Comparison lastComparison = topKEdges.poll(); minimumWeight = lastComparison.getUtilityMeasure(); } } nearestEntities[entityId] = new HashSet<Comparison>(topKEdges); Iterator<Comparison> itb = nearestEntities[entityId].iterator(); while (itb.hasNext()) { Comparison c = itb.next(); int neighborId_clean; int neighborId = c.getEntityId1() == entityId ? c.getEntityId2() : c.getEntityId1(); neighborId_clean = neighborId; if (neighborId_clean == 6792 || neighborId == 6792) System.out.println("ok"); if (cleanCleanER && entityId < datasetLimit) { neighborId += datasetLimit; } // // if (nearestEntities[neighborId] == null) { // continue; // } // // if (nearestEntities[neighborId].contains(c)) { // if(! 
(entityId < neighborId)) // continue; // } // System.out.println(entityId +" "+ neighborId); // if(entityId>datasetLimit){ // int temp=neighborId_clean; // neighborId=entityId; // entityId=temp; // } Comparison comp = new Comparison(true, entityId, neighborId_clean); final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(c.blockId, comp); if (commonBlockIndices == null) continue; double[] instanceValues = new double[8]; double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(entityId, 0)); double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(neighborId, 0)); instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2; double raccb = 0; for (Integer index1 : commonBlockIndices) { raccb += 1.0 / comparisonsPerBlock[index1]; } if (raccb < 1.0E-6) { raccb = 1.0E-6; } instanceValues[1] = raccb; instanceValues[2] = commonBlockIndices.size() / (redundantCPE[entityId] + redundantCPE[neighborId] - commonBlockIndices.size()); instanceValues[3] = nonRedundantCPE[entityId]; instanceValues[4] = nonRedundantCPE[neighborId]; // instanceValues[5] = ebc.getSimilarityAttribute(c.getEntityId1(), c.getEntityId2()); instanceValues[5] = neighborId; instanceValues[6] = entityId;//c.getUtilityMeasure(); //(Math.sqrt(Math.pow(averageWeight[entityId], 2) + Math.pow(averageWeight[neighborId], 2)) / 4) * getWeight(c.getEntityId1(), c.getEntityId2()+datasetLimit); instanceValues[7] = adp.isSuperfluous(c) == true ? 0 : 1;//adp.isSuperfluous(getComparison(c.getEntityId1(), c.getEntityId2()+datasetLimit))?1:0; Instance newInstance = new DenseInstance(1.0, instanceValues); newInstance.setDataset(trainingInstances); trainingInstances.add(newInstance); // for (int i = 5; i < instanceValues.length-1; i++) { // System.out.print(instanceValues[i] +" "); // } // System.out.println(); // if(instanceValues[6]!=instanceValues[5]) // System.out.println("erro"); // else // System.out.print("..."); } }