Example usage for weka.core.Instances.add

List of usage examples for weka.core Instances add

Introduction

This page collects usage examples for the add method of weka.core.Instances.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:org.mcennis.graphrat.algorithm.machinelearning.MultiInstanceSVM.java

License:Open Source License

/**
 * Appends multi-instance training rows for {@code artist} to {@code dataSet}.
 * <p>
 * For every entry of {@code user}, each link of relation {@code parameter[4]} leaving that user is
 * a candidate row, provided its destination has both a {@code parameter[5]} link from the user and
 * outgoing {@code parameter[3]} links. Row layout: slot 0 is the interest link strength, slot 1 is
 * the strength of the first music link, slots {@code 2 .. artists.length + 1} hold one flag per
 * entry of {@code artists}, and the last slot is the class value.
 * <p>
 * Candidates are subsampled: a positive candidate (the user is linked to {@code artist}) is kept
 * when the number of positive candidates seen so far is a multiple of {@code positiveSkipCount};
 * a negative candidate is kept when the number of negative candidates seen so far is a multiple of
 * {@code skipCount}.
 *
 * @param g                 graph queried for links
 * @param dataSet           data set the generated instances are attached and added to
 * @param artist            actor whose link to each user decides the class ("true"/"false")
 * @param skipCount         sampling period for negative candidates; a zero value makes the modulo
 *                          throw {@link ArithmeticException}
 * @param positiveSkipCount sampling period for positive candidates; same zero caveat
 */
protected void addInstances(Graph g, Instances dataSet, Actor artist, int skipCount, int positiveSkipCount) {
    int skipCounter = 0;
    int positiveSkipCounter = 0;
    for (int i = 0; i < user.length; ++i) {
        String result = "false";
        if (g.getLink((String) parameter[3].getValue(), user[i], artist) != null) {
            result = "true";
        }
        boolean positive = result.contentEquals("true");

        Link[] interests = g.getLinkBySource((String) parameter[4].getValue(), user[i]);
        if (interests == null) {
            continue;
        }
        for (int j = 0; j < interests.length; ++j) {
            Link[] music = g.getLink((String) parameter[5].getValue(), user[i],
                    interests[j].getDestination());
            Link[] given = g.getLinkBySource((String) parameter[3].getValue(),
                    interests[j].getDestination());
            if ((given == null) || (music == null)) {
                continue;
            }

            // Only the counter matching this candidate's class takes part in the decision.
            boolean selected = positive
                    ? (positiveSkipCounter % positiveSkipCount == 0)
                    : (skipCounter % skipCount == 0);
            if (selected) {
                double[] values = new double[artists.length + 3];
                java.util.Arrays.fill(values, 0.0);
                values[0] = interests[j].getStrength();
                values[1] = music[0].getStrength();
                for (int k = 0; k < given.length; ++k) {
                    int artistIndex = java.util.Arrays.binarySearch(artists, given[k].getDestination());
                    // binarySearch returns a negative value when the key is absent; adding the
                    // offset to it would overwrite one of the two strength slots or throw, so
                    // destinations that are not in 'artists' are skipped.
                    if (artistIndex >= 0) {
                        values[artistIndex + 2] = 1.0;
                    }
                }
                if (positive) {
                    values[values.length - 1] = 1.0;
                }
                Instance instance = new SparseInstance(3 + artists.length, values);
                instance.setDataset(dataSet);
                instance.setClassValue(result);
                dataSet.add(instance);
            }

            // Every candidate advances its class counter, whether or not it was sampled.
            if (positive) {
                positiveSkipCounter++;
            } else {
                skipCounter++;
            }
        }
    }
}

From source file:org.mcennis.graphrat.algorithm.machinelearning.SVM.java

License:Open Source License

/**
 * Appends at most one training row per user to {@code dataSet} for the given {@code artist}.
 * <p>
 * A user contributes a candidate row only when it has outgoing links of relation
 * {@code parameter[3]}. The row holds one slot per entry of {@code artists} plus a trailing class
 * slot: destinations of the user's links are flagged with 1.0, except the slot of {@code artist}
 * itself, which is set to NaN. Candidates are subsampled with the same modulo scheme for both
 * classes: a candidate is kept when the count of earlier candidates of its class is a multiple of
 * the matching skip parameter.
 *
 * @param g                 graph queried for links
 * @param dataSet           data set the generated instances are attached and added to
 * @param artist            actor whose link to each user decides the class ("true"/"false")
 * @param skipCount         sampling period for negative candidates
 * @param positiveSkipCount sampling period for positive candidates
 */
protected void addInstances(Graph g, Instances dataSet, Actor artist, int skipCount, int positiveSkipCount) {
    int skipCounter = 0;
    int positiveSkipCounter = 0;
    for (int u = 0; u < user.length; ++u) {
        String result = "false";
        if (g.getLink((String) parameter[3].getValue(), user[u], artist) != null) {
            result = "true";
        }

        Link[] given = g.getLinkBySource((String) parameter[3].getValue(), user[u]);
        if (given == null) {
            continue;
        }

        boolean positive = result.contentEquals("true");
        boolean keep = positive
                ? (positiveSkipCounter % positiveSkipCount == 0)
                : (skipCounter % skipCount == 0);

        if (keep) {
            double[] row = new double[artists.length + 1];
            java.util.Arrays.fill(row, 0.0);
            for (int k = 0; k < given.length; ++k) {
                // the artist being predicted is masked out with NaN, every other one is flagged
                double flag = (given[k].getDestination() == artist) ? Double.NaN : 1.0;
                row[java.util.Arrays.binarySearch(artists, given[k].getDestination())] = flag;
            }
            if (positive) {
                row[row.length - 1] = 1.0;
            }
            Instance created = new SparseInstance(1 + artists.length, row);
            created.setDataset(dataSet);
            created.setClassValue(result);
            dataSet.add(created);
        }

        // the class counter advances for sampled and skipped candidates alike
        if (positive) {
            positiveSkipCounter++;
        } else {
            skipCounter++;
        }
    }
}

From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierMultiAttribute.java

License:Open Source License

/**
 * Cross-validates one classifier per destination actor and adds a derived link to the graph for
 * every prediction.
 * <p>
 * Steps: (1) copy the {@code Instance} stored under property {@code parameter[2]} of every actor
 * of mode {@code parameter[1]} into one master data set; (2) append one nominal
 * {@code {false,true}} attribute per actor of mode {@code parameter[3]} plus a nominal
 * {@code sourceID} attribute; (3) fill the per-destination attributes from the links of relation
 * {@code parameter[4]}; (4) for each destination attribute run {@code parameter[8]}-fold cross
 * validation with a classifier of class {@code parameter[7]}.
 * <p>
 * NOTE(review): several things could not be settled from this method alone and are left as found:
 * <ul>
 * <li>{@code parameter[9]} is cast to {@code Boolean} when filling ground truth and to
 * {@code String} when reading classifier options; one of the two indices is presumably wrong.</li>
 * <li>the predicted label ("true"/"false") is used as an actor ID, and {@code parameter[2]} (the
 * property name above) is used as a mode, when the derived link is built.</li>
 * <li>no statement here assigns the {@code sourceID} value of the instances, and Weka's
 * {@code insertAttributeAt} is documented to insert a copy of the attribute - confirm that the
 * {@code Attribute} objects kept in local variables carry the indices this code relies on.</li>
 * </ul>
 *
 * @param g graph that supplies actors, properties and links and receives the derived links
 */
@Override
public void execute(Graph g) {
    Actor[] source = g.getActor((String) parameter[1].getValue());
    if (source != null) {

        // nominal labels (actor IDs) of the "sourceID" attribute created below
        FastVector sourceTypes = new FastVector();
        Actor[] dest = g.getActor((String) parameter[3].getValue());
        if (dest != null) {
            // create the Instances set backing this object
            Instances masterSet = null;
            Instance[] trainingData = new Instance[source.length];
            for (int i = 0; i < source.length; ++i) {
                // First, acquire the instance objects for each actor
                String propertyID = (String) parameter[2].getValue();
                if ((Boolean) parameter[10].getValue()) {
                    propertyID = propertyID + g.getID();
                }
                Property p = source[i].getProperty(propertyID);
                if (p != null) {
                    Object[] values = p.getValue();
                    if (values.length > 0) {
                        // registered exactly once: a nominal attribute must not repeat a label
                        sourceTypes.addElement(source[i].getID());
                        trainingData[i] = (Instance) ((Instance) values[0]).copy();
                        // assume that this Instance has a backing dataset
                        // that contains all Instance objects to be tested
                        if (masterSet == null) {
                            masterSet = new Instances(trainingData[i].dataset(), source.length);
                        }
                        masterSet.add(trainingData[i]);
                    } else {
                        trainingData[i] = null;
                        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                                "Actor " + source[i].getType() + ":" + source[i].getID()
                                        + " does not have an Instance value of property ID " + p.getType());
                    }
                } else {
                    trainingData[i] = null;
                    // p is null on this path, so the requested ID is reported instead of p.getType()
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                            "Actor " + source[i].getType() + ":" + source[i].getID()
                                    + " does not have a property of ID " + propertyID);
                }
            }

            if (masterSet == null) {
                // no actor supplied an Instance; everything below would dereference null
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                        "No actor of mode '" + (String) parameter[1].getValue()
                                + "' provides an Instance; nothing to classify");
                return;
            }

            Vector<Attribute> destVector = new Vector<Attribute>();
            for (int i = 0; i < dest.length; ++i) {
                FastVector type = new FastVector();
                type.addElement("false");
                type.addElement("true");
                Attribute tmp = new Attribute(dest[i].getID(), type);
                destVector.add(tmp);
                masterSet.insertAttributeAt(tmp, masterSet.numAttributes());
            }
            Attribute sourceID = new Attribute("sourceID", sourceTypes);
            masterSet.insertAttributeAt(sourceID, masterSet.numAttributes());

            //set ground truth for evaluation
            for (int i = 0; i < masterSet.numInstances(); ++i) {
                Instance inst = masterSet.instance(i);
                // the source mode is parameter[1]; the instance counter is not a parameter index
                Actor user = g.getActor((String) parameter[1].getValue(),
                        sourceID.value((int) inst.value(sourceID)));
                if (user != null) {
                    for (int j = 0; j < dest.length; ++j) {
                        // ground truth belongs in the attribute of destination j, not in sourceID
                        Attribute truth = destVector.get(j);
                        if (g.getLink((String) parameter[4].getValue(), user, dest[j]) != null) {
                            inst.setValue(truth, "true");
                        } else {
                            if ((Boolean) parameter[9].getValue()) {
                                inst.setValue(truth, "false");
                            } else {
                                inst.setValue(truth, Double.NaN);
                            }
                        }
                    }
                } else {
                    Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE,
                            "Actor " + sourceID.value((int) inst.value(sourceID)) + " does not exist in graph");
                }
            }

            // perform cross fold evaluation of each classifier in turn
            String[] opts = ((String) parameter[9].getValue()).split("\\s+");
            Properties props = new Properties();
            if ((Boolean) parameter[11].getValue()) {
                props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID());
            } else {
                props.setProperty("LinkType", (String) parameter[5].getValue());
            }
            props.setProperty("LinkClass", "Basic");
            try {
                for (int destCount = 0; destCount < dest.length; ++destCount) {
                    masterSet.setClass(destVector.get(destCount));
                    for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) {
                        Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i);
                        // train on the complement of the test fold, not on the test fold itself
                        Instances train = masterSet.trainCV((Integer) parameter[8].getValue(), i);
                        Classifier classifier = (Classifier) ((Class) parameter[7].getValue()).newInstance();
                        classifier.setOptions(opts);
                        classifier.buildClassifier(train);
                        for (int j = 0; j < test.numInstances(); ++j) {
                            String sourceName = sourceID.value((int) test.instance(j).value(sourceID));
                            double result = classifier.classifyInstance(test.instance(j));
                            String predicted = masterSet.classAttribute().value((int) result);
                            Link derived = LinkFactory.newInstance().create(props);
                            derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0,
                                    g.getActor((String) parameter[3].getValue(), predicted));
                            g.add(derived);
                        }
                    }
                }
            } catch (InstantiationException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex);
            }

        } else { // dest==null
            Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                    "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors");
        }
    } else { // source==null
        Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING,
                "Source mode '" + (String) parameter[2].getValue() + "' has no actors");
    }
}

From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierOneAttribute.java

License:Open Source License

/**
 * Cross-validates a single multi-class classifier whose class labels are the IDs of the
 * destination actors, and adds a derived link to the graph for every prediction.
 * <p>
 * Steps: (1) build a nominal class attribute named {@code parameter[5]} from the IDs of all actors
 * of mode {@code parameter[3]}; (2) copy the {@code Instance} stored under property
 * {@code parameter[2]} of every actor of mode {@code parameter[1]} into one master data set;
 * (3) append a nominal {@code sourceID} attribute and the class attribute; (4) run
 * {@code parameter[8]}-fold cross validation with a classifier of class {@code parameter[6]}
 * configured with the whitespace-separated options in {@code parameter[7]}.
 * <p>
 * NOTE(review): left as found because the intent cannot be settled from this method alone:
 * <ul>
 * <li>source ID and class value are written to the {@code trainingData} copies after they were
 * added to {@code masterSet}; Weka documents {@code Instances.add} as copying the instance, so
 * confirm these values actually reach {@code masterSet}.</li>
 * <li>{@code parameter[2]} (the property name above) is used as a mode when the derived link is
 * built and in the final "Source mode" warning.</li>
 * </ul>
 *
 * @param g graph that supplies actors, properties and links and receives the derived links
 */
@Override
public void execute(Graph g) {
    Actor[] source = g.getActor((String) parameter[1].getValue());
    if (source != null) {

        // label sets for the class attribute and for the sourceID attribute
        FastVector classTypes = new FastVector();
        FastVector sourceTypes = new FastVector();
        Actor[] dest = g.getActor((String) parameter[3].getValue());
        if (dest != null) {
            for (int i = 0; i < dest.length; ++i) {
                classTypes.addElement(dest[i].getID());
            }
            Attribute classAttribute = new Attribute((String) parameter[5].getValue(), classTypes);

            Instance[] trainingData = new Instance[source.length];
            Instances masterSet = null;
            for (int i = 0; i < source.length; ++i) {

                // First, acquire the instance objects for each actor
                String propertyID = (String) parameter[2].getValue();
                if ((Boolean) parameter[9].getValue()) {
                    propertyID = propertyID + g.getID();
                }
                Property p = source[i].getProperty(propertyID);
                if (p != null) {
                    Object[] values = p.getValue();
                    if (values.length > 0) {
                        sourceTypes.addElement(source[i].getID());
                        trainingData[i] = (Instance) ((Instance) values[0]).copy();
                        // assume that this Instance has a backing dataset
                        // that contains all Instance objects to be tested
                        if (masterSet == null) {
                            masterSet = new Instances(trainingData[i].dataset(), source.length);
                        }
                        masterSet.add(trainingData[i]);
                    } else {
                        trainingData[i] = null;
                        Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                                "Actor " + source[i].getType() + ":" + source[i].getID()
                                        + " does not have an Instance value of property ID " + p.getType());
                    }
                } else {
                    trainingData[i] = null;
                    // p is null on this path, so the requested ID is reported instead of p.getType()
                    Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                            "Actor " + source[i].getType() + ":" + source[i].getID()
                                    + " does not have a property of ID " + propertyID);
                }

            } // for every actor, fix the instance

            if (masterSet == null) {
                // no actor supplied an Instance; everything below would dereference null
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                        "No actor of mode '" + (String) parameter[1].getValue()
                                + "' provides an Instance; nothing to classify");
                return;
            }

            Attribute sourceID = new Attribute("sourceID", sourceTypes);
            masterSet.insertAttributeAt(sourceID, masterSet.numAttributes());
            masterSet.insertAttributeAt(classAttribute, masterSet.numAttributes());
            masterSet.setClass(classAttribute);
            for (int i = 0; i < source.length; ++i) {
                if (trainingData[i] != null) {
                    trainingData[i].setValue(sourceID, source[i].getID());
                    Link[] link = g.getLinkBySource((String) parameter[4].getValue(), source[i]);
                    // an empty array is treated like "no link" so link[0] is never read from it
                    if ((link == null) || (link.length == 0)) {
                        trainingData[i].setClassValue(Double.NaN);
                    } else {
                        trainingData[i].setClassValue(link[0].getDestination().getID());
                    }
                }
            }

            String[] opts = ((String) parameter[7].getValue()).split("\\s+");
            Properties props = new Properties();
            if ((Boolean) parameter[10].getValue()) {
                props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID());
            } else {
                props.setProperty("LinkType", (String) parameter[5].getValue());
            }
            props.setProperty("LinkClass", "Basic");
            try {
                for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) {
                    Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i);
                    // train on the complement of the test fold, not on the test fold itself
                    Instances train = masterSet.trainCV((Integer) parameter[8].getValue(), i);
                    Classifier classifier = (Classifier) ((Class) parameter[6].getValue()).newInstance();
                    classifier.setOptions(opts);
                    classifier.buildClassifier(train);
                    for (int j = 0; j < test.numInstances(); ++j) {
                        String sourceName = sourceID.value((int) test.instance(j).value(sourceID));
                        double result = classifier.classifyInstance(test.instance(j));
                        String predicted = masterSet.classAttribute().value((int) result);
                        Link derived = LinkFactory.newInstance().create(props);
                        derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0,
                                g.getActor((String) parameter[3].getValue(), predicted));
                        g.add(derived);
                    }
                }
            } catch (InstantiationException ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (IllegalAccessException ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            } catch (Exception ex) {
                Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex);
            }

        } else { // dest==null
            Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                    "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors");
        }
    } else { // source==null
        Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING,
                "Source mode '" + (String) parameter[2].getValue() + "' has no actors");
    }
}

From source file:org.montp2.m1decol.ter.utils.WekaUtils.java

License:Open Source License

/**
 * Builds a data set named "CategorizeUserForum" with a single string attribute called "data",
 * adds one instance per file returned by {@code FileUtils.ls(inPath)} whose name is not listed in
 * {@code excludeFiles}, and writes the data set's {@code toString()} form to {@code outPath}.
 *
 * @param inPath       location handed to {@code FileUtils.ls} to enumerate the input files
 * @param outPath      destination handed to {@code OutputStreamUtils.writeSimple}
 * @param excludeFiles file names (as returned by {@code File.getName()}) to leave out
 * @throws IOException declared by the method; presumably raised by the read/write helpers
 */
public static void createARFF(String inPath, String outPath, List<String> excludeFiles) throws IOException {

    // passing a null value list makes "data" a string attribute
    Attribute textAttribute = new Attribute("data", (FastVector) null);
    FastVector attributes = new FastVector(1);
    attributes.addElement(textAttribute);
    Instances data = new Instances("CategorizeUserForum", attributes, 0);

    for (File file : FileUtils.ls(inPath)) {
        if (excludeFiles.contains(file.getName())) {
            continue;
        }
        String content = InputStreamUtils.readInputStream(file.getAbsolutePath());
        // the instance stores the index under which the text was registered with the attribute
        double[] row = { (double) data.attribute(0).addStringValue(content) };
        data.add(new Instance(1.0, row));
    }

    OutputStreamUtils.writeSimple(data.toString(), outPath);
}

From source file:org.openml.webapplication.algorithm.InstancesHelper.java

License:Open Source License

/**
 * Reorders {@code dataset} in place so that the class proportions of every prefix track the
 * ratios returned by {@code classRatios(dataset)}.
 * <p>
 * The instances are first bucketed per class value, the data set is emptied, and then refilled one
 * instance at a time: each step takes the next instance from the bucket chosen by
 * {@code biggestDifference(targetRatios, currentRatios)} and recomputes the current ratios.
 *
 * @param dataset data set with a class attribute set; modified in place
 */
@SuppressWarnings("unchecked")
public static void stratify(Instances dataset) {
    final int classCount = dataset.classAttribute().numValues();
    final int total = dataset.numInstances();
    final double[] targetRatios = classRatios(dataset);
    final double[] observedRatios = new double[classCount];
    final int[] drawnPerClass = new int[classCount];

    // one FIFO bucket per class value
    List<Instance>[] buckets = new LinkedList[classCount];
    for (int c = 0; c < classCount; ++c) {
        buckets[c] = new LinkedList<Instance>();
    }
    for (int pos = 0; pos < total; ++pos) {
        Instance inst = dataset.instance(pos);
        buckets[(int) inst.classValue()].add(inst);
    }

    // drain the data set from the tail; the buckets still reference every instance
    int toRemove = total;
    while (toRemove > 0) {
        dataset.delete(dataset.numInstances() - 1);
        --toRemove;
    }

    // refill, always serving the class selected by biggestDifference
    for (int placed = 1; placed <= total; ++placed) {
        int pick = biggestDifference(targetRatios, observedRatios);
        dataset.add(buckets[pick].remove(0));
        drawnPerClass[pick]++;

        for (int c = 0; c < observedRatios.length; ++c) {
            observedRatios[c] = (drawnPerClass[c] * 1.0) / placed;
        }
    }
}

From source file:org.openml.webapplication.generatefolds.GenerateFolds.java

License:Open Source License

/**
 * Generates holdout split rows: for each repeat the data set is shuffled with {@code rand} and one
 * row is emitted per instance.
 * <p>
 * The first {@code testSetSize} positions after shuffling get {@code false} as the first
 * {@code createInstance} argument, the remaining positions get {@code true} (presumably
 * "is training row" - confirm against {@code am.createInstance}). The row id is read from
 * attribute 0 of each instance; the fold argument is always 0.
 *
 * @param name relation name of the returned split set
 * @return the generated split rows
 */
private Instances sample_splits_holdout(String name) {
    Instances result = new Instances(name, am.getArffHeader(), splits_size);
    int repeat = 0;
    while (repeat < evaluationMethod.getRepeats()) {
        dataset.randomize(rand);
        int testSetSize = Math.round(dataset.numInstances() * evaluationMethod.getPercentage() / 100);

        for (int pos = 0; pos < dataset.numInstances(); ++pos) {
            boolean pastTestPortion = pos >= testSetSize;
            int rowid = (int) dataset.instance(pos).value(0);
            result.add(am.createInstance(pastTestPortion, rowid, repeat, 0));
        }
        ++repeat;
    }
    return result;
}

From source file:org.openml.webapplication.generatefolds.GenerateFolds.java

License:Open Source License

/**
 * Generates cross-validation split rows.
 * <p>
 * For each repeat the data set is shuffled with {@code rand} and, when the class attribute is
 * nominal, stratified by Weka for the configured number of folds. For every fold, all rows of the
 * training part are emitted first (flag {@code true}), followed by all rows of the test part
 * (flag {@code false}). The row id is read from attribute 0 of each instance.
 *
 * @param name relation name of the returned split set
 * @return the generated split rows
 */
private Instances sample_splits_crossvalidation(String name) {
    Instances result = new Instances(name, am.getArffHeader(), splits_size);
    for (int repeat = 0; repeat < evaluationMethod.getRepeats(); ++repeat) {
        dataset.randomize(rand);
        if (dataset.classAttribute().isNominal()) {
            dataset.stratify(evaluationMethod.getFolds());
        }

        int fold = 0;
        while (fold < evaluationMethod.getFolds()) {
            Instances trainPart = dataset.trainCV(evaluationMethod.getFolds(), fold);
            Instances testPart = dataset.testCV(evaluationMethod.getFolds(), fold);

            // training rows of this fold
            for (int pos = 0; pos < trainPart.numInstances(); ++pos) {
                Instance row = trainPart.instance(pos);
                result.add(am.createInstance(true, (int) row.value(0), repeat, fold));
            }
            // test rows of this fold
            for (int pos = 0; pos < testPart.numInstances(); ++pos) {
                Instance row = testPart.instance(pos);
                result.add(am.createInstance(false, (int) row.value(0), repeat, fold));
            }
            ++fold;
        }
    }
    return result;
}

From source file:org.openml.webapplication.generatefolds.GenerateFolds.java

License:Open Source License

/**
 * Generates leave-one-out split rows: one fold per instance, and within each fold one row per
 * instance.
 * <p>
 * The first {@code createInstance} argument is {@code false} only for the instance whose position
 * equals the fold number and {@code true} for all others. The row id is read from attribute 0;
 * the repeat argument is always 0. The data set is not shuffled here.
 *
 * @param name relation name of the returned split set
 * @return the generated split rows
 */
private Instances sample_splits_leaveoneout(String name) {
    Instances result = new Instances(name, am.getArffHeader(), splits_size);
    int fold = 0;
    while (fold < dataset.numInstances()) {
        int pos = 0;
        while (pos < dataset.numInstances()) {
            int rowid = (int) dataset.instance(pos).value(0);
            boolean notHeldOut = fold != pos;
            result.add(am.createInstance(notHeldOut, rowid, 0, fold));
            ++pos;
        }
        ++fold;
    }
    return result;
}

From source file:org.openml.webapplication.generatefolds.GenerateFolds.java

License:Open Source License

/**
 * Generates learning-curve split rows.
 * <p>
 * For each repeat the data set is shuffled with {@code rand} and, when the class attribute is
 * nominal, reordered by {@code InstancesHelper.stratify}. For every fold and every sample index
 * {@code s} below {@code EstimationProcedure.getNumberOfSamples(trainSize)}, the first
 * {@code EstimationProcedure.sampleSize(s, trainSize)} training rows are emitted with flag
 * {@code true}, followed by all test rows of the fold with flag {@code false}. The row id is read
 * from attribute 0 of each instance.
 *
 * @param name relation name of the returned split set
 * @return the generated split rows
 */
private Instances sample_splits_learningcurve(String name) {
    Instances result = new Instances(name, am.getArffHeader(), splits_size);
    for (int repeat = 0; repeat < evaluationMethod.getRepeats(); ++repeat) {
        dataset.randomize(rand);
        if (dataset.classAttribute().isNominal()) {
            InstancesHelper.stratify(dataset); // do our own stratification
        }

        int fold = 0;
        while (fold < evaluationMethod.getFolds()) {
            Instances trainPart = dataset.trainCV(evaluationMethod.getFolds(), fold);
            Instances testPart = dataset.testCV(evaluationMethod.getFolds(), fold);

            int sample = 0;
            while (sample < EstimationProcedure.getNumberOfSamples(trainPart.numInstances())) {
                // growing prefix of the training part
                for (int pos = 0; pos < EstimationProcedure.sampleSize(sample, trainPart.numInstances()); ++pos) {
                    Instance row = trainPart.instance(pos);
                    result.add(am.createInstance(true, (int) row.value(0), repeat, fold, sample));
                }
                // the complete test part is repeated for every sample size
                for (int pos = 0; pos < testPart.numInstances(); ++pos) {
                    Instance row = testPart.instance(pos);
                    result.add(am.createInstance(false, (int) row.value(0), repeat, fold, sample));
                }
                ++sample;
            }
            ++fold;
        }
    }
    return result;
}