List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:org.mcennis.graphrat.algorithm.machinelearning.MultiInstanceSVM.java
License:Open Source License
protected void addInstances(Graph g, Instances dataSet, Actor artist, int skipCount, int positiveSkipCount) { int skipCounter = 0; int positiveSkipCounter = 0; for (int i = 0; i < user.length; ++i) { String result = "false"; if (g.getLink((String) parameter[3].getValue(), user[i], artist) != null) { result = "true"; }/*from w w w . j a va 2 s.c om*/ Link[] interests = g.getLinkBySource((String) parameter[4].getValue(), user[i]); if (interests != null) { for (int j = 0; j < interests.length; ++j) { Link[] music = g.getLink((String) parameter[5].getValue(), user[i], interests[j].getDestination()); Link[] given = g.getLinkBySource((String) parameter[3].getValue(), interests[j].getDestination()); if ((given != null) && (music != null)) { if (((result.contentEquals("true")) && (positiveSkipCounter % positiveSkipCount == 0)) || ((result.contentEquals("false")) && (skipCounter % skipCount == 0))) { double[] values = new double[artists.length + 3]; java.util.Arrays.fill(values, 0.0); values[0] = interests[j].getStrength(); values[1] = music[0].getStrength(); for (int k = 0; k < given.length; ++k) { values[java.util.Arrays.binarySearch(artists, given[k].getDestination()) + 2] = 1.0; } if (result.compareTo("true") == 0) { values[values.length - 1] = 1.0; } Instance instance = new SparseInstance(3 + artists.length, values); instance.setDataset(dataSet); instance.setClassValue(result); dataSet.add(instance); // System.out.println("Adding instance for user "+i); if (result.contentEquals("false")) { skipCounter++; } else { positiveSkipCounter++; } } else if (result.contentEquals("false")) { skipCounter++; } else { positiveSkipCounter++; } } } } } }
From source file:org.mcennis.graphrat.algorithm.machinelearning.SVM.java
License:Open Source License
protected void addInstances(Graph g, Instances dataSet, Actor artist, int skipCount, int positiveSkipCount) { int skipCounter = 0; int positiveSkipCounter = 0; for (int i = 0; i < user.length; ++i) { String result = "false"; if (g.getLink((String) parameter[3].getValue(), user[i], artist) != null) { result = "true"; }//w w w . j ava2 s.c o m Link[] given = g.getLinkBySource((String) parameter[3].getValue(), user[i]); if (given != null) { if (((result.contentEquals("true")) && (positiveSkipCounter % positiveSkipCount == 0)) || ((result.contentEquals("false")) && (skipCounter % skipCount == 0))) { double[] values = new double[artists.length + 1]; java.util.Arrays.fill(values, 0.0); for (int k = 0; k < given.length; ++k) { if (given[k].getDestination() == artist) { values[java.util.Arrays.binarySearch(artists, given[k].getDestination())] = Double.NaN; } else { values[java.util.Arrays.binarySearch(artists, given[k].getDestination())] = 1.0; } } if (result.compareTo("true") == 0) { values[values.length - 1] = 1.0; } Instance instance = new SparseInstance(1 + artists.length, values); instance.setDataset(dataSet); instance.setClassValue(result); dataSet.add(instance); // System.out.println("Adding instance for user "+i); if (result.contentEquals("false")) { skipCounter++; } else { positiveSkipCounter++; } } else if (result.contentEquals("false")) { skipCounter++; } else { positiveSkipCounter++; } } } }
From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierMultiAttribute.java
License:Open Source License
@Override public void execute(Graph g) { Actor[] source = g.getActor((String) parameter[1].getValue()); if (source != null) { // create the atributes for each artist FastVector sourceTypes = new FastVector(); Actor[] dest = g.getActor((String) parameter[3].getValue()); if (dest != null) { // create the Instances set backing this object Instances masterSet = null; Instance[] trainingData = new Instance[source.length]; for (int i = 0; i < source.length; ++i) { // First, acquire the instance objects for each actor Property p = null;//from w ww. ja v a 2 s. co m if ((Boolean) parameter[10].getValue()) { p = source[i].getProperty((String) parameter[2].getValue() + g.getID()); } else { p = source[i].getProperty((String) parameter[2].getValue()); } if (p != null) { Object[] values = p.getValue(); if (values.length > 0) { sourceTypes.addElement(source[i].getID()); trainingData[i] = (Instance) ((Instance) values[0]).copy(); // assume that this Instance has a backing dataset // that contains all Instance objects to be tested if (masterSet == null) { masterSet = new Instances(trainingData[i].dataset(), source.length); } masterSet.add(trainingData[i]); sourceTypes.addElement(source[i].getID()); } else { trainingData[i] = null; Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have an Instance value of property ID " + p.getType()); } } else { trainingData[i] = null; Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have a property of ID " + p.getType()); } } Vector<Attribute> destVector = new Vector<Attribute>(); for (int i = 0; i < dest.length; ++i) { FastVector type = new FastVector(); type.addElement("false"); type.addElement("true"); Attribute tmp = new Attribute(dest[i].getID(), type); destVector.add(tmp); masterSet.insertAttributeAt(tmp, masterSet.numAttributes()); } 
Attribute sourceID = new Attribute("sourceID", sourceTypes); masterSet.insertAttributeAt(sourceID, masterSet.numAttributes()); //set ground truth for evaluation for (int i = 0; i < masterSet.numInstances(); ++i) { Instance inst = masterSet.instance(i); Actor user = g.getActor((String) parameter[i].getValue(), sourceID.value((int) inst.value(sourceID))); if (user != null) { for (int j = 0; j < dest.length; ++j) { if (g.getLink((String) parameter[4].getValue(), user, dest[j]) != null) { inst.setValue(sourceID, "true"); } else { if ((Boolean) parameter[9].getValue()) { inst.setValue(sourceID, "false"); } else { inst.setValue(sourceID, Double.NaN); } } } } else { Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, "Actor " + sourceID.value((int) inst.value(sourceID)) + " does not exist in graph"); } } // perform cross fold evaluation of each classifier in turn String[] opts = ((String) parameter[9].getValue()).split("\\s+"); Properties props = new Properties(); if ((Boolean) parameter[11].getValue()) { props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID()); } else { props.setProperty("LinkType", (String) parameter[5].getValue()); } props.setProperty("LinkClass", "Basic"); try { for (int destCount = 0; destCount < dest.length; ++destCount) { masterSet.setClass(destVector.get(destCount)); for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) { Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i); Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i); Classifier classifier = (Classifier) ((Class) parameter[7].getValue()).newInstance(); classifier.setOptions(opts); classifier.buildClassifier(train); for (int j = 0; j < test.numInstances(); ++j) { String sourceName = sourceID.value((int) test.instance(j).value(sourceID)); double result = classifier.classifyInstance(test.instance(j)); String predicted = masterSet.classAttribute().value((int) result); Link derived = 
LinkFactory.newInstance().create(props); derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0, g.getActor((String) parameter[3].getValue(), predicted)); g.add(derived); } } } } catch (InstantiationException ex) { Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (IllegalAccessException ex) { Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.SEVERE, null, ex); } } else { // dest==null Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING, "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors"); } } else { // source==null Logger.getLogger(WekaClassifierMultiAttribute.class.getName()).log(Level.WARNING, "Source mode '" + (String) parameter[2].getValue() + "' has no actors"); } }
From source file:org.mcennis.graphrat.algorithm.machinelearning.WekaClassifierOneAttribute.java
License:Open Source License
@Override public void execute(Graph g) { Actor[] source = g.getActor((String) parameter[1].getValue()); if (source != null) { // create the Instance sets for each ac FastVector classTypes = new FastVector(); FastVector sourceTypes = new FastVector(); Actor[] dest = g.getActor((String) parameter[3].getValue()); if (dest != null) { for (int i = 0; i < dest.length; ++i) { classTypes.addElement(dest[i].getID()); }/* w w w . j a v a 2 s . c o m*/ Attribute classAttribute = new Attribute((String) parameter[5].getValue(), classTypes); Instance[] trainingData = new Instance[source.length]; Instances masterSet = null; for (int i = 0; i < source.length; ++i) { // First, acquire the instance objects for each actor Property p = null; if ((Boolean) parameter[9].getValue()) { p = source[i].getProperty((String) parameter[2].getValue() + g.getID()); } else { p = source[i].getProperty((String) parameter[2].getValue()); } if (p != null) { Object[] values = p.getValue(); if (values.length > 0) { sourceTypes.addElement(source[i].getID()); trainingData[i] = (Instance) ((Instance) values[0]).copy(); // assume that this Instance has a backing dataset // that contains all Instance objects to be tested if (masterSet == null) { masterSet = new Instances(trainingData[i].dataset(), source.length); } masterSet.add(trainingData[i]); } else { trainingData[i] = null; Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have an Instance value of property ID " + p.getType()); } } else { trainingData[i] = null; Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Actor " + source[i].getType() + ":" + source[i].getID() + " does not have a property of ID " + p.getType()); } } // for every actor, fix the instance Attribute sourceID = new Attribute("sourceID", sourceTypes); masterSet.insertAttributeAt(sourceID, masterSet.numAttributes()); 
masterSet.insertAttributeAt(classAttribute, masterSet.numAttributes()); masterSet.setClass(classAttribute); for (int i = 0; i < source.length; ++i) { if (trainingData[i] != null) { trainingData[i].setValue(sourceID, source[i].getID()); Link[] link = g.getLinkBySource((String) parameter[4].getValue(), source[i]); if (link == null) { trainingData[i].setClassValue(Double.NaN); } else { trainingData[i].setClassValue(link[0].getDestination().getID()); } } } String[] opts = ((String) parameter[7].getValue()).split("\\s+"); Properties props = new Properties(); if ((Boolean) parameter[10].getValue()) { props.setProperty("LinkType", (String) parameter[5].getValue() + g.getID()); } else { props.setProperty("LinkType", (String) parameter[5].getValue()); } props.setProperty("LinkClass", "Basic"); try { for (int i = 0; i < (Integer) parameter[8].getValue(); ++i) { Instances test = masterSet.testCV((Integer) parameter[8].getValue(), i); Instances train = masterSet.testCV((Integer) parameter[8].getValue(), i); Classifier classifier = (Classifier) ((Class) parameter[6].getValue()).newInstance(); classifier.setOptions(opts); classifier.buildClassifier(train); for (int j = 0; j < test.numInstances(); ++j) { String sourceName = sourceID.value((int) test.instance(j).value(sourceID)); double result = classifier.classifyInstance(test.instance(j)); String predicted = masterSet.classAttribute().value((int) result); Link derived = LinkFactory.newInstance().create(props); derived.set(g.getActor((String) parameter[2].getValue(), sourceName), 1.0, g.getActor((String) parameter[3].getValue(), predicted)); g.add(derived); } } } catch (InstantiationException ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (IllegalAccessException ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.SEVERE, null, ex); } } 
else { // dest==null Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Ground truth mode '" + (String) parameter[3].getValue() + "' has no actors"); } } else { // source==null Logger.getLogger(WekaClassifierOneAttribute.class.getName()).log(Level.WARNING, "Source mode '" + (String) parameter[2].getValue() + "' has no actors"); } }
From source file:org.montp2.m1decol.ter.utils.WekaUtils.java
License:Open Source License
public static void createARFF(String inPath, String outPath, List<String> excludeFiles) throws IOException { FastVector atts = new FastVector(1); atts.addElement(new Attribute("data", (FastVector) null)); Instances data = new Instances("CategorizeUserForum", atts, 0); for (File file : FileUtils.ls(inPath)) { if (!excludeFiles.contains(file.getName())) { double[] newInstance = new double[1]; newInstance[0] = (double) data.attribute(0) .addStringValue(InputStreamUtils.readInputStream(file.getAbsolutePath())); data.add(new Instance(1.0, newInstance)); }//from w w w. j a v a 2 s.co m } OutputStreamUtils.writeSimple(data.toString(), outPath); }
From source file:org.openml.webapplication.algorithm.InstancesHelper.java
License:Open Source License
@SuppressWarnings("unchecked") public static void stratify(Instances dataset) { int numClasses = dataset.classAttribute().numValues(); int numInstances = dataset.numInstances(); double[] classRatios = classRatios(dataset); double[] currentRatios = new double[numClasses]; int[] currentCounts = new int[numClasses]; List<Instance>[] instancesSorted = new LinkedList[numClasses]; for (int i = 0; i < numClasses; ++i) { instancesSorted[i] = new LinkedList<Instance>(); }/*from w w w .jav a2s . co m*/ // first, sort all instances based on class in different lists for (int i = 0; i < numInstances; ++i) { Instance current = dataset.instance(i); instancesSorted[(int) current.classValue()].add(current); } // now empty the original dataset, all instances are stored in the L.L. for (int i = 0; i < numInstances; i++) { dataset.delete(dataset.numInstances() - 1); } for (int i = 0; i < numInstances; ++i) { int idx = biggestDifference(classRatios, currentRatios); dataset.add(instancesSorted[idx].remove(0)); currentCounts[idx]++; for (int j = 0; j < currentRatios.length; ++j) { currentRatios[j] = (currentCounts[j] * 1.0) / (i + 1); } } }
From source file:org.openml.webapplication.generatefolds.GenerateFolds.java
License:Open Source License
/**
 * Builds the split table for hold-out evaluation.
 *
 * <p>For each repeat the data set is re-shuffled with {@code rand}, and one
 * split row is emitted per instance. The row's boolean flag is {@code false}
 * for the first {@code testSetSize} instances of the shuffled order and
 * {@code true} for the rest; the fold index is always 0.
 *
 * @param name relation name of the returned split table
 * @return the split rows for all repeats
 */
private Instances sample_splits_holdout(String name) {
    Instances splits = new Instances(name, am.getArffHeader(), splits_size);

    for (int repeat = 0; repeat < evaluationMethod.getRepeats(); ++repeat) {
        dataset.randomize(rand);
        int testSetSize = Math.round(dataset.numInstances() * evaluationMethod.getPercentage() / 100);

        int position = 0;
        while (position < dataset.numInstances()) {
            // Attribute 0 carries the row id of the instance.
            int rowid = (int) dataset.instance(position).value(0);
            boolean pastTestBoundary = position >= testSetSize;
            splits.add(am.createInstance(pastTestBoundary, rowid, repeat, 0));
            ++position;
        }
    }

    return splits;
}
From source file:org.openml.webapplication.generatefolds.GenerateFolds.java
License:Open Source License
/**
 * Builds the split table for repeated k-fold cross-validation.
 *
 * <p>For each repeat the data set is re-shuffled with {@code rand} and, when
 * the class attribute is nominal, stratified for the configured number of
 * folds. For each fold, one row flagged {@code true} is emitted per instance
 * of the training part, followed by one row flagged {@code false} per
 * instance of the test part.
 *
 * @param name relation name of the returned split table
 * @return the split rows for all repeats and folds
 */
private Instances sample_splits_crossvalidation(String name) {
    Instances splits = new Instances(name, am.getArffHeader(), splits_size);

    for (int repeat = 0; repeat < evaluationMethod.getRepeats(); ++repeat) {
        dataset.randomize(rand);
        if (dataset.classAttribute().isNominal()) {
            dataset.stratify(evaluationMethod.getFolds());
        }

        for (int fold = 0; fold < evaluationMethod.getFolds(); ++fold) {
            Instances trainPart = dataset.trainCV(evaluationMethod.getFolds(), fold);
            Instances testPart = dataset.testCV(evaluationMethod.getFolds(), fold);

            // Attribute 0 carries the row id of the instance.
            for (int t = 0; t < trainPart.numInstances(); ++t) {
                int trainRowId = (int) trainPart.instance(t).value(0);
                splits.add(am.createInstance(true, trainRowId, repeat, fold));
            }
            for (int t = 0; t < testPart.numInstances(); ++t) {
                int testRowId = (int) testPart.instance(t).value(0);
                splits.add(am.createInstance(false, testRowId, repeat, fold));
            }
        }
    }

    return splits;
}
From source file:org.openml.webapplication.generatefolds.GenerateFolds.java
License:Open Source License
/**
 * Builds the split table for leave-one-out evaluation.
 *
 * <p>There is one fold per instance. Within fold {@code f}, every instance
 * gets a row; the row's boolean flag is {@code false} only for the instance
 * at position {@code f} and {@code true} for all others. The repeat index
 * is always 0.
 *
 * @param name relation name of the returned split table
 * @return the split rows for all folds
 */
private Instances sample_splits_leaveoneout(String name) {
    Instances splits = new Instances(name, am.getArffHeader(), splits_size);

    for (int heldOut = 0; heldOut < dataset.numInstances(); ++heldOut) {
        for (int position = 0; position < dataset.numInstances(); ++position) {
            // Attribute 0 carries the row id of the instance.
            int rowid = (int) dataset.instance(position).value(0);
            boolean notHeldOut = position != heldOut;
            splits.add(am.createInstance(notHeldOut, rowid, 0, heldOut));
        }
    }

    return splits;
}
From source file:org.openml.webapplication.generatefolds.GenerateFolds.java
License:Open Source License
private Instances sample_splits_learningcurve(String name) { Instances splits = new Instances(name, am.getArffHeader(), splits_size); for (int r = 0; r < evaluationMethod.getRepeats(); ++r) { dataset.randomize(rand);/*from w w w. j a va 2 s. c om*/ if (dataset.classAttribute().isNominal()) InstancesHelper.stratify(dataset); // do our own stratification for (int f = 0; f < evaluationMethod.getFolds(); ++f) { Instances train = dataset.trainCV(evaluationMethod.getFolds(), f); Instances test = dataset.testCV(evaluationMethod.getFolds(), f); for (int s = 0; s < EstimationProcedure.getNumberOfSamples(train.numInstances()); ++s) { for (int i = 0; i < EstimationProcedure.sampleSize(s, train.numInstances()); ++i) { int rowid = (int) train.instance(i).value(0); splits.add(am.createInstance(true, rowid, r, f, s)); } for (int i = 0; i < test.numInstances(); ++i) { int rowid = (int) test.instance(i).value(0); splits.add(am.createInstance(false, rowid, r, f, s)); } } } } return splits; }