List of usage examples for weka.core.Instances.add(Instance)

@Override
public boolean add(Instance instance)
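Before the project-specific examples, here is a minimal self-contained sketch of the basic pattern. It is written against the Weka 3.7+ API (DenseInstance instead of the older concrete Instance class); the attribute names, class labels, and dataset name are invented for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) throws Exception {
        // Two numeric attributes plus a nominal class attribute
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        attrs.add(new Attribute("y"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("pos");
        classValues.add("neg");
        attrs.add(new Attribute("class", classValues));

        // Empty dataset with an initial capacity of 10 rows
        Instances data = new Instances("demo", attrs, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // add(Instance) appends a shallow copy of the instance and returns true
        Instance row = new DenseInstance(data.numAttributes());
        row.setDataset(data); // required before setting a nominal value by its label
        row.setValue(0, 1.5);
        row.setValue(1, -0.25);
        row.setValue(2, "pos");
        data.add(row);

        System.out.println(data);
    }
}

As the examples below show, the pattern is always the same: build the attribute structure, create the Instances container, then add rows one at a time.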
From source file: org.conqat.engine.commons.machine_learning.DataSetCreator.java
License: Apache License
/**
 * Creates a Weka instance for the given classification object and the given
 * label and adds it to the given data set.
 */
private Instance createInstance(T classificationObject, LABEL label, Instances dataSet) {
    Instance instance = instanceCreator.createWekaInstance(classificationObject, label);
    dataSet.add(instance);
    instance.setDataset(dataSet);
    return instance;
}
From source file: org.goai.classification.impl.WekaClassifier.java
License: Apache License
/**
 * Converts a map to a Weka data set.
 *
 * @param itemClassMap Map<double[], String>
 * @return Instances Weka data set
 */
public Instances convertItemClassMapToInstances(Map<double[], String> itemClassMap) {
    if (itemClassMap.isEmpty()) {
        throw new RuntimeException("Map should have at least one element!");
    }

    // Use the first row as a template for mapping attributes from the sample
    Map.Entry<double[], String> row = itemClassMap.entrySet().iterator().next();

    // Number of attributes, not counting the class attribute
    int numOfAttr = row.getKey().length;

    // Collect the possible class values
    fillClassValues(itemClassMap);

    // Sample size
    int capacity = itemClassMap.entrySet().size();

    // Create an empty Instances data set and set the class attribute index
    Instances newDataSet = createEmptyInstancesDataSet(numOfAttr, capacity);
    newDataSet.setClassIndex(numOfAttr);

    // Iterate over the sample rows
    for (Map.Entry<double[], String> entry : itemClassMap.entrySet()) {
        // Attribute values and the class label of this row
        double[] el = entry.getKey();
        String klasa = entry.getValue();

        // Row values including the class attribute value
        double[] rowValues = new double[numOfAttr + 1];

        // Copy the regular attribute values
        for (int i = 0; i < numOfAttr; i++) {
            rowValues[i] = el[i];
        }

        // Numeric encoding of the class attribute value
        rowValues[numOfAttr] = classVals.get(klasa);

        // Build the row as a DenseInstance with weight 1 and all attribute values
        Instance dataRow = new DenseInstance(1, rowValues);
        dataRow.setDataset(newDataSet);
        newDataSet.add(dataRow);
    }
    return newDataSet;
}
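A possible call site for the converter above, shown purely as an illustration; the feature vectors and labels are invented, and it assumes WekaClassifier is instantiable with a no-arg constructor:

// Hypothetical usage; LinkedHashMap preserves insertion order of the sample rows.
Map<double[], String> sample = new LinkedHashMap<>();
sample.put(new double[] { 5.1, 3.5 }, "pos");
sample.put(new double[] { 6.2, 2.9 }, "neg");

WekaClassifier classifier = new WekaClassifier();
Instances dataSet = classifier.convertItemClassMapToInstances(sample);
System.out.println(dataSet.numInstances()); // prints 2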
From source file: org.hypknowsys.wumprep.WUMprepWrapper.java
License: Open Source License
/**
 * Creates a dummy dataset from the input format, sends it to the script and
 * reads the script output's ARFF information that in turn is used to set
 * <code>this</code>' output format.
 *
 * This mechanism allows a WUMprep script to alter the recordset layout as
 * long as this change is documented by the output ARFF header. For example,
 * the <tt>dnsLookup.pl</tt> script changes the <code>host_ip</code> field
 * to <code>host_dns</code> when performing IP lookups.
 *
 * @param instanceInfo
 *          The input format.
 * @return Object containing the output instance structure.
 */
public Instances getScriptOutputFormat(Instances instanceInfo) {
    Instances outputFormat = instanceInfo;
    Instances testData = new Instances(instanceInfo);
    Instance testInstance = new Instance(testData.numAttributes());

    testData.delete();
    testInstance.setDataset(testData);

    // Initialize the testInstance's attribute values
    for (int i = 0; i < testInstance.numAttributes(); i++) {
        String aName = testInstance.attribute(i).name();
        if (aName.equals("host_ip"))
            testInstance.setValue(i, "127.0.0.1");
        else if (aName.equals("ts_day"))
            testInstance.setValue(i, "01");
        else if (aName.equals("ts_month"))
            testInstance.setValue(i, "Jan");
        else if (aName.equals("ts_year"))
            testInstance.setValue(i, "2005");
        else if (aName.equals("ts_hour"))
            testInstance.setValue(i, "11");
        else if (aName.equals("ts_minutes"))
            testInstance.setValue(i, "55");
        else if (aName.equals("ts_seconds"))
            testInstance.setValue(i, "00");
        else if (aName.equals("tz"))
            testInstance.setValue(i, "+0200");
        else
            testInstance.setValue(i, aName + "-dummy");
    }

    testData.add(testInstance);

    WUMprepWrapper testWrapper = new WUMprepWrapper(m_scriptName, m_args);
    testWrapper.start();
    testWrapper.push(testData.toString());
    testWrapper.push((Instance) null);

    /** Helper class for reading stderr output from the WUMprep script */
    class ErrorReader extends Thread implements Serializable {
        private static final long serialVersionUID = -488779846603045891L;
        PipedReader m_input = null;

        /**
         * @param input The script's wrapper's stderr pipe reader
         */
        ErrorReader(PipedReader input) {
            m_input = input;
            this.start();
        }

        public void run() {
            try {
                while (m_input.read() >= 0)
                    ;
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    // read and discard the script's stderr output
    new ErrorReader(testWrapper.getErrorPipe());

    try {
        // read the output format from the script's stdout pipe
        outputFormat = new org.hypknowsys.wumprep4weka.core.Instances(testWrapper.getOutputPipe());
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    return outputFormat;
}
From source file: org.iobserve.analysis.behavior.filter.ClusterMerger.java
License: Apache License
@Override
protected void execute(final Map<Integer, List<Pair<Instance, Double>>> clustering) throws Exception {
    // Simply pick the first instance of every cluster and look up its
    // attributes to build a new Instances object.
    Instance instance = clustering.entrySet().iterator().next().getValue().get(0).getElement1();
    final FastVector attributes = new FastVector();
    for (int j = 0; j < instance.numAttributes(); j++) {
        attributes.addElement(instance.attribute(j));
    }
    final Instances result = new Instances("Clustering Result", attributes, clustering.size());
    for (final List<Pair<Instance, Double>> entry : clustering.values()) {
        if (!entry.isEmpty()) {
            instance = entry.get(0).getElement1();
            result.add(instance);
        }
    }
    if (ClusterMerger.LOGGER.isDebugEnabled()) {
        this.printInstances(result);
    }
    this.outputPort.send(result);
}
From source file: org.iobserve.analysis.behavior.karlsruhe.AbstractClustering.java
License: Apache License
/**
 * Transforms the user sessions (given as counts of their called operation
 * signatures) into Weka instances that can be used for clustering.
 *
 * @param countModel
 *            contains the user sessions in form of counts of called operation signatures
 * @param listOfDistinctOperationSignatures
 *            contains the extracted distinct operation signatures of the input
 *            entryCallSequenceModel
 * @return the Weka instances that hold the data that is used for the clustering
 */
protected Instances createInstances(final List<UserSessionAsCountsOfCalls> countModel,
        final List<String> listOfDistinctOperationSignatures) {

    final int numberOfDistinctOperationSignatures = listOfDistinctOperationSignatures.size();
    final FastVector fvWekaAttributes = new FastVector(numberOfDistinctOperationSignatures);
    for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
        final String attributeName = "Attribute" + i;
        final Attribute attribute = new Attribute(attributeName);
        fvWekaAttributes.addElement(attribute);
    }

    final Instances clusterSet = new Instances("CallCounts", fvWekaAttributes, countModel.size());

    for (final UserSessionAsCountsOfCalls userSession : countModel) {
        int indexOfAttribute = 0;
        final Instance instance = new Instance(numberOfDistinctOperationSignatures);
        for (int row = 0; row < listOfDistinctOperationSignatures.size(); row++) {
            instance.setValue((Attribute) fvWekaAttributes.elementAt(indexOfAttribute),
                    userSession.getAbsoluteCountOfCalls()[row]);
            indexOfAttribute++;
        }
        clusterSet.add(instance);
    }
    return clusterSet;
}
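Note that this method is written against the pre-3.7 Weka API (FastVector and the concrete Instance class, both deprecated in later releases). Purely as a sketch, and not part of the iObserve sources, the same construction against the Weka 3.7+ API would look roughly like this:

// Weka 3.7+ equivalent (sketch): ArrayList<Attribute> replaces FastVector,
// DenseInstance replaces the concrete Instance class.
final ArrayList<Attribute> attributes = new ArrayList<>();
for (int i = 0; i < numberOfDistinctOperationSignatures; i++) {
    attributes.add(new Attribute("Attribute" + i));
}
final Instances clusterSet = new Instances("CallCounts", attributes, countModel.size());
for (final UserSessionAsCountsOfCalls userSession : countModel) {
    final Instance instance = new DenseInstance(numberOfDistinctOperationSignatures);
    for (int row = 0; row < numberOfDistinctOperationSignatures; row++) {
        instance.setValue(attributes.get(row), userSession.getAbsoluteCountOfCalls()[row]);
    }
    clusterSet.add(instance);
}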
From source file: org.iobserve.analysis.behavior.models.data.BehaviorModelTable.java
License: Apache License
/**
 * Creates an Instances object for clustering.
 *
 * @return instances
 */
public Instances toInstances() {
    final FastVector fastVector = new FastVector();

    // add transitions
    for (int i = 0; i < this.signatures.size(); i++) {
        for (int j = 0; j < this.signatures.size(); j++) {
            if (this.transitions[i][j] > AbstractBehaviorModelTable.TRANSITION_THRESHOLD) {
                final Attribute attribute = new Attribute(
                        AbstractBehaviorModelTable.EDGE_INDICATOR + this.inverseSignatures[i]
                                + AbstractBehaviorModelTable.EDGE_DIVIDER + this.inverseSignatures[j]);
                fastVector.addElement(attribute);
            }
        }
    }

    // add call information
    this.signatures.values().stream()
            .forEach(pair -> Arrays.stream(pair.getSecond())
                    .forEach(callInformation -> fastVector.addElement(new Attribute(
                            AbstractBehaviorModelTable.INFORMATION_INDICATOR
                                    + this.inverseSignatures[pair.getFirst()]
                                    + AbstractBehaviorModelTable.INFORMATION_DIVIDER
                                    + callInformation.getSignature()))));

    // TODO name
    final Instances instances = new Instances("Test", fastVector, 0);
    final Instance instance = this.toInstance();
    instances.add(instance);

    return instances;
}
From source file: org.isep.simizer.example.policy.utils.IterativeSimpleKMeans.java
License: Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that
 * are not being set via options.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the clusterer has not been generated successfully
 */
public void buildClusterer(Instances data) throws Exception {
    // can the clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);

    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];

    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids == null) {
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
    }

    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder) {
        m_Assignments = clusterAssignments;
    }

    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;

    Instances initInstances = null;
    if (m_PreserveOrder) {
        initInstances = new Instances(instances);
    } else {
        initInstances = instances;
    }

    // Modified to account for already set centroids
    if (m_ClusterCentroids.numInstances() > 0) {
        initC = this.centersMap;
        for (int i = 0; i < m_NumClusters; i++) {
            initInstances.add(m_ClusterCentroids.instance(i));
        }
    } else {
        // Walk backwards from the end of the data set, swapping each picked
        // center with the last unprocessed instance.
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex),
                    initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);
            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }

    m_NumClusters = m_ClusterCentroids.numInstances();

    // remove the reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];

    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        for (i = 0; i < instances.numInstances(); i++) {
            Instance toCluster = instances.instance(i);
            int newC = clusterProcessedInstance(toCluster, true);
            if (newC != clusterAssignments[i]) {
                converged = false;
            }
            clusterAssignments[i] = newC;
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        for (i = 0; i < m_NumClusters; i++) {
            if (tempI[i].numInstances() == 0) {
                // empty cluster
                emptyClusterCount++;
            } else {
                moveCentroid(i, tempI[i], true);
            }
        }

        if (m_Iterations == m_MaxIterations) {
            converged = true;
        }

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_squaredErrors = new double[m_NumClusters];
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Instance.missingValue();
                }
            }
            m_ClusterStdDevs.add(new Instance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License: Open Source License
/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination, starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;
        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;
            if (bestScore < globalBestScore) {
                break;
            }
            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;
        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();
            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] =
                            (int) Math.round(meanPos / num) - CENTER_COL;
                } else {
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
    }
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License: Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void buildClassifier(Instances miData) throws Exception {
    // can the classifier handle the data?
    getCapabilities().testWithFail(miData);

    final Instances tmpMiData = new Instances(miData);
    final Instances flatData = toSingleInstanceDataset(miData, null);

    int numPosBags = 0;
    for (Instance bag : miData) {
        if (bag.value(2) == 1) {
            numPosBags++;
        }
    }

    int remainingNumPosBags = numPosBags;
    Future<Pair<IntervalRule, Double>>[] futures = new Future[m_numThreads];
    ExecutorService pool = Executors.newFixedThreadPool(m_numThreads);

    while (remainingNumPosBags / (double) numPosBags > 1 - m_coverRate) {
        final int numIterations = ((int) (m_sampleRate * remainingNumPosBags)) / m_numThreads + 1;
        for (int t = 0; t < m_numThreads; t++) {
            futures[t] = pool.submit(new Callable<Pair<IntervalRule, Double>>() {
                @Override
                public Pair<IntervalRule, Double> call() throws Exception {
                    return createRule(flatData, tmpMiData, numIterations);
                }
            });
        }

        // select the best rule from the threads
        double score = -Double.MAX_VALUE;
        IntervalRule rule = null;
        for (int f = 0; f < futures.length; f++) {
            if (futures[f].get().getB() > score) {
                score = futures[f].get().getB();
                rule = futures[f].get().getA();
            }
        }
        m_rules.add(rule);

        // only keep the bags whose instances are not covered by this rule
        Instances tmp = new Instances(tmpMiData);
        tmpMiData.clear();
        boolean covered;
        remainingNumPosBags = 0;
        for (Instance bag : tmp) {
            covered = false;
            for (Instance inst : bag.relationalValue(1)) {
                double[] distr = rule.distributionForInstance(inst);
                if (distr[1] > distr[0]) {
                    covered = true;
                    break;
                }
            }
            if (!covered) {
                tmpMiData.add(bag);
                if (bag.value(2) == 1) {
                    remainingNumPosBags++;
                }
            }
        }
        flatData.clear();
        toSingleInstanceDataset(tmpMiData, flatData);
    }
    pool.shutdown();
}
From source file: org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License: Open Source License
private Instances toSingleInstanceDataset(Instances miData, Instances flatData) throws Exception {
    MultiInstanceToPropositional convertToProp = new MultiInstanceToPropositional();
    convertToProp.setInputFormat(miData);
    for (int i = 0; i < miData.numInstances(); i++) {
        convertToProp.input(miData.instance(i));
    }
    convertToProp.batchFinished();

    if (flatData == null) {
        flatData = convertToProp.getOutputFormat();
        flatData.deleteAttributeAt(0); // remove the bag index attribute
    }

    Instance processed;
    while ((processed = convertToProp.output()) != null) {
        processed.setDataset(null);
        processed.deleteAttributeAt(0); // remove the bag index attribute
        flatData.add(processed);
    }

    // remove class attribute
    // flatData.setClassIndex(-1);
    // flatData.deleteAttributeAt(flatData.numAttributes() - 1);

    // set weights
    int instanceIdx = 0;
    for (Instance bag : miData) {
        for (Instance instance : bag.relationalValue(1)) {
            flatData.get(instanceIdx).setWeight(instance.weight());
            instanceIdx++;
        }
    }
    return flatData;
}