List of usage examples for weka.core.Instance.setDataset
public void setDataset(Instances instances);
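setDataset sets the instance's reference to a dataset so that attribute metadata (types, nominal labels, the class index) can be resolved; it performs no compatibility check, and the dataset does not learn about the instance. Below is a minimal sketch of the typical pattern using the Weka 3.7+ API; the attribute names are invented for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class SetDatasetDemo {
    public static void main(String[] args) {
        // Build a header: one numeric attribute and a nominal class
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("length"));
        atts.add(new Attribute("class", Arrays.asList("yes", "no")));
        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(data.numAttributes() - 1);

        Instance inst = new DenseInstance(data.numAttributes());
        inst.setDataset(data); // attach: setValue(Attribute, ...) and classAttribute() now work
        inst.setValue(data.attribute("length"), 4.2);
        inst.setValue(data.classAttribute(), "yes");
        data.add(inst);        // setDataset alone does not add the instance to the dataset
    }
}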
From source file:meka.core.MLUtils.java
License:Open Source License
/**
 * SetTemplate - returns a copy of x_template, filled with x's attribute values and
 * attached to dataset D_template (of which x_template is a template). Useful when
 * Weka throws a strange IndexOutOfBoundsException from setTemplate(x, Template).
 */
public static final Instance setTemplate(Instance x, Instance x_template, Instances D_template) {
    Instance x_ = (Instance) x_template.copy();
    int L_y = x.classIndex();
    int L_z = D_template.classIndex();
    // copy over the x space
    MLUtils.copyValues(x_, x, L_y, L_z);
    // set class values to missing
    MLUtils.setLabelsMissing(x_, L_z);
    // attach to the template dataset
    x_.setDataset(D_template);
    return x_;
}
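A hedged usage sketch of setTemplate: x, train, and classifier are hypothetical names for a test instance, the training set whose header the instance must carry, and a trained MEKA classifier.

// Sketch: attach a test instance to the training header before classifying it
Instance xPrime = MLUtils.setTemplate(x, train.firstInstance(), train);
double[] labelConfidences = classifier.distributionForInstance(xPrime);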
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * Convert a multi-label instance into a multi-class instance, according to a template.
 */
public static Instance convertInstance(Instance x, int L, Instances template) {
    Instance x_ = (Instance) x.copy();
    x_.setDataset(null);         // detach: the structural edits below would otherwise throw
    for (int i = 0; i < L; i++)
        x_.deleteAttributeAt(0); // drop the L label attributes
    x_.insertAttributeAt(0);     // insert the single multi-class attribute (value missing)
    x_.setDataset(template);     // reattach to the multi-class template
    return x_;
}
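Note the detach-edit-reattach idiom above: Weka's deleteAttributeAt and insertAttributeAt throw a RuntimeException while an instance is attached to a dataset, hence the setDataset(null) first. The same idiom in isolation (the template header is assumed to match the edited structure):

Instance y = (Instance) x.copy(); // copy() keeps the old dataset reference
y.setDataset(null);               // detach before any structural edit
y.deleteAttributeAt(0);           // now legal
y.insertAttributeAt(0);           // inserts an attribute with a missing value
y.setDataset(template);           // reattach to a header with the new structure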
From source file:MetaBlocking.EnhancedMetaBlocking.FastImplementations.RedefinedCardinalityNodePruning.java
License:Open Source License
protected void verifyValidEntities(int entityId, Instances trainingInstances) {
    if (validEntities.isEmpty()) {
        return;
    }

    topKEdges.clear();
    minimumWeight = Double.MIN_VALUE; // note: Double.MIN_VALUE is the smallest positive double
    Iterator<Integer> it = validEntitiesNeighbor.iterator();
    for (int neighborId : validEntities) {
        double weight = getWeight(entityId, neighborId);
        int blockId = it.next();
        if (weight < minimumWeight) {
            continue;
        }

        Comparison comparison = getComparison(entityId, neighborId);
        comparison.setUtilityMeasure(weight);
        comparison.blockId = blockId;
        topKEdges.add(comparison);
        if (threshold < topKEdges.size()) { // keep only the top-k weighted edges
            Comparison lastComparison = topKEdges.poll();
            minimumWeight = lastComparison.getUtilityMeasure();
        }
    }
    nearestEntities[entityId] = new HashSet<Comparison>(topKEdges);

    Iterator<Comparison> itb = nearestEntities[entityId].iterator();
    while (itb.hasNext()) {
        Comparison c = itb.next();
        int neighborId = c.getEntityId1() == entityId ? c.getEntityId2() : c.getEntityId1();
        int neighborId_clean = neighborId;
        if (cleanCleanER && entityId < datasetLimit) {
            neighborId += datasetLimit;
        }

        Comparison comp = new Comparison(true, entityId, neighborId_clean);
        final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(c.blockId, comp);
        if (commonBlockIndices == null)
            continue;

        // Build one training vector per retained comparison
        double[] instanceValues = new double[8];
        double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(entityId, 0));
        double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(neighborId, 0));
        instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2;

        double raccb = 0;
        for (Integer index1 : commonBlockIndices) {
            raccb += 1.0 / comparisonsPerBlock[index1];
        }
        if (raccb < 1.0E-6) {
            raccb = 1.0E-6;
        }
        instanceValues[1] = raccb;

        instanceValues[2] = commonBlockIndices.size()
                / (redundantCPE[entityId] + redundantCPE[neighborId] - commonBlockIndices.size());
        instanceValues[3] = nonRedundantCPE[entityId];
        instanceValues[4] = nonRedundantCPE[neighborId];
        instanceValues[5] = neighborId;
        instanceValues[6] = entityId;
        instanceValues[7] = adp.isSuperfluous(c) ? 0 : 1;

        Instance newInstance = new DenseInstance(1.0, instanceValues);
        newInstance.setDataset(trainingInstances); // attach before adding
        trainingInstances.add(newInstance);
    }
}
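For newInstance.setDataset(trainingInstances) to be meaningful, trainingInstances must already define eight attributes in the order the values array is filled. A hedged sketch of such a header; the attribute names are invented, and the last column could equally be declared nominal:

ArrayList<Attribute> atts = new ArrayList<Attribute>();
for (String name : new String[] { "ibfWeight", "raccb", "jaccardSim",
        "nonRedundantCPE1", "nonRedundantCPE2", "neighborId", "entityId", "superfluous" }) {
    atts.add(new Attribute(name));
}
Instances trainingInstances = new Instances("metaBlockingFeatures", atts, 0);
trainingInstances.setClassIndex(trainingInstances.numAttributes() - 1);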
From source file:milk.classifiers.MIBoost.java
License:Open Source License
/**
 * Builds the classifier.
 *
 * @param exps the training exemplars (bags) used for generating the boosted classifier
 * @exception Exception if the classifier could not be built successfully
 */
public void buildClassifier(Exemplars exps) throws Exception {

    Exemplars train = new Exemplars(exps);

    if (train.classAttribute().type() != Attribute.NOMINAL) {
        throw new Exception("Class attribute must be nominal.");
    }
    if (train.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }

    m_ClassIndex = train.classIndex();
    m_IdIndex = train.idIndex();
    m_NumClasses = train.numClasses();
    m_NumIterations = m_MaxIterations;

    if (m_NumClasses > 2) {
        throw new Exception("Not yet prepared to deal with multiple classes!");
    }
    if (m_Classifier == null)
        throw new Exception("A base classifier has not been specified!");
    if (!(m_Classifier instanceof WeightedInstancesHandler))
        throw new Exception("Base classifier cannot handle weighted instances!");

    m_Models = Classifier.makeCopies(m_Classifier, getMaxIterations());
    if (m_Debug)
        System.err.println("Base classifier: " + m_Classifier.getClass().getName());

    m_Beta = new double[m_NumIterations];
    m_Attributes = new Instances(train.exemplar(0).getInstances(), 0);

    double N = (double) train.numExemplars(), sumNi = 0;
    Instances data = new Instances(m_Attributes, 0); // data to learn a model from
    data.deleteAttributeAt(m_IdIndex);               // the ID attribute is useless here
    Instances dataset = new Instances(data, 0);      // header-only copy to attach instances to

    // Initialize weights: each bag gets weight sumNi/N, spread over its instances
    for (int i = 0; i < N; i++)
        sumNi += train.exemplar(i).getInstances().numInstances();
    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        exi.setWeight(sumNi / N);
        Instances insts = exi.getInstances();
        double ni = (double) insts.numInstances();
        for (int j = 0; j < ni; j++) {
            Instance ins = new Instance(insts.instance(j)); // copy; the dataset reference is not copied
            ins.deleteAttributeAt(m_IdIndex);               // legal only while detached
            ins.setDataset(dataset);                        // attach to the ID-free header
            ins.setWeight(exi.weight() / ni);
            data.add(ins);
        }
    }

    // Assume the order of the instances is preserved by the Discretize filter
    if (m_DiscretizeBin > 0) {
        m_Filter = new Discretize();
        m_Filter.setInputFormat(new Instances(data, 0));
        m_Filter.setBins(m_DiscretizeBin);
        data = Filter.useFilter(data, m_Filter);
    }

    // Main algorithm
    int dataIdx;
    iterations: for (int m = 0; m < m_MaxIterations; m++) {
        if (m_Debug)
            System.err.println("\nIteration " + m);

        // Build a model
        m_Models[m].buildClassifier(data);

        // Prediction error of each bag: instance-wise 0-1 errors averaged over the bag
        double[] err = new double[(int) N], weights = new double[(int) N];
        boolean perfect = true, tooWrong = true;
        dataIdx = 0;
        for (int n = 0; n < N; n++) {
            Exemplar exn = train.exemplar(n);
            double nn = (double) exn.getInstances().numInstances();
            for (int p = 0; p < nn; p++) {
                Instance testIns = data.instance(dataIdx++);
                if ((int) m_Models[m].classifyInstance(testIns) != (int) exn.classValue())
                    err[n]++;
            }
            weights[n] = exn.weight();
            err[n] /= nn;
            if (err[n] > 0.5)
                perfect = false;
            if (err[n] < 0.5)
                tooWrong = false;
        }

        if (perfect || tooWrong) { // 0% or 100% classification error: cannot find beta
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("No errors");
            break iterations;
        }

        // Search for this iteration's weight c (beta) by numeric optimization
        double[] x = new double[1];
        x[0] = 0;
        double[][] b = new double[2][x.length];
        b[0][0] = Double.NaN;
        b[1][0] = Double.NaN;

        OptEng opt = new OptEng();
        opt.setWeights(weights);
        opt.setErrs(err);
        if (m_Debug)
            System.out.println("Start searching for c... ");
        x = opt.findArgmin(x, b);
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println("Finished.");
        m_Beta[m] = x[0];
        if (m_Debug)
            System.err.println("c = " + m_Beta[m]);

        // Stop if the error is too small, or too big (then ignore this model)
        if (Double.isInfinite(m_Beta[m]) || Utils.smOrEq(m_Beta[m], 0)) {
            if (m == 0)
                m_Beta[m] = 1.0;
            else
                m_Beta[m] = 0;
            m_NumIterations = m + 1;
            if (m_Debug)
                System.err.println("Errors out of range!");
            break iterations;
        }

        // Update the bag weights, then renormalize and push them down to the instances
        dataIdx = 0;
        double totWeights = 0;
        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            exr.setWeight(weights[r] * Math.exp(m_Beta[m] * (2.0 * err[r] - 1.0)));
            totWeights += exr.weight();
        }
        if (m_Debug)
            System.err.println("Total weights = " + totWeights);

        for (int r = 0; r < N; r++) {
            Exemplar exr = train.exemplar(r);
            double num = (double) exr.getInstances().numInstances();
            exr.setWeight(sumNi * exr.weight() / totWeights);
            for (int s = 0; s < num; s++) {
                Instance inss = data.instance(dataIdx);
                inss.setWeight(exr.weight() / num);
                if (Double.isNaN(inss.weight()))
                    throw new Exception("instance " + s + " in bag " + r + " has weight NaN!");
                dataIdx++;
            }
        }
    }
}
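Read off the update step above: each bag's weight follows the exponential rule w_r <- w_r * exp(beta_m * (2 * err_r - 1)), after which the weights are renormalized so they sum to sumNi, and each of a bag's n_r instances receives w_r / n_r. Bags with error above 0.5 therefore gain weight (relative to the rest) when beta_m is positive.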
From source file:milk.classifiers.MIWrapper.java
License:Open Source License
public Instances transform(Exemplars train) throws Exception {

    Instances data = new Instances(m_Attributes); // data to learn a model from
    data.deleteAttributeAt(m_IdIndex);            // the ID attribute is useless here
    Instances dataset = new Instances(data, 0);   // header-only copy to attach instances to

    double sumNi = 0,                // total number of instances
           N = train.numExemplars(); // number of exemplars (bags)
    for (int i = 0; i < N; i++)
        sumNi += train.exemplar(i).getInstances().numInstances();

    // Initialize weights: every bag contributes the same total weight, sumNi/N
    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        Instances insts = exi.getInstances();
        double ni = (double) insts.numInstances();
        for (int j = 0; j < ni; j++) {
            Instance ins = new Instance(insts.instance(j)); // copy; the dataset reference is not copied
            ins.deleteAttributeAt(m_IdIndex);
            ins.setDataset(dataset);
            ins.setWeight(sumNi / (N * ni));
            data.add(ins);
        }
    }
    return data;
}
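A hedged sketch of how the transformed data would be consumed; base is a hypothetical single-instance learner that implements WeightedInstancesHandler, so the per-instance weights actually take effect:

Instances flat = transform(trainingExemplars); // each bag's weight is spread over its instances
base.buildClassifier(flat);                    // instance weights carry the bag-level weighting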
From source file:milk.classifiers.SimpleMI.java
License:Open Source License
public Instances transform(Exemplars train) throws Exception {

    Instances data = new Instances(m_Attributes); // data to learn a model from
    data.deleteAttributeAt(m_IdIndex);            // the ID attribute is useless here
    Instances dataset = new Instances(data, 0);
    Instance template = new Instance(dataset.numAttributes());
    template.setDataset(dataset);

    double N = train.numExemplars(); // number of exemplars (bags)
    for (int i = 0; i < N; i++) {
        Exemplar exi = train.exemplar(i);
        Instances insts = exi.getInstances();
        int attIdx = 0;
        Instance newIns = new Instance(template); // the copy constructor drops the dataset reference,
        newIns.setDataset(dataset);               // so the new instance must be reattached
        for (int j = 0; j < insts.numAttributes(); j++) {
            if ((j == m_IdIndex) || (j == m_ClassIndex))
                continue;
            double value;
            if (m_TransformMethod == 1) {
                value = insts.meanOrMode(j); // summarize each bag attribute by its mean/mode...
            } else {
                double[] minimax = minimax(insts, j);
                value = (minimax[0] + minimax[1]) / 2.0; // ...or by the midpoint of its range
            }
            newIns.setValue(attIdx++, value);
        }
        newIns.setClassValue(exi.classValue());
        data.add(newIns);
    }
    return data;
}
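The minimax helper is not shown on this page; from its use above it returns the bag-wise minimum and maximum of an attribute. A plausible sketch:

// Plausible sketch of the helper: index 0 holds the minimum, index 1 the maximum
private static double[] minimax(Instances insts, int attIndex) {
    double[] mm = { Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY };
    for (int i = 0; i < insts.numInstances(); i++) {
        double v = insts.instance(i).value(attIndex);
        if (v < mm[0]) mm[0] = v;
        if (v > mm[1]) mm[1] = v;
    }
    return mm;
}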
From source file:mlflex.learners.WekaLearner.java
License:Open Source License
private static Instance GetInstance(Instances wekaInstances, FastVector wekaAttributeVector,
        Prediction prediction) throws Exception {
    Instance wekaInstance = new Instance(wekaAttributeVector.size());
    wekaInstance.setDataset(wekaInstances); // attach to the dataset header before setting values
    wekaInstance.setValue((Attribute) wekaAttributeVector.elementAt(0), prediction.Prediction);
    wekaInstance.setValue((Attribute) wekaAttributeVector.elementAt(1), prediction.DependentVariableValue);
    return wekaInstance;
}
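Both mlflex helpers assume the attribute vector and the enclosing Instances were built elsewhere; a hedged sketch of that setup in the old FastVector API the code uses (names invented). The two attributes are declared numeric here for simplicity; for a classification task they would instead be nominal, built from a FastVector of labels.

FastVector wekaAttributeVector = new FastVector();
wekaAttributeVector.addElement(new Attribute("prediction"));
wekaAttributeVector.addElement(new Attribute("actual"));
Instances wekaInstances = new Instances("predictions", wekaAttributeVector, 0);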
From source file:mlflex.WekaInMemoryLearner.java
License:Open Source License
private static Instance GetInstance(Instances wekaInstances, FastVector attVector, DataValues dataInstance,
        DataInstanceCollection dependentVariableInstances) throws Exception {
    Instance wekaInstance = new Instance(attVector.size());
    wekaInstance.setDataset(wekaInstances);

    // Fill in every independent attribute from the data instance
    for (int i = 0; i < attVector.size() - 1; i++) {
        Attribute attribute = (Attribute) attVector.elementAt(i);
        String dataPointValue = dataInstance.GetDataPointValue(attribute.name());
        SetAttributeValue(wekaInstance, attribute, dataPointValue);
    }

    // The last attribute holds the dependent variable, if one was supplied
    if (dependentVariableInstances != null)
        SetAttributeValue(wekaInstance, (Attribute) attVector.elementAt(attVector.size() - 1),
                dependentVariableInstances.Get(dataInstance.GetID())
                        .GetDataPointValue(Utilities.ProcessorVault.DependentVariableDataProcessor
                                .GetDependentVariableDataPointName()));

    return wekaInstance;
}
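SetAttributeValue is an mlflex helper not shown here; judging from its use, it must dispatch on the attribute type and handle missing markers. A plausible sketch:

// Plausible sketch: set a raw string value on the right attribute type
private static void SetAttributeValue(Instance inst, Attribute attribute, String value) {
    if (value == null || value.equals("?"))
        inst.setMissing(attribute);          // treat "?" as a missing value
    else if (attribute.isNumeric())
        inst.setValue(attribute, Double.parseDouble(value));
    else
        inst.setValue(attribute, value);     // nominal/string: looked up by label
}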
From source file:moa.classifiers.macros.TACNB.java
License:Open Source License
public Instance extendWithOldLabels(Instance instance) {
    if (this.header == null) {
        initHeader(instance.dataset());
    }
    int numLabels = this.oldLabels.length;
    if (numLabels == 0) {
        return instance;
    }
    double[] x = instance.toDoubleArray();
    // copyOfRange zero-pads past oldLabels.length, so x2 = [oldLabels..., 0, ..., 0]
    double[] x2 = Arrays.copyOfRange(this.oldLabels, 0, numLabels + x.length);
    System.arraycopy(x, 0, x2, numLabels, x.length); // ...and the zeros are overwritten by x
    Instance extendedInstance = new DenseInstance(instance.weight(), x2);
    extendedInstance.setDataset(this.header); // attach to the extended header
    return extendedInstance;
}
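initHeader is not shown on this page; to make the final setDataset(this.header) consistent, it must build a header with numLabels extra attributes in front of the original ones. A plausible sketch, assuming the Weka-backed MOA API and invented attribute names:

private void initHeader(Instances dataset) {
    FastVector attributes = new FastVector();
    for (int i = 0; i < this.oldLabels.length; i++)
        attributes.addElement(new Attribute("oldLabel" + i)); // hypothetical names
    for (int i = 0; i < dataset.numAttributes(); i++)
        attributes.addElement(dataset.attribute(i));
    this.header = new InstancesHeader(new Instances("extended", attributes, 0));
    this.header.setClassIndex(this.oldLabels.length + dataset.classIndex());
}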
From source file:moa.classifiers.meta.RandomRules.java
License:Open Source License
private Instance transformInstance(Instance inst, int classifierIndex) {
    if (this.listAttributes == null) {
        this.numAttributes = (int) (this.numAttributesPercentageOption.getValue() * inst.numAttributes()
                / 100.0);
        this.listAttributes = new int[this.numAttributes][this.ensemble.length];
        this.dataset = new InstancesHeader[this.ensemble.length];
        for (int ensembleIndex = 0; ensembleIndex < this.ensemble.length; ensembleIndex++) {
            // Draw a distinct random attribute subset for this ensemble member;
            // nextInt(numAttributes() - 1) assumes the class attribute is last, so it is never drawn
            for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
                boolean isUnique = false;
                while (isUnique == false) {
                    this.listAttributes[attributeIndex][ensembleIndex] = this.classifierRandom
                            .nextInt(inst.numAttributes() - 1);
                    isUnique = true;
                    for (int k = 0; k < attributeIndex; k++) {
                        if (this.listAttributes[attributeIndex][ensembleIndex]
                                == this.listAttributes[k][ensembleIndex]) {
                            isUnique = false;
                            break;
                        }
                    }
                }
            }
            // Create a header holding the selected attributes plus the class attribute
            FastVector attributes = new FastVector();
            for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
                attributes.addElement(inst.attribute(this.listAttributes[attributeIndex][ensembleIndex]));
            }
            attributes.addElement(inst.classAttribute());
            this.dataset[ensembleIndex] = new InstancesHeader(
                    new Instances(getCLICreationString(InstanceStream.class), attributes, 0));
            this.dataset[ensembleIndex].setClassIndex(this.numAttributes);
            this.ensemble[ensembleIndex].setModelContext(this.dataset[ensembleIndex]);
        }
    }

    // Project the incoming instance onto this member's attribute subset
    double[] attVals = new double[this.numAttributes + 1];
    for (int attributeIndex = 0; attributeIndex < this.numAttributes; attributeIndex++) {
        attVals[attributeIndex] = inst.value(this.listAttributes[attributeIndex][classifierIndex]);
    }
    Instance instance = new DenseInstance(1.0, attVals);
    instance.setDataset(dataset[classifierIndex]); // attach to this member's header
    instance.setClassValue(inst.classValue());
    return instance;
}
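A hedged sketch of how the projection would be used when training the ensemble; trainOnInstance comes from MOA's classifier interface, and the surrounding loop is hypothetical:

for (int i = 0; i < this.ensemble.length; i++) {
    Instance projected = transformInstance(inst, i); // member-specific attribute subset
    this.ensemble[i].trainOnInstance(projected);
}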