List of usage examples for weka.core Instance copy
Object copy();
From source file:fantail.algorithms.RankingByPairwiseComparison.java
License:Open Source License
public double[] recommendRanking2(Instance testInst) throws Exception { Instances tempData = new Instances(testInst.dataset(), 0); tempData.add((Instance) testInst.copy()); // remove the relation att tempData.setClassIndex(-1);/*from w ww. jav a 2 s . co m*/ tempData.deleteAttributeAt(tempData.numAttributes() - 1); tempData = Filter.useFilter(tempData, m_Add); tempData.setClassIndex(tempData.numAttributes() - 1); double predRanking[] = new double[m_NumLabels]; for (int i = 0; i < m_Classifiers.size(); i++) { double predIndex = m_Classifiers.get(i).classifyInstance(tempData.instance(0)); double predProb = m_Classifiers.get(i).distributionForInstance(tempData.instance(0))[0]; String algoPair = m_AlgoPairs.get(i); String[] parts = algoPair.split("\\|"); int trueIndex = Integer.parseInt(parts[(int) predIndex]); predRanking[trueIndex] -= predProb; } return Tools.doubleArrayToRanking(predRanking); }
From source file:faster_pca.faster_pca.java
License:Open Source License
/** * Transform an instance in original (unormalized) format. * * @param instance an instance in the original (unormalized) format * @return a transformed instance//from www . ja va2 s . c om * @throws Exception if instance can't be transformed */ protected Instance convertInstance(Instance instance) throws Exception { Instance result; double[] newVals; Instance tempInst; double cumulative; int i; int j; double tempval; int numAttsLowerBound; newVals = new double[m_OutputNumAtts]; tempInst = (Instance) instance.copy(); /*m_ReplaceMissingFilter.input(tempInst); m_ReplaceMissingFilter.batchFinished(); tempInst = m_ReplaceMissingFilter.output();*/ m_NominalToBinaryFilter.input(tempInst); m_NominalToBinaryFilter.batchFinished(); tempInst = m_NominalToBinaryFilter.output(); if (m_AttributeFilter != null) { m_AttributeFilter.input(tempInst); m_AttributeFilter.batchFinished(); tempInst = m_AttributeFilter.output(); } if (!super.getCenterData()) { tempInst = f_norm.filter(tempInst); } else { tempInst = f_center.filter(tempInst); } if (m_HasClass) { newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex()); } if (m_MaxAttributes > 0) { numAttsLowerBound = m_NumAttribs - m_MaxAttributes; } else { numAttsLowerBound = 0; } if (numAttsLowerBound < 0) { numAttsLowerBound = 0; } double tempInstCpy[] = new double[m_NumAttribs]; for (j = 0; j < m_NumAttribs; j++) { tempInstCpy[j] = tempInst.value(j); } cumulative = 0; for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) { tempval = 0.0; for (j = 0; j < m_NumAttribs; j++) { tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInstCpy[j]; } newVals[m_NumAttribs - i - 1] = tempval; cumulative += m_Eigenvalues[m_SortedEigens[i]]; if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) { break; } } // create instance if (instance instanceof SparseInstance) { result = new SparseInstance(instance.weight(), newVals); } else { result = new DenseInstance(instance.weight(), newVals); } return result; }
From source file:GClass.EvaluationInternal.java
License:Open Source License
/** * Prints the predictions for the given dataset into a String variable. *//*from w w w . ja v a 2 s . c o m*/ protected static String printClassifications(Classifier classifier, Instances train, String testFileName, int classIndex, Range attributesToOutput) throws Exception { StringBuffer text = new StringBuffer(); if (testFileName.length() != 0) { BufferedReader testReader = null; try { testReader = new BufferedReader(new FileReader(testFileName)); } catch (Exception e) { throw new Exception("Can't open file " + e.getMessage() + '.'); } Instances test = new Instances(testReader, 1); if (classIndex != -1) { test.setClassIndex(classIndex - 1); } else { test.setClassIndex(test.numAttributes() - 1); } int i = 0; while (test.readInstance(testReader)) { Instance instance = test.instance(0); Instance withMissing = (Instance) instance.copy(); withMissing.setDataset(test); double predValue = ((Classifier) classifier).classifyInstance(withMissing); if (test.classAttribute().isNumeric()) { if (Instance.isMissingValue(predValue)) { text.append(i + " missing "); } else { text.append(i + " " + predValue + " "); } if (instance.classIsMissing()) { text.append("missing"); } else { text.append(instance.classValue()); } text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n"); } else { if (Instance.isMissingValue(predValue)) { text.append(i + " missing "); } else { text.append(i + " " + test.classAttribute().value((int) predValue) + " "); } if (Instance.isMissingValue(predValue)) { text.append("missing "); } else { text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " "); } text.append(instance.toString(instance.classIndex()) + " " + attributeValuesString(withMissing, attributesToOutput) + "\n"); } test.delete(0); i++; } testReader.close(); } return text.toString(); }
From source file:gnusmail.learning.ClassifierManager.java
License:Open Source License
/** * This method is used to evaluate a MOA classifier over a data stream * // ww w . j a va 2 s .co m * @param reader * @param moaClassifier * @return */ public List<Double> evaluatePrequential(DocumentReader reader, String moaClassifier) { ClassificationPerformanceEvaluator evaluator = getEvaluator(); moa.classifiers.Classifier learner = getMoaLearner(moaClassifier); InstancesHeader instancesHeader = new InstancesHeader(filterManager.getDataset()); learner.setModelContext(instancesHeader); List<Double> successes = new ArrayList<Double>(); for (Document doc : reader) { Instance trainInst = doc.toWekaInstance(filterManager); Instance testInst = (Instance) trainInst.copy(); double[] prediction = learner.getVotesForInstance(testInst); successes.add(goodPrediction(prediction, trainInst) ? 1.0 : 0.0); evaluator.addResult(testInst, prediction); learner.trainOnInstance(trainInst); try { } catch (Exception ex) { Logger.getLogger(ClassifierManager.class.getName()).log(Level.SEVERE, null, ex); } } return successes; }
From source file:j48.NBTreeSplit.java
License:Open Source License
/** * Creates split on enumerated attribute. * * @exception Exception if something goes wrong *///from ww w. j a v a 2s. c om private void handleEnumeratedAttribute(Instances trainInstances) throws Exception { m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights); m_c45S.buildClassifier(trainInstances); if (m_c45S.numSubsets() == 0) { return; } m_errors = 0; Instance instance; Instances[] trainingSets = new Instances[m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { trainingSets[i] = new Instances(trainInstances, 0); } /* m_distribution = new Distribution(m_complexityIndex, trainInstances.numClasses()); */ int subset; for (int i = 0; i < trainInstances.numInstances(); i++) { instance = trainInstances.instance(i); subset = m_c45S.whichSubset(instance); if (subset > -1) { trainingSets[subset].add((Instance) instance.copy()); } else { double[] weights = m_c45S.weights(instance); for (int j = 0; j < m_complexityIndex; j++) { try { Instance temp = (Instance) instance.copy(); if (weights.length == m_complexityIndex) { temp.setWeight(temp.weight() * weights[j]); } else { temp.setWeight(temp.weight() / m_complexityIndex); } trainingSets[j].add(temp); } catch (Exception ex) { ex.printStackTrace(); System.err.println("*** " + m_complexityIndex); System.err.println(weights.length); System.exit(1); } } } } /* // compute weights (weights of instances per subset m_weights = new double [m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { m_weights[i] = trainingSets[i].sumOfWeights(); } Utils.normalize(m_weights); */ /* // Only Instances with known values are relevant. Enumeration enu = trainInstances.enumerateInstances(); while (enu.hasMoreElements()) { instance = (Instance) enu.nextElement(); if (!instance.isMissing(m_attIndex)) { // m_distribution.add((int)instance.value(m_attIndex),instance); trainingSets[(int)instances.value(m_attIndex)].add(instance); } else { // add these to the error count m_errors += instance.weight(); } } */ Random r = new Random(1); int minNumCount = 0; for (int i = 0; i < m_complexityIndex; i++) { if (trainingSets[i].numInstances() >= 5) { minNumCount++; // Discretize the sets Discretize disc = new Discretize(); disc.setInputFormat(trainingSets[i]); trainingSets[i] = Filter.useFilter(trainingSets[i], disc); trainingSets[i].randomize(r); trainingSets[i].stratify(5); NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable(); fullModel.buildClassifier(trainingSets[i]); // add the errors for this branch of the split m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r); } else { // if fewer than min obj then just count them as errors for (int j = 0; j < trainingSets[i].numInstances(); j++) { m_errors += trainingSets[i].instance(j).weight(); } } } // Check if there are at least five instances in at least two of the subsets // subsets. if (minNumCount > 1) { m_numSubsets = m_complexityIndex; } }
From source file:j48.NBTreeSplit.java
License:Open Source License
/** * Creates split on numeric attribute./*from w w w.j a v a2s. co m*/ * * @exception Exception if something goes wrong */ private void handleNumericAttribute(Instances trainInstances) throws Exception { m_c45S = new C45Split(m_attIndex, 2, m_sumOfWeights); m_c45S.buildClassifier(trainInstances); if (m_c45S.numSubsets() == 0) { return; } m_errors = 0; Instances[] trainingSets = new Instances[m_complexityIndex]; trainingSets[0] = new Instances(trainInstances, 0); trainingSets[1] = new Instances(trainInstances, 0); int subset = -1; // populate the subsets for (int i = 0; i < trainInstances.numInstances(); i++) { Instance instance = trainInstances.instance(i); subset = m_c45S.whichSubset(instance); if (subset != -1) { trainingSets[subset].add((Instance) instance.copy()); } else { double[] weights = m_c45S.weights(instance); for (int j = 0; j < m_complexityIndex; j++) { Instance temp = (Instance) instance.copy(); if (weights.length == m_complexityIndex) { temp.setWeight(temp.weight() * weights[j]); } else { temp.setWeight(temp.weight() / m_complexityIndex); } trainingSets[j].add(temp); } } } /* // compute weights (weights of instances per subset m_weights = new double [m_complexityIndex]; for (int i = 0; i < m_complexityIndex; i++) { m_weights[i] = trainingSets[i].sumOfWeights(); } Utils.normalize(m_weights); */ Random r = new Random(1); int minNumCount = 0; for (int i = 0; i < m_complexityIndex; i++) { if (trainingSets[i].numInstances() > 5) { minNumCount++; // Discretize the sets Discretize disc = new Discretize(); disc.setInputFormat(trainingSets[i]); trainingSets[i] = Filter.useFilter(trainingSets[i], disc); trainingSets[i].randomize(r); trainingSets[i].stratify(5); NaiveBayesUpdateable fullModel = new NaiveBayesUpdateable(); fullModel.buildClassifier(trainingSets[i]); // add the errors for this branch of the split m_errors += NBTreeNoSplit.crossValidate(fullModel, trainingSets[i], r); } else { for (int j = 0; j < trainingSets[i].numInstances(); j++) { m_errors += trainingSets[i].instance(j).weight(); } } } // Check if minimum number of Instances in at least two // subsets. if (minNumCount > 1) { m_numSubsets = m_complexityIndex; } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * //from w ww .j a va 2 s. c o m * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:meka.classifiers.multilabel.BRq.java
License:Open Source License
protected Instance[] convertInstance(Instance instance, int c) { Instance FilteredInstances[] = new Instance[c]; //for each 'i' classifiers for (int i = 0; i < c; i++) { //remove all except 'i' FilteredInstances[i] = (Instance) instance.copy(); FilteredInstances[i].setDataset(null); for (int j = 0, offset = 0; j < c; j++) { if (j == i) offset = 1;/*ww w.j av a2s .c o m*/ else FilteredInstances[i].deleteAttributeAt(offset); } FilteredInstances[i].setDataset(m_InstancesTemplate); } return FilteredInstances; }
From source file:meka.classifiers.multilabel.cc.CNode.java
License:Open Source License
/** * Transform - turn [y1,y2,y3,x1,x2] into [y1,y2,x1,x2]. * @return transformed Instance/*from w ww .ja v a 2s . c o m*/ */ public Instance transform(Instance x, double ypred[]) throws Exception { x = (Instance) x.copy(); int L = x.classIndex(); int L_c = (paY.length + 1); x.setDataset(null); for (int j = 0; j < (L - L_c); j++) { x.deleteAttributeAt(0); } for (int pa : paY) { //System.out.println("x_["+map[pa]+"] <- "+ypred[pa]); x.setValue(map[pa], ypred[pa]); } x.setDataset(T); x.setClassMissing(); return x; }
From source file:meka.classifiers.multilabel.incremental.BRUpdateable.java
License:Open Source License
@Override public void updateClassifier(Instance x) throws Exception { int L = x.classIndex(); if (getDebug()) System.out.print("-: Updating " + L + " models"); for (int j = 0; j < L; j++) { Instance x_j = (Instance) x.copy(); x_j.setDataset(null);// w w w. ja va 2 s.c o m x_j = MLUtils.keepAttributesAt(x_j, new int[] { j }, L); x_j.setDataset(m_InstancesTemplates[j]); ((UpdateableClassifier) m_MultiClassifiers[j]).updateClassifier(x_j); } if (getDebug()) System.out.println(":- "); }