List of usage examples for weka.core.Instances.deleteAttributeAt
public void deleteAttributeAt(int position)
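Before the project-level examples below, here is a minimal, self-contained sketch of the call itself. The ARFF path and the attribute index are placeholders, not taken from any of the projects: deleteAttributeAt removes the attribute at the given zero-based position from the header and from every instance, and Weka refuses to delete the attribute currently designated as the class (it throws an IllegalArgumentException).

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instances;

public class DeleteAttributeAtDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        // work on a copy so the original dataset is left untouched
        Instances copy = new Instances(data);
        copy.setClassIndex(copy.numAttributes() - 1);
        // delete the first attribute (zero-based index); deleting the
        // class attribute itself would throw IllegalArgumentException
        copy.deleteAttributeAt(0);
        System.out.println("before: " + data.numAttributes() + ", after: " + copy.numAttributes());
    }
}

The project examples that follow show the same call in context.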
From source file: org.opentox.jaqpot3.qsar.trainer.FastRbfNnTrainer.java
License: Open Source License
@Override
public Model train(Instances training) throws JaqpotException {
    /*
     * For this algorithm we need to remove all string and nominal attributes
     * and additionally we will remove the target attribute too.
     */
    Instances cleanedTraining = training; // note: reference assignment, not a copy; deleteAttributeAt below also mutates 'training'
    Attribute targetAttribute = cleanedTraining.attribute(targetUri.toString());
    if (targetAttribute == null) {
        throw new JaqpotException("The prediction feature you provided was not found in the dataset. "
                + "Prediction Feature provided by the client: " + targetUri.toString());
    } else {
        if (!targetAttribute.isNumeric()) {
            throw new JaqpotException("The prediction feature you provided is not numeric.");
        }
    }
    double[] targetValues = new double[cleanedTraining.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) {
        targetValues[i] = cleanedTraining.instance(i).value(targetAttribute);
    }
    cleanedTraining.deleteAttributeAt(targetAttribute.index());
    Instances rbfNnNodes = new Instances(cleanedTraining);
    rbfNnNodes.delete();
    double[] potential = calculatePotential(cleanedTraining);
    int L = 1;
    int i_star = locationOfMax(potential);
    double potential_star = potential[i_star];
    double potential_star_1 = potential_star;
    do {
        rbfNnNodes.add(cleanedTraining.instance(i_star));
        potential = updatePotential(potential, i_star, cleanedTraining);
        i_star = locationOfMax(potential);
        double diff = potential[i_star] - e * potential_star_1;
        if (Double.isNaN(diff)) {
            throw new JaqpotException("Not converging");
        }
        if (potential[i_star] <= e * potential_star_1) {
            break;
        } else {
            L = L + 1;
            potential_star = potential[i_star];
        }
    } while (true);

    /* P-nearest neighbors */
    double[] pNn = null;
    double[] sigma = new double[rbfNnNodes.numInstances()];
    double s = 0;
    for (int i = 0; i < rbfNnNodes.numInstances(); i++) {
        pNn = new double[cleanedTraining.numInstances()];
        s = 0;
        for (int j = 0; j < cleanedTraining.numInstances(); j++) {
            if (j != i) {
                pNn[j] = squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(j));
            } else {
                pNn[j] = 0;
            }
        }
        int[] minPoints = locationOfpMinimum(p, pNn); // indices refer to 'cleanedTraining'
        for (int q : minPoints) {
            s += squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(q));
        }
        sigma[i] = Math.sqrt(s / p);
    }

    /* Calculate the matrix X = (l_{i,j})_{i,j} */
    double[][] X = new double[cleanedTraining.numInstances()][rbfNnNodes.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) { // for DoA
        for (int j = 0; j < rbfNnNodes.numInstances(); j++) {
            X[i][j] = rbf(sigma[j], cleanedTraining.instance(i), rbfNnNodes.instance(j));
        }
    }
    Jama.Matrix X_matr = new Matrix(X);
    Jama.Matrix Y_matr = new Matrix(targetValues, targetValues.length);
    Jama.Matrix coeffs = (X_matr.transpose().times(X_matr)).inverse().times(X_matr.transpose()).times(Y_matr);

    FastRbfNnModel actualModel = new FastRbfNnModel();
    actualModel.setAlpha(a);
    actualModel.setBeta(b);
    actualModel.setEpsilon(e);
    actualModel.setNodes(rbfNnNodes);
    actualModel.setSigma(sigma);
    actualModel.setLrCoefficients(coeffs.getColumnPackedCopy());

    Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
    m.setAlgorithm(getAlgorithm());
    m.setCreatedBy(getTask().getCreatedBy());
    m.setDataset(datasetUri);
    m.addDependentFeatures(dependentFeature);
    Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
            "Created as prediction feature for the RBF NN model " + m.getUri(), datasetUri, featureService);
    m.addPredictedFeatures(predictedFeature);
    m.setIndependentFeatures(independentFeatures);
    try {
        m.setActualModel(new ActualModel(actualModel));
    } catch (NotSerializableException ex) {
        logger.error("The provided instance of model cannot be serialized! Critical Error!", ex);
    }
    m.setParameters(new HashSet<Parameter>());
    Parameter<Double> aParam = new Parameter("a", new LiteralValue<Double>(a))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    aParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> bParam = new Parameter("b", new LiteralValue<Double>(b))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    bParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> eParam = new Parameter("e", new LiteralValue<Double>(e))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    eParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    m.getParameters().add(aParam);
    m.getParameters().add(bParam);
    m.getParameters().add(eParam);
    // save the instances being predicted to the abstract trainer and set the
    // features to be excluded when calculating DoA
    predictedInstances = training;
    excludeAttributesDoA.add(dependentFeature.getUri().toString());
    return m;
}
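One detail worth flagging in the example above: Instances cleanedTraining = training; copies the reference, not the dataset, so the deleteAttributeAt call also removes the target attribute from the caller's training set (the method may rely on that, since it later stores predictedInstances = training). If the mutation is not intended, a deep copy avoids it. A minimal sketch, reusing the variable names from the example:

// the copy constructor duplicates both the header and all instances,
// so deleteAttributeAt on the copy leaves 'training' intact
Instances cleanedTraining = new Instances(training);
cleanedTraining.deleteAttributeAt(targetAttribute.index());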
From source file: org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java
License: Apache License
/**
 * Method to replace the identified missing values.
 *
 * @throws Exception the exception
 */
public void replaceMissingValues() throws Exception {
    this.confirmationMessage = new ArrayList<String>();
    Instances outputData;
    String inputFile = BASE_DIR + "OriginalDataSet.csv";
    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    outputData = fileLoader.getDataSet();
    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();
    final int NON_NUMERIC = -1;
    int[] m_AttributeIndices = null;
    Range m_Attributes = new Range("first-last");
    // attributes must be numeric
    m_Attributes.setUpper(outputData.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();
    for (int i = 0; i < m_AttributeIndices.length; i++) {
        // ignore class
        if (m_AttributeIndices[i] == outputData.classIndex()) {
            m_AttributeIndices[i] = NON_NUMERIC;
            continue;
        }
        // not numeric -> ignore it
        if (!outputData.attribute(m_AttributeIndices[i]).isNumeric())
            m_AttributeIndices[i] = NON_NUMERIC;
    }
    double sum;
    int missingCounter;
    double attributeMean;
    // identify the missing values
    for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {
        // non-numeric attribute?
        if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) {
            continue;
        }
        double tempArr[] = outputData.attributeToDoubleArray(attributeIndex);
        sum = 0;
        missingCounter = 0;
        for (int i = 0; i < tempArr.length; i++) {
            sum = sum + tempArr[i];
            if (tempArr[i] == 0)
                missingCounter++; // note: this code treats a value of 0 as missing
        }
        attributeMean = sum / (numInstances - missingCounter);
        for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {
            // replace the missing values with the attribute mean value
            if (outputData.instance(instanceIndex).value(attributeIndex) == 0) {
                outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean);
            }
        }
    }
    // drop the last two attributes before saving
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    saveFilledData(inputFile, outputData);
}
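The two trailing deleteAttributeAt calls simply drop the last two columns in place. The same effect can be had in one call with Weka's Remove filter, which additionally returns a new dataset instead of mutating the input. A minimal alternative sketch, assuming the same outputData variable as above (Remove uses 1-based attribute indices):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

// drop the last two columns in one pass, leaving 'outputData' intact
Remove remove = new Remove();
remove.setAttributeIndices((outputData.numAttributes() - 1) + "," + outputData.numAttributes());
remove.setInputFormat(outputData);
Instances trimmed = Filter.useFilter(outputData, remove);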
From source file: sirius.trainer.features.subsetselection.GreedyForwardSubsetSearch.java
License: Open Source License
@Override
public List<Feature> selectSubset(List<Feature> wholeList, String outputFileLocation, ApplicationData appData)
        throws Exception {
    // Based on the wholeList, start with an empty list and add one Feature at a time from the wholeList.
    // If the MCC value increases then keep the Feature, else remove it.
    // Train on Dataset1 and test on Dataset3 in appData.
    String classifierName = "weka.classifiers.meta.AttributeSelectedClassifier";
    String[] classifierOptions = new String[13];
    BufferedWriter output = new BufferedWriter(new FileWriter(outputFileLocation));
    List<Feature> selectedList = new ArrayList<Feature>();
    HashSet<Integer> selectedIndex = new HashSet<Integer>();
    double lastMCC = 0.0;
    FeatureTableModel currentFeatureTableModel = new FeatureTableModel(false);
    for (int x = 0; x < wholeList.size(); x++) {
        currentFeatureTableModel.add(wholeList.get(x));
    }
    appData.setStep2FeatureTableModel(currentFeatureTableModel);
    new GenerateFeatures(null, appData, null, null, null, null);
    // wait until GenerateFeatures is finished
    while (appData.getOneThread() != null) {
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
        }
    }
    for (int x = 0; x < wholeList.size(); x++) {
        Instances currentInstances = new Instances(new BufferedReader(
                new FileReader(appData.getWorkingDirectory() + File.separator + "Dataset1.arff")));
        // delete in reverse order so the indices still to be visited stay valid
        for (int y = wholeList.size() - 1; y >= 0; y--) {
            if (selectedIndex.contains(y) == false && y != x)
                currentInstances.deleteAttributeAt(y);
        }
        appData.setDataset1Instances(currentInstances);
        classifierOptions[0] = "-E";
        classifierOptions[1] = "weka.attributeSelection.GainRatioAttributeEval";
        classifierOptions[2] = "-S";
        classifierOptions[3] = "weka.attributeSelection.Ranker -T 0.0 -N -1";
        classifierOptions[4] = "-W";
        classifierOptions[5] = "weka.classifiers.trees.RandomForest";
        classifierOptions[6] = "--";
        classifierOptions[7] = "-I";
        classifierOptions[8] = "1000"; // 10, 100, 1000
        classifierOptions[9] = "-K";
        classifierOptions[10] = "0";
        classifierOptions[11] = "-S";
        classifierOptions[12] = "1";
        double MCC1000 = ((PredictionStats) RunClassifierWithNoLocationIndex
                .startClassifierOneWithNoLocationIndex(null, appData, null, null, true, null, 0, 0.5,
                        classifierName, classifierOptions, false, null, new Random().nextInt())).getMaxMCC();
        while (appData.getOneThread() != null) {
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
            }
        }
        output.write("" + MCC1000);
        output.newLine();
        output.flush();
        if (MCC1000 > lastMCC) {
            selectedList.add(wholeList.get(x));
            selectedIndex.add(x);
            lastMCC = MCC1000;
        }
    }
    output.close();
    System.out.println(lastMCC);
    return selectedList;
}
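The inner loop above deletes unwanted columns from the highest index down to the lowest. The direction matters: deleteAttributeAt shifts every attribute after the deleted position down by one, so a forward pass would invalidate the indices it has not visited yet. A minimal sketch of the pattern in isolation ('data' and the contents of 'keep' are placeholders):

// keep only the attribute indices listed in 'keep'; iterate backwards
// so that each deletion cannot shift an index still to be visited
java.util.Set<Integer> keep = new java.util.HashSet<Integer>(java.util.Arrays.asList(0, 3));
for (int y = data.numAttributes() - 1; y >= 0; y--) {
    if (!keep.contains(y) && y != data.classIndex()) {
        data.deleteAttributeAt(y);
    }
}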
From source file: tr.gov.ulakbim.jDenetX.streams.generators.multilabel.MetaMultilabelGenerator.java
License: Open Source License
/**
 * GenerateMultilabelHeader.
 */
protected MultilabelInstancesHeader generateMultilabelHeader(Instances si) {
    Instances mi = new Instances(si, 0, 0);
    mi.setClassIndex(-1); // unset the class index so the class attribute can be deleted
    mi.deleteAttributeAt(mi.numAttributes() - 1);
    FastVector bfv = new FastVector();
    bfv.addElement("0");
    bfv.addElement("1");
    for (int i = 0; i < this.m_N; i++) {
        mi.insertAttributeAt(new Attribute("class" + i, bfv), i);
    }
    this.multilabelStreamTemplate = mi;
    this.multilabelStreamTemplate.setRelationName("SYN_Z" + this.labelCardinalityOption.getValue() + "L"
            + this.m_N + "X" + m_A + "S" + metaRandomSeedOption.getValue() + ": -C " + this.m_N);
    this.multilabelStreamTemplate.setClassIndex(this.m_N);
    return new MultilabelInstancesHeader(multilabelStreamTemplate, m_N);
}
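The setClassIndex(-1) call just before deleteAttributeAt is not cosmetic: Instances refuses to delete the attribute currently designated as the class and throws an IllegalArgumentException instead, so the class index must be cleared (or moved) first. A minimal sketch of just that step ('header' is a placeholder dataset whose class is the last attribute):

// clearing the class index first makes the class attribute deletable
header.setClassIndex(-1);
header.deleteAttributeAt(header.numAttributes() - 1);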
From source file: tubes1.myClassifiers.myC45.java
public TreeNode C45Node(Instances i, double parentGain) {
    TreeNode treeNode = new TreeNode();
    int[] count = calculateCount(i);
    // if all instances belong to one class, return a leaf with that label
    for (int j = 0; j < count.length; j++) {
        int c = count[j];
        if (c == i.numInstances()) {
            treeNode.label = j;
            return treeNode;
        }
    }
    // no attributes left to split on: return a majority-vote leaf
    if (i.numAttributes() <= 1) {
        int maxc = -1;
        int maxcj = -1;
        for (int j = 0; j < count.length; j++) {
            if (count[j] > maxc) {
                maxc = count[j];
                maxcj = j;
            }
        }
        treeNode.label = maxcj;
        return treeNode;
    }
    // pick the attribute with the highest information gain
    Attribute bestA = null;
    double bestAIG = -1;
    double entropyOfSet = entropy(i);
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute a = i.attribute(j);
        if (a != i.classAttribute()) {
            double aIG = infoGain(i, a, entropyOfSet);
            if (aIG > bestAIG) {
                bestAIG = aIG;
                bestA = a;
            }
        }
    }
    double childGainRatio = gainRatio(bestAIG, entropyOfSet);
    treeNode.decision = bestA;
    if (childGainRatio > parentGain) {
        Instances[] subSets = splitData(i, bestA);
        for (Instances subSet : subSets) {
            if (subSet.numInstances() > 0) {
                double attributeValue = subSet.firstInstance().value(bestA);
                subSet.deleteAttributeAt(bestA.index()); // remove the used attribute before recursing
                TreeNode newBranch = C45Node(subSet, childGainRatio);
                newBranch.branchValue = attributeValue;
                treeNode.addBranch(newBranch);
            }
        }
    } else {
        TreeNode newBranch = new TreeNode();
        newBranch.label = vote(i, bestA);
        newBranch.branchValue = treeNode.branchValue;
        treeNode.addBranch(newBranch);
    }
    return treeNode;
}
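Both this example and the ID3 variant below depend on splitData returning an independent Instances copy per attribute value; deleting bestA from each subset before recursing is what guarantees the numAttributes() <= 1 base case is eventually reached. splitData itself is not shown in the listing, so the following is only a guess at its contract, not the project's actual code:

// hypothetical split helper: one copy of the data per value of nominal
// attribute 'a', so deleteAttributeAt on a subset cannot affect its siblings
private Instances[] splitData(Instances data, Attribute a) {
    Instances[] subSets = new Instances[a.numValues()];
    for (int v = 0; v < a.numValues(); v++) {
        subSets[v] = new Instances(data, data.numInstances()); // empty copy with the same header
    }
    for (int k = 0; k < data.numInstances(); k++) {
        Instance inst = data.instance(k);
        subSets[(int) inst.value(a)].add(inst); // add() copies the instance
    }
    return subSets;
}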
From source file: tubes1.myClassifiers.myID3.java
public TreeNode id3Node(Instances i) {
    TreeNode treeNode = new TreeNode();
    int[] count = calculateCount(i);
    // if all instances belong to one class, return a leaf with that label
    for (int j = 0; j < count.length; j++) {
        int c = count[j];
        if (c == i.numInstances()) {
            treeNode.label = j;
            return treeNode;
        }
    }
    // no attributes left to split on: return a majority-vote leaf
    if (i.numAttributes() <= 1) {
        int maxc = -1;
        int maxcj = -1;
        for (int j = 0; j < count.length; j++) {
            if (count[j] > maxc) {
                maxc = count[j];
                maxcj = j;
            }
        }
        treeNode.label = maxcj;
        return treeNode;
    }
    // pick the attribute with the highest information gain
    Attribute bestA = null;
    double bestAIG = -1;
    double entropyOfSet = entropy(i);
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute a = i.attribute(j);
        if (a != i.classAttribute()) {
            double aIG = infoGain(i, a, entropyOfSet);
            if (aIG > bestAIG) {
                bestAIG = aIG;
                bestA = a;
            }
        }
    }
    treeNode.decision = bestA;
    Instances[] subSets = splitData(i, bestA);
    for (Instances subSet : subSets) {
        if (subSet.numInstances() > 0) {
            double attributeValue = subSet.firstInstance().value(bestA);
            subSet.deleteAttributeAt(bestA.index()); // remove the used attribute before recursing
            TreeNode newBranch = id3Node(subSet);
            newBranch.branchValue = attributeValue;
            treeNode.addBranch(newBranch);
        }
    }
    return treeNode;
}