List of usage examples for weka.core Instances get
@Override
public Instance get(int index)
From source file:ANN_Single.SinglelayerPerceptron.java
@Override public void buildClassifier(Instances i) { listOutput = new ArrayList<>(); for (int num = 0; num < i.numClasses(); num++) { listOutput.add(new Node(i.numAttributes())); }/*from w ww . j a v a 2 s .co m*/ while (true) {//ulang iterasi // System.out.println(); // System.out.println("iterasi "+itt); for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) { //buat list input // System.out.print(idxInstance+" "); ArrayList<Double> listInput = new ArrayList<>(); listInput.add(1.0); for (int idx = 0; idx < i.numAttributes() - 1; idx++) { listInput.add(i.get(idxInstance).value(idx)); } //hitung output layer for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) { output(listInput, idxOutput); // listOutput.get(idxOutput).setValue(outputVal); // System.out.print(listOutput.get(idxOutput).getValue()+" "); } // System.out.println(); //hitung error calculateError(idxInstance); //update bobot updateWeight(listInput); } double error = 0; for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) { for (int idx = 0; idx < listOutput.size(); idx++) { error += Math.pow(listOutput.get(idx).getError(), 2) / 2; // System.out.println(listOutput.get(idx).getError()); } // System.out.println(error); } System.out.println(error); System.out.println(); if (error <= 0) break; } fold++; System.out.println("Fold ke-" + fold); double error = 0; for (int idxErr = 0; idxErr < i.numInstances(); idxErr++) { for (Node listOutput1 : listOutput) { error += Math.pow(listOutput1.getError(), 2) / 2; // System.out.println(listOutput1.getError()); } // System.out.println(error); } System.out.println("error " + error); for (int idx = 0; idx < listOutput.size(); idx++) { System.out.println("Output value " + listOutput.get(idx).getValue()); System.out.println("Output error " + listOutput.get(idx).getError()); for (int idx2 = 0; idx2 < listOutput.get(idx).getWeightSize(); idx2++) System.out.println("Output weight" + listOutput.get(idx).getWeightFromList(idx2)); } }
From source file:ANN_single2.MultilayerPerceptron.java
@Override public void buildClassifier(Instances i) { //mengubah class menjadi numeric (diambil indexnya) listDoubleinstance = new double[i.numInstances()]; for (int numIns = 0; numIns < i.numInstances(); numIns++) { listDoubleinstance[numIns] = i.instance(numIns).toDoubleArray()[i.classIndex()]; }//w w w.j a v a 2 s . c om int cnt = 0; for (int itt = 0; itt < 10000; itt++) { for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) { //buat list input ArrayList<Double> listInput = new ArrayList<>(); listInput.add(1.0); //ini untuk bias for (int ins = 0; ins < i.get(idxInstance).numAttributes() - 1; ins++) { listInput.add(i.get(idxInstance).value(ins)); } ArrayList<Double> listHide = new ArrayList<>(); listHide.add(1.0); //Hitung output hidden layer for (int idxHidden = 1; idxHidden < listHidden.size(); idxHidden++) { output(listHidden, listInput, idxHidden); listHide.add(listHidden.get(idxHidden).getValue()); } //Hitung ouput output lyer for (int idxOutput = 0; idxOutput < listOutput.size(); idxOutput++) { output(listOutput, listHide, idxOutput); } //Hitung error calculateError(idxInstance); //update bobot updateBobot(listInput); } //Hitung seluruh error untuk menentukan kapan harus berhenti // double error = 0; // for (int idx =0; idx < i.numInstances(); idx++) { // for (int idxOut=0; idxOut < listOutput.size(); idxOut++) { // error += Math.pow(listOutput.get(idxOut).getError(), 2)/2; // } // } // cnt++; // if (cnt==1000) { // System.out.println("error " + error); // cnt=0; // } // if (error <= threshold) break; } double error = 0; fold++; for (int idx = 0; idx < i.numInstances(); idx++) { for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) { error += Math.pow(listOutput.get(idxOut).getError(), 2) / 2; } } System.out.println("Fold " + fold); System.out.println("error " + error); }
From source file:ANN_single2.SinglelayerPerceptron.java
@Override public void buildClassifier(Instances i) { listOutput = new ArrayList<>(); for (int idx = 0; idx < i.numClasses(); idx++) { listOutput.add(new Node(i.numAttributes())); }/*from w w w.j a v a 2 s. com*/ //mengubah class menjadi numeric (diambil indexnya) listDoubleinstance = new double[i.numInstances()]; for (int numIns = 0; numIns < i.numInstances(); numIns++) { listDoubleinstance[numIns] = i.instance(numIns).toDoubleArray()[i.classIndex()]; } double error = 0; for (int iter = 0; iter < itteration; iter++) { double errorThres = 0; for (int idxInstance = 0; idxInstance < i.numInstances(); idxInstance++) { //buat list input ArrayList<Double> listInput = new ArrayList<>(); listInput.add(1.0); //ini bias for (int idx = 0; idx < i.numAttributes() - 1; idx++) { listInput.add(i.get(idxInstance).value(idx)); } //Hitung output rumus = sigmoid dari sigma for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) { output(listInput, idxOut); } //Hitung error calculateError(idxInstance); //update bobot updateBobot(listInput); } for (int idxOut = 0; idxOut < listOutput.size(); idxOut++) { errorThres += Math.pow(listOutput.get(idxOut).getError(), 2) / 2; } if (errorThres <= threshold) break; // System.out.println(errorThres); } // fold++; // for (int idx =0; idx < i.numInstances(); idx++) { // for (int idxOut=0; idxOut < listOutput.size(); idxOut++) { // error += Math.pow(listOutput.get(idxOut).getError(), 2)/2; // } // } // System.out.println("Fold " + fold); // System.out.println("error " + error); }
From source file:app.RunApp.java
License:Open Source License
/** * Preprocess dataset// ww w . j av a2s . c o m * * @return Positive number if successfull and negative otherwise */ private int preprocess() { trainDatasets = new ArrayList(); testDatasets = new ArrayList(); Instances train, test; if (dataset == null) { JOptionPane.showMessageDialog(null, "You must load a dataset.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } MultiLabelInstances preprocessDataset = dataset.clone(); if (!radioNoIS.isSelected()) { //Do Instance Selection if (radioRandomIS.isSelected()) { int nInstances = Integer.parseInt(textRandomIS.getText()); if (nInstances < 1) { JOptionPane.showMessageDialog(null, "The number of instances must be a positive natural number.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } else if (nInstances > dataset.getNumInstances()) { JOptionPane.showMessageDialog(null, "The number of instances to select must be less than the original.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } Instances dataIS; try { Randomize randomize = new Randomize(); dataIS = dataset.getDataSet(); randomize.setInputFormat(dataIS); dataIS = Filter.useFilter(dataIS, randomize); randomize.batchFinished(); RemoveRange removeRange = new RemoveRange(); removeRange.setInputFormat(dataIS); removeRange.setInstancesIndices((nInstances + 1) + "-last"); dataIS = Filter.useFilter(dataIS, removeRange); removeRange.batchFinished(); preprocessDataset = dataset.reintegrateModifiedDataSet(dataIS); } catch (Exception ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } if (preprocessDataset == null) { JOptionPane.showMessageDialog(null, "Error when selecting instances.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } preprocessedDataset = preprocessDataset; } } if (!radioNoFS.isSelected()) { //FS_BR if (radioBRFS.isSelected()) { int nFeatures = Integer.parseInt(textBRFS.getText()); if (nFeatures < 1) { JOptionPane.showMessageDialog(null, "The number of features must be a positive natural number.", "alert", 
JOptionPane.ERROR_MESSAGE); return -1; } else if (nFeatures > dataset.getFeatureIndices().length) { JOptionPane.showMessageDialog(null, "The number of features to select must be less than the original.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } String combination = jComboBoxBRFSComb.getSelectedItem().toString(); String normalization = jComboBoxBRFSNorm.getSelectedItem().toString(); String output = jComboBoxBRFSOut.getSelectedItem().toString(); FeatureSelector fs; if (radioNoIS.isSelected()) { fs = new FeatureSelector(dataset, nFeatures); } else { //If IS have been done fs = new FeatureSelector(preprocessDataset, nFeatures); } preprocessedDataset = fs.select(combination, normalization, output); if (preprocessedDataset == null) { JOptionPane.showMessageDialog(null, "Error when selecting features.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } preprocessDataset = preprocessedDataset; } else if (radioRandomFS.isSelected()) { int nFeatures = Integer.parseInt(textRandomFS.getText()); if (nFeatures < 1) { JOptionPane.showMessageDialog(null, "The number of features must be a positive natural number.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } else if (nFeatures > dataset.getFeatureIndices().length) { JOptionPane.showMessageDialog(null, "The number of features to select must be less than the original.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } FeatureSelector fs; if (radioNoIS.isSelected()) { fs = new FeatureSelector(dataset, nFeatures); } else { //If IS have been done fs = new FeatureSelector(preprocessDataset, nFeatures); } preprocessedDataset = fs.randomSelect(); if (preprocessedDataset == null) { JOptionPane.showMessageDialog(null, "Error when selecting features.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } preprocessDataset = preprocessedDataset; } } if (!radioNoSplit.isSelected()) { //Random Holdout if (radioRandomHoldout.isSelected()) { String split = textRandomHoldout.getText(); double percentage = Double.parseDouble(split); if 
((percentage <= 0) || (percentage >= 100)) { JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).", "alert", JOptionPane.ERROR_MESSAGE); return -1; } try { RandomTrainTest pre = new RandomTrainTest(); MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage); trainDataset = partitions[0]; testDataset = partitions[1]; } catch (InvalidDataFormatException ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } //Random CV else if (radioRandomCV.isSelected()) { String split = textRandomCV.getText(); if (split.equals("")) { JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } int nFolds; try { nFolds = Integer.parseInt(split); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } if (nFolds < 2) { JOptionPane.showMessageDialog(null, "The number of folds must be greater or equal to 2.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } else if (nFolds > preprocessDataset.getNumInstances()) { JOptionPane.showMessageDialog(null, "The number of folds can not be greater than the number of instances.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } try { MultiLabelInstances temp = preprocessDataset.clone(); Instances dataTemp = temp.getDataSet(); int seed = (int) (Math.random() * 100) + 100; Random rand = new Random(seed); dataTemp.randomize(rand); Instances[] foldsCV = new Instances[nFolds]; for (int i = 0; i < nFolds; i++) { foldsCV[i] = new Instances(dataTemp); foldsCV[i].clear(); } for (int i = 0; i < dataTemp.numInstances(); i++) { foldsCV[i % nFolds].add(dataTemp.get(i)); } train = new Instances(dataTemp); test = new Instances(dataTemp); for (int i = 0; i < nFolds; i++) { train.clear(); test.clear(); for (int j = 0; j < nFolds; 
j++) { if (i != j) { System.out.println("Add fold " + j + " to train"); train.addAll(foldsCV[j]); } } System.out.println("Add fold " + i + " to test"); test.addAll(foldsCV[i]); System.out.println(train.get(0).toString()); System.out.println(test.get(0).toString()); trainDatasets.add(new MultiLabelInstances(new Instances(train), preprocessDataset.getLabelsMetaData())); testDatasets.add(new MultiLabelInstances(new Instances(test), preprocessDataset.getLabelsMetaData())); System.out.println(trainDatasets.get(i).getDataSet().get(0).toString()); System.out.println(testDatasets.get(i).getDataSet().get(0).toString()); System.out.println("---"); } } catch (Exception ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } //Iterative stratified holdout else if (radioIterativeStratifiedHoldout.isSelected()) { String split = textIterativeStratifiedHoldout.getText(); double percentage = Double.parseDouble(split); if ((percentage <= 0) || (percentage >= 100)) { JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).", "alert", JOptionPane.ERROR_MESSAGE); return -1; } try { IterativeTrainTest pre = new IterativeTrainTest(); MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage); trainDataset = partitions[0]; testDataset = partitions[1]; } catch (Exception ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } //Iterative stratified CV else if (radioIterativeStratifiedCV.isSelected()) { String split = textIterativeStratifiedCV.getText(); if (split.equals("")) { JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } int nFolds = 0; try { nFolds = Integer.parseInt(split); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } if (nFolds < 2) { JOptionPane.showMessageDialog(null, "The number of folds must be greater 
or equal to 2.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } else if (nFolds > preprocessDataset.getNumInstances()) { JOptionPane.showMessageDialog(null, "The number of folds can not be greater than the number of instances.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } IterativeStratification strat = new IterativeStratification(); MultiLabelInstances folds[] = strat.stratify(preprocessDataset, nFolds); for (int i = 0; i < nFolds; i++) { try { int trainSize = 0, testSize = 0; for (int j = 0; j < nFolds; j++) { if (i != j) { trainSize += folds[j].getNumInstances(); } } testSize += folds[i].getNumInstances(); train = new Instances(preprocessDataset.getDataSet(), trainSize); test = new Instances(preprocessDataset.getDataSet(), testSize); for (int j = 0; j < nFolds; j++) { if (i != j) { train.addAll(folds[j].getDataSet()); } } test.addAll(folds[i].getDataSet()); trainDatasets.add(new MultiLabelInstances(train, preprocessDataset.getLabelsMetaData())); testDatasets.add(new MultiLabelInstances(test, preprocessDataset.getLabelsMetaData())); } catch (InvalidDataFormatException ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } } //LP stratified holdout else if (radioLPStratifiedHoldout.isSelected()) { String split = textLPStratifiedHoldout.getText(); double percentage = Double.parseDouble(split); if ((percentage <= 0) || (percentage >= 100)) { JOptionPane.showMessageDialog(null, "The percentage must be a number in the range (0, 100).", "alert", JOptionPane.ERROR_MESSAGE); return -1; } try { IterativeTrainTest pre = new IterativeTrainTest(); MultiLabelInstances[] partitions = pre.split(preprocessDataset, percentage); trainDataset = partitions[0]; testDataset = partitions[1]; } catch (Exception ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } //LP stratified CV else if (radioLPStratifiedCV.isSelected()) { String split = textLPStratifiedCV.getText(); if (split.equals("")) { 
JOptionPane.showMessageDialog(null, "You must enter the number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } int nFolds = 0; try { nFolds = Integer.parseInt(split); } catch (Exception e) { JOptionPane.showMessageDialog(null, "Introduce a correct number of folds.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } if (nFolds < 2) { JOptionPane.showMessageDialog(null, "The number of folds must be greater or equal to 2.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } else if (nFolds > preprocessDataset.getNumInstances()) { JOptionPane.showMessageDialog(null, "The number of folds can not be greater than the number of instances.", "alert", JOptionPane.ERROR_MESSAGE); return -1; } LabelPowersetTrainTest strat = new LabelPowersetTrainTest(); MultiLabelInstances folds[] = strat.stratify(preprocessDataset, nFolds); for (int i = 0; i < nFolds; i++) { try { train = new Instances(preprocessDataset.getDataSet(), 0); test = new Instances(preprocessDataset.getDataSet(), 0); for (int j = 0; j < nFolds; j++) { if (i != j) { train.addAll(folds[j].getDataSet()); } } test.addAll(folds[i].getDataSet()); trainDatasets.add(new MultiLabelInstances(train, preprocessDataset.getLabelsMetaData())); testDatasets.add(new MultiLabelInstances(test, preprocessDataset.getLabelsMetaData())); } catch (InvalidDataFormatException ex) { Logger.getLogger(RunApp.class.getName()).log(Level.SEVERE, null, ex); } } } } jButtonSaveDatasets.setEnabled(true); jComboBoxSaveFormat.setEnabled(true); return 1; }
From source file:asap.PostProcess.java
private void writePredictionErrors(Instances instances, double[] predictions, String errorsFilename) { TreeSet<PredictionError> errors = new TreeSet<>(); for (int i = 0; i < predictions.length; i++) { double prediction = predictions[i]; double expected = instances.get(i).classValue(); int pairId = (int) instances.get(i).value(instances.attribute("pair_ID")); String sourceFile = instances.get(i).stringValue(instances.attribute("source_file")); PredictionError pe = new PredictionError(prediction, expected, pairId, sourceFile, instances.get(i)); //if (pe.getError()>=0.5d) errors.add(pe);/*from w w w . jav a 2 s . c o m*/ } StringBuilder sb = new StringBuilder(); for (PredictionError error : errors) { sb.append(error.toString()).append("\n"); } File f = new File(errorsFilename); try (FileOutputStream fos = new FileOutputStream(f)) { fos.write(sb.toString().getBytes()); } catch (IOException ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:be.uza.keratoconus.analysis.impl.PreTrainedModel.java
License:Open Source License
@Override public void processPatientExam(PatientExam exam) { examData = exam.getExamData();//w ww .j a v a2 s . c o m String headerLine = ""; String dataLine = ""; int nColumns = 0; for (String fieldName : classificationModelService.getUsedFields()) { if (examData.containsKey(fieldName)) { headerLine += fieldName + SEMICOLON; final String fieldValue = examData.get(fieldName); // TODO fatal error if fieldValue is null? dataLine += fieldValue + SEMICOLON; ++nColumns; } else if (examData.containsKey(fieldName + " " + Face.FRONT)) { headerLine += fieldName + " " + Face.FRONT + SEMICOLON; final String frontFieldValue = examData.get(fieldName + " " + Face.FRONT); // TODO fatal error if fieldValue is null? dataLine += frontFieldValue + SEMICOLON; ++nColumns; headerLine += fieldName + " " + Face.BACK + SEMICOLON; final String backFieldValue = examData.get(fieldName + " " + Face.BACK); // TODO fatal error if fieldValue is null? dataLine += backFieldValue + SEMICOLON; ++nColumns; } } String csv = headerLine + "Class\n" + dataLine + "?\n"; CSVLoader csvLoader = new CSVLoader(); csvLoader.setFieldSeparator(SEMICOLON); try { csvLoader.setSource(new ByteArrayInputStream(csv.getBytes(Charset.forName("windows-1252")))); final Instances dataSet = csvLoader.getDataSet(); dataSet.setClassIndex(nColumns); instance = dataSet.get(0); } catch (Exception e) { logService.log(ownComponentContext.getServiceReference(), LogService.LOG_WARNING, "Exception thrown when reading CSV record", e); } }
From source file:br.com.edu.arff.LoadArff.java
public ArrayList<Cluster> carregarArff(String caminho) throws FileNotFoundException, IOException { BufferedReader reader = new BufferedReader(new FileReader(caminho)); ArffReader arff = new ArffReader(reader); Instances data = arff.getData(); data.setClassIndex(data.numAttributes() - 1); Instance inst = null;/*w ww . j a va 2 s . c om*/ Attribute att = data.attribute("Cluster"); ArrayList<String> uris; ArrayList<Cluster> lista = new ArrayList<Cluster>(); Fuseki fuseki = new Fuseki(); uris = fuseki.buscaURIS(); for (int i = 0; i <= data.numInstances() - 1; i++) { Cluster cluster = new Cluster(); String clusters = String.valueOf(data.get(i).stringValue(att)); cluster.setUri(uris.get(i)); cluster.setGrupo(clusters); lista.add(cluster); } // for (Cluster c : lista) { // System.out.println(c.getUri()); // System.out.println(c.getGrupo()); // } return lista; }
From source file:clusterer.SimpleKMeansWithSilhouette.java
License:Open Source License
/** * Generates a clusterer. Has to initialize all fields of the clusterer that * are not being set via options.//w w w . j a v a 2 s. c o m * * @param data set of instances serving as training data * @throws Exception if the clusterer has not been generated successfully */ @Override public void buildClusterer(Instances data) throws Exception { m_canopyClusters = null; // can clusterer handle the data? getCapabilities().testWithFail(data); m_Iterations = 0; m_ReplaceMissingFilter = new ReplaceMissingValues(); Instances instances = new Instances(data); instances.setClassIndex(-1); if (!m_dontReplaceMissing) { m_ReplaceMissingFilter.setInputFormat(instances); instances = Filter.useFilter(instances, m_ReplaceMissingFilter); } m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][]; m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()]; if (m_displayStdDevs) { m_FullStdDevs = instances.variances(); } m_FullMeansOrMediansOrModes = moveCentroid(0, instances, true, false); m_FullMissingCounts = m_ClusterMissingCounts[0]; m_FullNominalCounts = m_ClusterNominalCounts[0]; double sumOfWeights = instances.sumOfWeights(); for (int i = 0; i < instances.numAttributes(); i++) { if (instances.attribute(i).isNumeric()) { if (m_displayStdDevs) { m_FullStdDevs[i] = Math.sqrt(m_FullStdDevs[i]); } if (m_FullMissingCounts[i] == sumOfWeights) { m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean } } else { if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) { m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common // value } } } m_ClusterCentroids = new Instances(instances, m_NumClusters); int[] clusterAssignments = new int[instances.numInstances()]; if (m_PreserveOrder) { m_Assignments = clusterAssignments; } m_DistanceFunction.setInstances(instances); Random RandomO = new Random(getSeed()); int instIndex; HashMap<DecisionTableHashKey, Integer> initC = new 
HashMap<DecisionTableHashKey, Integer>(); DecisionTableHashKey hk = null; Instances initInstances = null; if (m_PreserveOrder) { initInstances = new Instances(instances); } else { initInstances = instances; } if (m_speedUpDistanceCompWithCanopies) { m_canopyClusters = new Canopy(); m_canopyClusters.setNumClusters(m_NumClusters); m_canopyClusters.setSeed(getSeed()); m_canopyClusters.setT2(getCanopyT2()); m_canopyClusters.setT1(getCanopyT1()); m_canopyClusters.setMaxNumCandidateCanopiesToHoldInMemory(getCanopyMaxNumCanopiesToHoldInMemory()); m_canopyClusters.setPeriodicPruningRate(getCanopyPeriodicPruningRate()); m_canopyClusters.setMinimumCanopyDensity(getCanopyMinimumCanopyDensity()); m_canopyClusters.setDebug(getDebug()); m_canopyClusters.buildClusterer(initInstances); // System.err.println(m_canopyClusters); m_centroidCanopyAssignments = new ArrayList<long[]>(); m_dataPointCanopyAssignments = new ArrayList<long[]>(); } if (m_initializationMethod == KMEANS_PLUS_PLUS) { kMeansPlusPlusInit(initInstances); m_initialStartPoints = new Instances(m_ClusterCentroids); } else if (m_initializationMethod == CANOPY) { canopyInit(initInstances); m_initialStartPoints = new Instances(m_canopyClusters.getCanopies()); } else if (m_initializationMethod == FARTHEST_FIRST) { farthestFirstInit(initInstances); m_initialStartPoints = new Instances(m_ClusterCentroids); } else { // random for (int j = initInstances.numInstances() - 1; j >= 0; j--) { instIndex = RandomO.nextInt(j + 1); hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true); if (!initC.containsKey(hk)) { m_ClusterCentroids.add(initInstances.instance(instIndex)); initC.put(hk, null); } initInstances.swap(j, instIndex); if (m_ClusterCentroids.numInstances() == m_NumClusters) { break; } } m_initialStartPoints = new Instances(m_ClusterCentroids); } if (m_speedUpDistanceCompWithCanopies) { // assign canopies to training data for (int i = 0; i < instances.numInstances(); i++) { 
m_dataPointCanopyAssignments.add(m_canopyClusters.assignCanopies(instances.instance(i))); } } m_NumClusters = m_ClusterCentroids.numInstances(); // removing reference initInstances = null; int i; boolean converged = false; int emptyClusterCount; Instances[] tempI = new Instances[m_NumClusters]; m_squaredErrors = new double[m_NumClusters]; m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0]; m_ClusterMissingCounts = new double[m_NumClusters][instances.numAttributes()]; startExecutorPool(); while (!converged) { if (m_speedUpDistanceCompWithCanopies) { // re-assign canopies to the current cluster centers m_centroidCanopyAssignments.clear(); for (int kk = 0; kk < m_ClusterCentroids.numInstances(); kk++) { m_centroidCanopyAssignments .add(m_canopyClusters.assignCanopies(m_ClusterCentroids.instance(kk))); } } emptyClusterCount = 0; m_Iterations++; converged = true; if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) { for (i = 0; i < instances.numInstances(); i++) { Instance toCluster = instances.instance(i); int newC = clusterProcessedInstance(toCluster, false, true, m_speedUpDistanceCompWithCanopies ? 
m_dataPointCanopyAssignments.get(i) : null); if (newC != clusterAssignments[i]) { converged = false; } clusterAssignments[i] = newC; } } else { converged = launchAssignToClusters(instances, clusterAssignments); } // update centroids m_ClusterCentroids = new Instances(instances, m_NumClusters); for (i = 0; i < m_NumClusters; i++) { tempI[i] = new Instances(instances, 0); } for (i = 0; i < instances.numInstances(); i++) { tempI[clusterAssignments[i]].add(instances.instance(i)); } if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) { for (i = 0; i < m_NumClusters; i++) { if (tempI[i].numInstances() == 0) { // empty cluster emptyClusterCount++; } else { moveCentroid(i, tempI[i], true, true); } } } else { emptyClusterCount = launchMoveCentroids(tempI); } if (m_Iterations == m_MaxIterations) { converged = true; } if (emptyClusterCount > 0) { m_NumClusters -= emptyClusterCount; if (converged) { Instances[] t = new Instances[m_NumClusters]; int index = 0; for (int k = 0; k < tempI.length; k++) { if (tempI[k].numInstances() > 0) { t[index] = tempI[k]; for (i = 0; i < tempI[k].numAttributes(); i++) { m_ClusterNominalCounts[index][i] = m_ClusterNominalCounts[k][i]; } index++; } } tempI = t; } else { tempI = new Instances[m_NumClusters]; } } if (!converged) { m_ClusterNominalCounts = new double[m_NumClusters][instances.numAttributes()][0]; } } // calculate errors if (!m_FastDistanceCalc) { for (i = 0; i < instances.numInstances(); i++) { clusterProcessedInstance(instances.instance(i), true, false, null); } } if (m_displayStdDevs) { m_ClusterStdDevs = new Instances(instances, m_NumClusters); } m_ClusterSizes = new double[m_NumClusters]; for (i = 0; i < m_NumClusters; i++) { if (m_displayStdDevs) { double[] vals2 = tempI[i].variances(); for (int j = 0; j < instances.numAttributes(); j++) { if (instances.attribute(j).isNumeric()) { vals2[j] = Math.sqrt(vals2[j]); } else { vals2[j] = Utils.missingValue(); } } m_ClusterStdDevs.add(new DenseInstance(1.0, 
vals2)); } m_ClusterSizes[i] = tempI[i].sumOfWeights(); } m_executorPool.shutdown(); // save memory! m_DistanceFunction.clean(); // Calculate Silhouette Coefficient SilCoeff = new double[instances.numInstances()]; AvgSilCoeff = 0; for (int z = 0; z < instances.numInstances(); z++) { double[] distance = new double[m_NumClusters]; Arrays.fill(distance, 0.0); //Sum for (int y = 0; y < instances.numInstances(); y++) { distance[clusterAssignments[y]] += m_DistanceFunction.distance(instances.get(z), instances.get(y)); } //Average for (int x = 0; x < m_NumClusters; x++) { distance[x] = distance[x] / m_ClusterSizes[x]; } double a = distance[clusterAssignments[z]]; distance[clusterAssignments[z]] = Double.MAX_VALUE; Arrays.sort(distance); double b = distance[0]; SilCoeff[z] = (b - a) / Math.max(a, b); AvgSilCoeff += SilCoeff[z]; } AvgSilCoeff = AvgSilCoeff / instances.numInstances(); //System.out.println("AvgSilCoeff: " + AvgSilCoeff); }
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static Instance findBestPerf(Instances data) { int idx = data.numAttributes() - 1; double bestPerf = data.attributeStats(idx).numericStats.max; for (int i = 0; i < data.numInstances(); i++) if (data.get(i).value(idx) == bestPerf) return data.get(i); return null;//should never return NULL }
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static int findBestPerfIndex(Instances data) { int idx = data.numAttributes() - 1; double bestPerf = data.attributeStats(idx).numericStats.max; for (int i = 0; i < data.numInstances(); i++) if (data.get(i).value(idx) == bestPerf) return i; return -1;//should never return -1 }