List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
From source file:adams.flow.transformer.WekaAttributeSelection.java
License:Open Source License
/** * Executes the flow item./* ww w. ja v a2 s. com*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances data; Instances reduced; Instances transformed; AttributeSelection eval; boolean crossValidate; int fold; Instances train; WekaAttributeSelectionContainer cont; SpreadSheet stats; int i; Row row; int[] selected; double[][] ranked; Range range; String rangeStr; boolean useReduced; result = null; try { if (m_InputToken.getPayload() instanceof Instances) data = (Instances) m_InputToken.getPayload(); else data = (Instances) ((WekaTrainTestSetContainer) m_InputToken.getPayload()) .getValue(WekaTrainTestSetContainer.VALUE_TRAIN); if (result == null) { crossValidate = (m_Folds >= 2); // setup evaluation eval = new AttributeSelection(); eval.setEvaluator(m_Evaluator); eval.setSearch(m_Search); eval.setFolds(m_Folds); eval.setSeed((int) m_Seed); eval.setXval(crossValidate); // select attributes if (crossValidate) { Random random = new Random(m_Seed); data = new Instances(data); data.randomize(random); if ((data.classIndex() > -1) && data.classAttribute().isNominal()) { if (isLoggingEnabled()) getLogger().info("Stratifying instances..."); data.stratify(m_Folds); } for (fold = 0; fold < m_Folds; fold++) { if (isLoggingEnabled()) getLogger().info("Creating splits for fold " + (fold + 1) + "..."); train = data.trainCV(m_Folds, fold, random); if (isLoggingEnabled()) getLogger().info("Selecting attributes using all but fold " + (fold + 1) + "..."); eval.selectAttributesCVSplit(train); } } else { eval.SelectAttributes(data); } // generate reduced/transformed dataset reduced = null; transformed = null; if (!crossValidate) { reduced = eval.reduceDimensionality(data); if (m_Evaluator instanceof AttributeTransformer) transformed = ((AttributeTransformer) m_Evaluator).transformedData(data); } // generated stats stats = null; if (!crossValidate) { stats = new DefaultSpreadSheet(); row = 
stats.getHeaderRow(); useReduced = false; if (m_Search instanceof RankedOutputSearch) { i = reduced.numAttributes(); if (reduced.classIndex() > -1) i--; ranked = eval.rankedAttributes(); useReduced = (ranked.length == i); } if (useReduced) { for (i = 0; i < reduced.numAttributes(); i++) row.addCell("" + i).setContent(reduced.attribute(i).name()); row = stats.addRow(); for (i = 0; i < reduced.numAttributes(); i++) row.addCell(i).setContent(0.0); } else { for (i = 0; i < data.numAttributes(); i++) row.addCell("" + i).setContent(data.attribute(i).name()); row = stats.addRow(); for (i = 0; i < data.numAttributes(); i++) row.addCell(i).setContent(0.0); } if (m_Search instanceof RankedOutputSearch) { ranked = eval.rankedAttributes(); for (i = 0; i < ranked.length; i++) row.getCell((int) ranked[i][0]).setContent(ranked[i][1]); } else { selected = eval.selectedAttributes(); for (i = 0; i < selected.length; i++) row.getCell(selected[i]).setContent(1.0); } } // selected attributes rangeStr = null; if (!crossValidate) { range = new Range(); range.setIndices(eval.selectedAttributes()); rangeStr = range.getRange(); } // setup container if (crossValidate) cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, m_Seed, m_Folds); else cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, stats, rangeStr); m_OutputToken = new Token(cont); } } catch (Exception e) { m_OutputToken = null; result = handleException("Failed to process data:", e); } return result; }
From source file:adams.flow.transformer.WekaBootstrapping.java
License:Open Source License
/** * Executes the flow item./*from w w w. ja v a 2s.c om*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; SpreadSheet sheet; Row row; Evaluation evalAll; Evaluation eval; WekaEvaluationContainer cont; TIntList indices; Random random; int i; int iteration; int size; List<Prediction> preds; Instances header; Instances data; ArrayList<Attribute> atts; Instance inst; boolean numeric; int classIndex; Double[] errors; Double[] errorsRev; Percentile<Double> perc; Percentile<Double> percRev; TIntList subset; result = null; if (m_InputToken.getPayload() instanceof Evaluation) { evalAll = (Evaluation) m_InputToken.getPayload(); } else { cont = (WekaEvaluationContainer) m_InputToken.getPayload(); evalAll = (Evaluation) cont.getValue(WekaEvaluationContainer.VALUE_EVALUATION); } if ((evalAll.predictions() == null) || (evalAll.predictions().size() == 0)) result = "No predictions available!"; if (result == null) { // init spreadsheet sheet = new DefaultSpreadSheet(); row = sheet.getHeaderRow(); row.addCell("S").setContentAsString("Subsample"); for (EvaluationStatistic s : m_StatisticValues) row.addCell(s.toString()).setContentAsString(s.toString()); for (i = 0; i < m_Percentiles.length; i++) { switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContentAsString("Percentile-Abs-" + m_Percentiles[i]); break; case BOTH: row.addCell("perc-AmP-" + i).setContentAsString("Percentile-AmP-" + m_Percentiles[i]); row.addCell("perc-PmA-" + i).setContentAsString("Percentile-PmA-" + m_Percentiles[i]); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // set up bootstrapping preds = 
evalAll.predictions(); random = new Random(m_Seed); indices = new TIntArrayList(); size = (int) Math.round(preds.size() * m_Percentage); header = evalAll.getHeader(); numeric = header.classAttribute().isNumeric(); m_ClassIndex.setData(header.classAttribute()); if (numeric) classIndex = -1; else classIndex = m_ClassIndex.getIntIndex(); for (i = 0; i < preds.size(); i++) indices.add(i); // create fake evalutions subset = new TIntArrayList(); for (iteration = 0; iteration < m_NumSubSamples; iteration++) { if (isStopped()) { sheet = null; break; } // determine subset.clear(); if (m_WithReplacement) { for (i = 0; i < size; i++) subset.add(indices.get(random.nextInt(preds.size()))); } else { indices.shuffle(random); for (i = 0; i < size; i++) subset.add(indices.get(i)); } // create dataset from predictions errors = new Double[size]; errorsRev = new Double[size]; atts = new ArrayList<>(); atts.add(header.classAttribute().copy("Actual")); data = new Instances(header.relationName() + "-" + (iteration + 1), atts, size); data.setClassIndex(0); for (i = 0; i < subset.size(); i++) { inst = new DenseInstance(preds.get(subset.get(i)).weight(), new double[] { preds.get(subset.get(i)).actual() }); data.add(inst); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); break; case PREDICTED_MINUS_ACTUAL: errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; case ABSOLUTE: errors[i] = Math .abs(preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted()); break; case BOTH: errors[i] = preds.get(subset.get(i)).actual() - preds.get(subset.get(i)).predicted(); errorsRev[i] = preds.get(subset.get(i)).predicted() - preds.get(subset.get(i)).actual(); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } // perform "fake" evaluation try { eval = new Evaluation(data); for (i = 0; i < subset.size(); 
i++) { if (numeric) eval.evaluateModelOnceAndRecordPrediction( new double[] { preds.get(subset.get(i)).predicted() }, data.instance(i)); else eval.evaluateModelOnceAndRecordPrediction( ((NominalPrediction) preds.get(subset.get(i))).distribution().clone(), data.instance(i)); } } catch (Exception e) { result = handleException( "Failed to create 'fake' Evaluation object (iteration: " + (iteration + 1) + ")!", e); break; } // add row row = sheet.addRow(); row.addCell("S").setContent(iteration + 1); for (EvaluationStatistic s : m_StatisticValues) { try { row.addCell(s.toString()).setContent(EvaluationHelper.getValue(eval, s, classIndex)); } catch (Exception e) { getLogger().log(Level.SEVERE, "Failed to calculate statistic in iteration #" + (iteration + 1) + ": " + s, e); row.addCell(s.toString()).setMissing(); } } for (i = 0; i < m_Percentiles.length; i++) { perc = new Percentile<>(); perc.addAll(errors); percRev = new Percentile<>(); percRev.addAll(errorsRev); switch (m_ErrorCalculation) { case ACTUAL_MINUS_PREDICTED: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case PREDICTED_MINUS_ACTUAL: row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; case ABSOLUTE: row.addCell("perc-Abs-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); break; case BOTH: row.addCell("perc-AmP-" + i).setContent(perc.getPercentile(m_Percentiles[i].doubleValue())); row.addCell("perc-PmA-" + i) .setContent(percRev.getPercentile(m_Percentiles[i].doubleValue())); break; default: throw new IllegalStateException("Unhandled error calculation: " + m_ErrorCalculation); } } } if ((result == null) && (sheet != null)) m_OutputToken = new Token(sheet); } return result; }
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/** * Executes the flow item.//from ww w.j a v a 2 s . c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances inst; int index; int labelIndex; double[] dist; Enumeration enm; int i; result = null; if (m_InputToken.getPayload() instanceof Instance) inst = ((Instance) m_InputToken.getPayload()).dataset(); else inst = (Instances) m_InputToken.getPayload(); m_AttributeIndex.setData(inst); index = m_AttributeIndex.getIntIndex(); m_Queue.clear(); switch (m_Type) { case FULL: m_Queue.add(inst.toSummaryString()); break; case FULL_ATTRIBUTE: m_Queue.add(getAttributeStats(inst, index)); break; case FULL_CLASS: if (inst.classIndex() > -1) m_Queue.add(getAttributeStats(inst, inst.classIndex())); break; case HEADER: m_Queue.add(new Instances(inst, 0).toString()); break; case RELATION_NAME: m_Queue.add(inst.relationName()); break; case ATTRIBUTE_NAME: if (index != -1) m_Queue.add(inst.attribute(index).name()); break; case ATTRIBUTE_NAMES: for (i = 0; i < inst.numAttributes(); i++) m_Queue.add(inst.attribute(i).name()); break; case LABELS: if (index != -1) { enm = inst.attribute(index).enumerateValues(); while (enm.hasMoreElements()) m_Queue.add(enm.nextElement()); } break; case CLASS_LABELS: if (inst.classIndex() > -1) { enm = inst.classAttribute().enumerateValues(); while (enm.hasMoreElements()) m_Queue.add(enm.nextElement()); } break; case LABEL_COUNT: if (index > -1) { m_LabelIndex.setData(inst.attribute(index)); labelIndex = m_LabelIndex.getIntIndex(); m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]); } break; case LABEL_COUNTS: if (index > -1) m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts)); break; case LABEL_DISTRIBUTION: if (index > -1) { dist = new double[inst.attributeStats(index).nominalCounts.length]; for (i = 0; i < dist.length; i++) dist[i] = inst.attributeStats(index).nominalCounts[i]; Utils.normalize(dist); 
m_Queue.add(StatUtils.toNumberArray(dist)); } break; case CLASS_LABEL_COUNT: if (inst.classIndex() > -1) { m_LabelIndex.setData(inst.classAttribute()); labelIndex = m_LabelIndex.getIntIndex(); m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]); } break; case CLASS_LABEL_COUNTS: if (inst.classIndex() > -1) m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts)); break; case CLASS_LABEL_DISTRIBUTION: if (inst.classIndex() > -1) { dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length]; for (i = 0; i < dist.length; i++) dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i]; Utils.normalize(dist); m_Queue.add(StatUtils.toNumberArray(dist)); } break; case NUM_ATTRIBUTES: m_Queue.add(inst.numAttributes()); break; case NUM_INSTANCES: m_Queue.add(inst.numInstances()); break; case NUM_CLASS_LABELS: if ((inst.classIndex() != -1) && inst.classAttribute().isNominal()) m_Queue.add(inst.classAttribute().numValues()); break; case NUM_LABELS: if ((index != -1) && inst.attribute(index).isNominal()) m_Queue.add(inst.attribute(index).numValues()); break; case NUM_DISTINCT_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).distinctCount); break; case NUM_UNIQUE_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).uniqueCount); break; case NUM_MISSING_VALUES: if (index != -1) m_Queue.add(inst.attributeStats(index).missingCount); break; case MIN: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.min); break; case MAX: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.max); break; case MEAN: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.mean); break; case STDEV: if ((index != -1) && inst.attribute(index).isNumeric()) m_Queue.add(inst.attributeStats(index).numericStats.stdDev); break; case 
ATTRIBUTE_TYPE: if (index != -1) m_Queue.add(Attribute.typeToString(inst.attribute(index))); break; case CLASS_TYPE: if (inst.classIndex() != -1) m_Queue.add(Attribute.typeToString(inst.classAttribute())); break; default: result = "Unhandled info type: " + m_Type; } return result; }
From source file:adams.flow.transformer.WekaPredictionsToInstances.java
License:Open Source License
/** * Executes the flow item.// w ww. j a v a2 s . c o m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Evaluation eval; int i; int n; int indexErr; int indexProb; int indexDist; int indexWeight; boolean nominal; Instances header; ArrayList<Attribute> atts; ArrayList<String> values; ArrayList<Prediction> predictions; Prediction pred; double[] vals; Instances data; Instances testData; int[] indices; result = null; if (m_InputToken.getPayload() instanceof WekaEvaluationContainer) { eval = (Evaluation) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_EVALUATION); indices = (int[]) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_ORIGINALINDICES); testData = (Instances) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_TESTDATA); } else { eval = (Evaluation) m_InputToken.getPayload(); indices = null; testData = null; } header = eval.getHeader(); nominal = header.classAttribute().isNominal(); predictions = eval.predictions(); if (predictions != null) { // create header atts = new ArrayList<>(); // actual if (nominal && m_AddLabelIndex) { values = new ArrayList<>(); for (i = 0; i < header.classAttribute().numValues(); i++) values.add((i + 1) + ":" + header.classAttribute().value(i)); atts.add(new Attribute(m_MeasuresPrefix + "Actual", values)); } else { atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Actual")); } // predicted if (nominal && m_AddLabelIndex) { values = new ArrayList<>(); for (i = 0; i < header.classAttribute().numValues(); i++) values.add((i + 1) + ":" + header.classAttribute().value(i)); atts.add(new Attribute(m_MeasuresPrefix + "Predicted", values)); } else { atts.add(header.classAttribute().copy(m_MeasuresPrefix + "Predicted")); } // error indexErr = -1; if (m_ShowError) { indexErr = atts.size(); if (nominal) { values = new 
ArrayList<>(); values.add("n"); values.add("y"); atts.add(new Attribute(m_MeasuresPrefix + "Error", values)); } else { atts.add(new Attribute(m_MeasuresPrefix + "Error")); } } // probability indexProb = -1; if (m_ShowProbability && nominal) { indexProb = atts.size(); atts.add(new Attribute(m_MeasuresPrefix + "Probability")); } // distribution indexDist = -1; if (m_ShowDistribution && nominal) { indexDist = atts.size(); for (n = 0; n < header.classAttribute().numValues(); n++) atts.add(new Attribute( m_MeasuresPrefix + "Distribution (" + header.classAttribute().value(n) + ")")); } // weight indexWeight = -1; if (m_ShowWeight) { indexWeight = atts.size(); atts.add(new Attribute(m_MeasuresPrefix + "Weight")); } data = new Instances("Predictions", atts, predictions.size()); data.setClassIndex(1); // predicted // add data if ((indices != null) && m_UseOriginalIndices) predictions = CrossValidationHelper.alignPredictions(predictions, indices); for (i = 0; i < predictions.size(); i++) { pred = predictions.get(i); vals = new double[data.numAttributes()]; // actual vals[0] = pred.actual(); // predicted vals[1] = pred.predicted(); // error if (m_ShowError) { if (nominal) { vals[indexErr] = ((pred.actual() != pred.predicted()) ? 1.0 : 0.0); } else { if (m_UseAbsoluteError) vals[indexErr] = Math.abs(pred.actual() - pred.predicted()); else vals[indexErr] = pred.actual() - pred.predicted(); } } // probability if (m_ShowProbability && nominal) { vals[indexProb] = StatUtils.max(((NominalPrediction) pred).distribution()); } // distribution if (m_ShowDistribution && nominal) { for (n = 0; n < header.classAttribute().numValues(); n++) vals[indexDist + n] = ((NominalPrediction) pred).distribution()[n]; } // weight if (m_ShowWeight) { vals[indexWeight] = pred.weight(); } // add row data.add(new DenseInstance(1.0, vals)); } // add test data? 
if ((testData != null) && !m_TestAttributes.isEmpty()) { testData = filterTestData(testData); if (testData != null) data = Instances.mergeInstances(data, testData); } // generate output token m_OutputToken = new Token(data); } else { getLogger().severe("No predictions available from Evaluation object!"); } return result; }
From source file:adams.flow.transformer.WekaPredictionsToSpreadSheet.java
License:Open Source License
/** * Executes the flow item.// w w w .j av a 2 s .co m * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Evaluation eval; int i; int n; int indexErr; int indexProb; int indexDist; int indexWeight; boolean nominal; Instances header; ArrayList<Prediction> predictions; Prediction pred; SpreadSheet data; Instances testData; InstancesView testView; Row row; int[] indices; result = null; if (m_InputToken.getPayload() instanceof WekaEvaluationContainer) { eval = (Evaluation) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_EVALUATION); indices = (int[]) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_ORIGINALINDICES); testData = (Instances) ((WekaEvaluationContainer) m_InputToken.getPayload()) .getValue(WekaEvaluationContainer.VALUE_TESTDATA); } else { eval = (Evaluation) m_InputToken.getPayload(); indices = null; testData = null; } header = eval.getHeader(); nominal = header.classAttribute().isNominal(); predictions = eval.predictions(); if (predictions != null) { data = new DefaultSpreadSheet(); data.setName("Predictions"); // create header row = data.getHeaderRow(); row.addCell("A").setContent(m_MeasuresPrefix + "Actual"); row.addCell("P").setContent(m_MeasuresPrefix + "Predicted"); indexErr = -1; if (m_ShowError) { indexErr = row.getCellCount(); row.addCell("E").setContent(m_MeasuresPrefix + "Error"); } // probability indexProb = -1; if (m_ShowProbability && nominal) { indexProb = row.getCellCount(); row.addCell("Pr").setContent(m_MeasuresPrefix + "Probability"); } // distribution indexDist = -1; if (m_ShowDistribution && nominal) { indexDist = row.getCellCount(); for (n = 0; n < header.classAttribute().numValues(); n++) row.addCell("D" + n).setContent( m_MeasuresPrefix + "Distribution (" + header.classAttribute().value(n) + ")"); } // weight indexWeight = -1; if (m_ShowWeight) { indexWeight = 
row.getCellCount(); row.addCell("W").setContent(m_MeasuresPrefix + "Weight"); } // add data if ((indices != null) && m_UseOriginalIndices) predictions = CrossValidationHelper.alignPredictions(predictions, indices); for (i = 0; i < predictions.size(); i++) { pred = predictions.get(i); row = data.addRow(); // actual if (Double.isNaN(pred.actual())) row.addCell(0).setMissing(); else if (nominal) row.addCell(0).setContentAsString(header.classAttribute().value((int) pred.actual())); else row.addCell(0).setContent(pred.actual()); // predicted if (Double.isNaN(pred.predicted())) row.addCell(1).setMissing(); else if (nominal) row.addCell(1).setContentAsString(header.classAttribute().value((int) pred.predicted())); else row.addCell(1).setContent(pred.predicted()); // error if (m_ShowError) { if (nominal) { row.addCell(indexErr).setContent((pred.actual() != pred.predicted() ? "true" : "false")); } else { if (m_UseAbsoluteError) row.addCell(indexErr).setContent(Math.abs(pred.actual() - pred.predicted())); else row.addCell(indexErr).setContent(pred.actual() - pred.predicted()); } } // probability if (m_ShowProbability && nominal) { row.addCell(indexProb).setContent(StatUtils.max(((NominalPrediction) pred).distribution())); } // distribution if (m_ShowDistribution && nominal) { for (n = 0; n < header.classAttribute().numValues(); n++) row.addCell(indexDist + n).setContent(((NominalPrediction) pred).distribution()[n]); } // weight if (m_ShowWeight) { row.addCell(indexWeight).setContent(pred.weight()); } } // add test data? if ((testData != null) && !m_TestAttributes.isEmpty()) { testData = filterTestData(testData); if (testData != null) { testView = new InstancesView(testData); data.mergeWith(testView); } } // generate output token m_OutputToken = new Token(data); } else { getLogger().severe("No predictions available from Evaluation object!"); } return result; }
From source file:adams.gui.visualization.debug.inspectionhandler.WekaInstances.java
License:Open Source License
/** * Returns further inspection values./*www. j av a 2s . co m*/ * * @param obj the object to further inspect * @return the named inspected values */ @Override public Hashtable<String, Object> inspect(Object obj) { Hashtable<String, Object> result; Instances data; Instance inst; result = new Hashtable<String, Object>(); if (obj instanceof Instances) { data = (Instances) obj; inst = null; } else { inst = (Instance) obj; data = inst.dataset(); } result.put("relation", data.relationName()); result.put("num attributes", data.numAttributes()); result.put("class attribute", (data.classIndex() == -1) ? "-none-" : ((data.classIndex() + 1) + " (" + data.classAttribute().name() + ")")); if (inst == null) { result.put("num instances", data.numInstances()); result.put("instances", data.toArray()); } return result; }
From source file:adams.opt.cso.Measure.java
License:Open Source License
/** * Checks whether the data can be used with this measure. * * @param data the data to check//ww w . j a v a 2s .co m * @return true if the measure can be obtain for this kind of data */ public boolean isValid(Instances data) { if (data.classIndex() == -1) throw new UnassignedClassException("No class attribute set!"); if (data.classAttribute().isNominal()) return m_Nominal; else if (data.classAttribute().isNumeric()) return m_Numeric; else throw new IllegalStateException("Class attribute '" + data.classAttribute().type() + "' not handled!"); }
From source file:ai.BalancedRandomForest.java
License:GNU General Public License
/** * Build Balanced Random Forest// ww w.j ava2s.co m */ public void buildClassifier(final Instances data) throws Exception { // If number of features is 0 then set it to log2 of M (number of attributes) if (numFeatures < 1) numFeatures = (int) Utils.log2(data.numAttributes()) + 1; // Check maximum number of random features if (numFeatures >= data.numAttributes()) numFeatures = data.numAttributes() - 1; // Initialize array of trees tree = new BalancedRandomTree[numTrees]; // total number of instances final int numInstances = data.numInstances(); // total number of classes final int numClasses = data.numClasses(); final ArrayList<Integer>[] indexSample = new ArrayList[numClasses]; for (int i = 0; i < numClasses; i++) indexSample[i] = new ArrayList<Integer>(); //System.out.println("numClasses = " + numClasses); // fill indexSample with the indices of each class for (int i = 0; i < numInstances; i++) { //System.out.println("data.get("+i+").classValue() = " + data.get(i).classValue()); indexSample[(int) data.get(i).classValue()].add(i); } final Random random = new Random(seed); // Executor service to run concurrent trees final ExecutorService exe = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); List<Future<BalancedRandomTree>> futures = new ArrayList<Future<BalancedRandomTree>>(numTrees); final boolean[][] inBag = new boolean[numTrees][numInstances]; try { for (int i = 0; i < numTrees; i++) { final ArrayList<Integer> bagIndices = new ArrayList<Integer>(); // Randomly select the indices in a balanced way for (int j = 0; j < numInstances; j++) { // Select first the class final int randomClass = random.nextInt(numClasses); // Select then a random sample of that class final int randomSample = random.nextInt(indexSample[randomClass].size()); bagIndices.add(indexSample[randomClass].get(randomSample)); inBag[i][indexSample[randomClass].get(randomSample)] = true; } // Create random tree final Splitter splitter = new Splitter( new 
GiniFunction(numFeatures, data.getRandomNumberGenerator(random.nextInt()))); futures.add(exe.submit(new Callable<BalancedRandomTree>() { public BalancedRandomTree call() { return new BalancedRandomTree(data, bagIndices, splitter); } })); } // Grab all trained trees before proceeding for (int treeIdx = 0; treeIdx < numTrees; treeIdx++) tree[treeIdx] = futures.get(treeIdx).get(); // Calculate out of bag error final boolean numeric = data.classAttribute().isNumeric(); List<Future<Double>> votes = new ArrayList<Future<Double>>(data.numInstances()); for (int i = 0; i < data.numInstances(); i++) { VotesCollector aCollector = new VotesCollector(tree, i, data, inBag); votes.add(exe.submit(aCollector)); } double outOfBagCount = 0.0; double errorSum = 0.0; for (int i = 0; i < data.numInstances(); i++) { double vote = votes.get(i).get(); // error for instance outOfBagCount += data.instance(i).weight(); if (numeric) { errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight(); } else { if (vote != data.instance(i).classValue()) errorSum += data.instance(i).weight(); } } outOfBagError = errorSum / outOfBagCount; } catch (Exception ex) { ex.printStackTrace(); } finally { exe.shutdownNow(); } }
From source file:ann.ANN.java
public void classify(String data_address, Classifier model) { try {/*from w w w . java 2 s .co m*/ Instances test = ConverterUtils.DataSource.read(data_address); test.setClassIndex(test.numAttributes() - 1); System.out.println("===================================="); System.out.println("=== Predictions on user test set ==="); System.out.println("===================================="); System.out.println("# - actual - predicted - distribution"); for (int i = 0; i < test.numInstances(); i++) { double pred = model.classifyInstance(test.instance(i)); double[] dist = model.distributionForInstance(test.instance(i)); System.out.print((i + 1) + " - "); System.out.print(test.instance(i).toString(test.classIndex()) + " - "); System.out.print(test.classAttribute().value((int) pred) + " - "); System.out.println(Utils.arrayToString(dist)); } System.out.println("\n"); } catch (Exception ex) { System.out.println("Tidak berhasil memprediksi hasil\n"); } }
From source file:ann.Main.java
public static void main(String[] args) { String trainPath = null;/*w w w . j av a2 s. co m*/ String testPath = null; String weights = null; String predictPath = null; char activationFunction = MyANN.SIGMOID_FUNCTION, terminateCondition = MyANN.TERMINATE_MAX_ITERATION, learningRule = MyANN.PERCEPTRON_TRAINING_RULE, topology = MyANN.ONE_PERCEPTRON; double deltaMSE = 0.01; int maxIteration = 500; double learningRate = 0.3; double momentum = 0.2; int nbHidden = 0; int[] hiddenConf = null; boolean isCV = false; int numFolds = 10; boolean isEvaluate = false; if (args.length < 1 || args.length % 2 == 0) { System.out.println("Usage: ANN [-I <path>] [-t O|M] [-r P|B|D] [-h <layer>]" + "\n\t [-a N|G|T] [-L <rate>] [-m <momentum>] [-E D|I|B] [-d <mse>]" + "\n\t [-i <iteration>] [-e <path>|<n>] [-p <path>] <trainDataPath>"); System.out.println(""); System.out.println("-a N|G|T \t set activation function for OnePerceptron"); System.out.println("\t\t N=SIGN, G=SIGMOID, T=STEP"); System.out.println("-d <mse> \t set MSE = <mse> for terminate condition"); System.out.println("-E D|I|B \t\t set terminate condition, D=by MSE, I=by iteration"); System.out.println("-e <path>|<n> \t set test data using <path> or cross-validation w/ folds = <n>"); System.out.println("-h <layer> \t set hidden layer. 
<layer>=0 no hidden layer"); System.out.println("\t\t <layer>=2 => 1 hidden layer with 2 nodes"); System.out.println("\t\t <layer>=2,3 => 2 hidden layer with 2 nodes on first and 3 on second layer"); System.out.println("-I <path> \t set initial weight from <path>"); System.out.println("-i <iteration> \t set max iteration for terminate condition"); System.out.println("-L <rate> \t set learning rate = <rate>"); System.out.println("-m <momentum> \t set momentum = <momentum>"); System.out.println("-p <path> \t set data to predict"); System.out.println("-r P|B|D \t set learning rule for OnePerceptron "); System.out.println("\t\t P=Perceptron training rule,B=Batch, D=DeltaRule"); System.out.println("-t O|M \t\t set topology, O=OnePerceptron, M=MLP"); return; } else { trainPath = args[args.length - 1]; int i = 0; while (i < args.length - 1) { switch (args[i]) { case "-a": switch (args[i + 1]) { case "N": activationFunction = MyANN.SIGN_FUNCTION; break; case "G": activationFunction = MyANN.SIGMOID_FUNCTION; break; case "T": activationFunction = MyANN.STEP_FUNCTION; break; default: break; } break; case "-d": deltaMSE = Double.valueOf(args[i + 1]); break; case "-E": switch (args[i + 1]) { case "D": terminateCondition = MyANN.TERMINATE_MSE; break; case "I": terminateCondition = MyANN.TERMINATE_MAX_ITERATION; break; case "B": terminateCondition = MyANN.TERMINATE_BOTH; default: break; } break; case "-e": if (args[i + 1].length() <= 2) { numFolds = Integer.parseInt(args[i + 1]); isCV = true; } else { isEvaluate = true; testPath = args[i + 1]; } break; case "-h": String[] nbl = args[i + 1].split(","); if (nbl.length == 1) { nbHidden = Integer.parseInt(nbl[0]); if (nbHidden != 0) { hiddenConf = new int[1]; hiddenConf[0] = nbHidden; nbHidden = 1; } } else { nbHidden = nbl.length; hiddenConf = new int[nbHidden]; for (int j = 0; j < nbHidden; j++) { hiddenConf[j] = Integer.parseInt(nbl[j]); } } break; case "-I": weights = args[i + 1]; break; case "-i": maxIteration = 
Integer.parseInt(args[i + 1]); break; case "-L": learningRate = Double.parseDouble(args[i + 1]); break; case "-m": momentum = Double.parseDouble(args[i + 1]); break; case "-p": predictPath = args[i + 1]; break; case "-r": switch (args[i + 1]) { case "P": learningRule = MyANN.PERCEPTRON_TRAINING_RULE; break; case "B": learningRule = MyANN.BATCH_GRADIENT_DESCENT; break; case "D": learningRule = MyANN.DELTA_RULE; break; default: break; } break; case "-t": switch (args[i + 1]) { case "O": topology = MyANN.ONE_PERCEPTRON; break; case "M": topology = MyANN.MULTILAYER_PERCEPTRON; break; default: break; } break; default: break; } i += 2; } } // persiapkan data Instances trainData = null; Instances testData = null; Instances predictData = null; try { ConverterUtils.DataSource source = new ConverterUtils.DataSource(trainPath); trainData = source.getDataSet(); if (trainData.classIndex() == -1) { trainData.setClassIndex(trainData.numAttributes() - 1); } if (testPath != null) { source = new ConverterUtils.DataSource(testPath); testData = source.getDataSet(); if (testData.classIndex() == -1) { testData.setClassIndex(testData.numAttributes() - 1); } } if (predictPath != null) { source = new ConverterUtils.DataSource(predictPath); predictData = source.getDataSet(); if (predictData.classIndex() == -1) { predictData.setClassIndex(predictData.numAttributes() - 1); } } } catch (Exception ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } // persiapkan model dan parameter MyANN myAnn = new MyANN(); WeightParser wp = null; if (weights != null) { wp = new WeightParser(weights); myAnn.setInitialWeight(wp.weight); } myAnn.setActivationFunction(activationFunction); myAnn.setDeltaMSE(deltaMSE); myAnn.setLearningRate(learningRate); myAnn.setLearningRule(learningRule); myAnn.setMaxIteration(maxIteration); myAnn.setMomentum(momentum); myAnn.setTerminationCondition(terminateCondition); myAnn.setThreshold(momentum); myAnn.setTopology(topology); int[] nbLayer = new int[2]; 
if (nbHidden != 0) { nbLayer = new int[2 + nbHidden]; for (int j = 1; j < nbLayer.length - 1; j++) { nbLayer[j] = hiddenConf[j - 1]; } } nbLayer[0] = trainData.numAttributes() - 1; if (trainData.classAttribute().isNominal()) nbLayer[nbLayer.length - 1] = trainData.classAttribute().numValues(); else nbLayer[nbLayer.length - 1] = 1; myAnn.setNbLayers(nbLayer); // debug: cek kondigurasi System.out.println("training data: " + trainPath); System.out.println("settings:"); myAnn.printSetting(); System.out.println(""); // klasifikasi System.out.println("start classifiying..."); try { myAnn.buildClassifier(trainData); } catch (Exception ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } myAnn.printSummary(); System.out.println("done"); System.out.println("-------------------------------------------------"); System.out.print("evaluating "); int[][] result = null; int nbData = trainData.numInstances(); if (isCV) { System.out.println("using " + numFolds + "-folds cross validation"); result = myAnn.crossValidation(trainData, numFolds, new Random(1)); } else if (isEvaluate) { System.out.println("using testData: " + testPath); result = myAnn.evaluate(testData); nbData = testData.numInstances(); } else { System.out.println("using trainData"); result = myAnn.evaluate(trainData); } System.out.println(""); System.out.println("result:"); double accuracy = 0.0; // a+d/total double[] precision = new double[result.length]; // a/a+c; prec[i] = M[i,i] / sumj(M[j,i]) double[] recall = new double[result[0].length]; // a/a+b; rec[i] = M[i,i] / sumj(M[i,j]) for (int i = 0; i < result.length; i++) { for (int j = 0; j < result[0].length; j++) { System.out.print(result[i][j] + " "); if (i == j) { accuracy += result[i][j]; } } System.out.println(""); } // precision for (int i = 0; i < precision.length; i++) { double sum = 0.0; for (int j = 0; j < result[0].length; j++) { sum += result[j][i]; } precision[i] = result[i][i] / sum; } // recall for (int i = 0; i < 
recall.length; i++) { double sum = 0.0; for (int j = 0; j < result[0].length; j++) { sum += result[i][j]; } recall[i] = result[i][i] / sum; } accuracy /= nbData; System.out.println(""); System.out.println("accuracy: " + accuracy); System.out.println("precision: "); for (double p : precision) { System.out.println(p); } System.out.println(""); System.out.println("recall: "); for (double r : recall) System.out.println(r); System.out.println(""); System.out.println("-------------------------------------------------"); if (predictPath != null) { System.out.println("predicting: " + predictPath); for (int i = 0; i < predictData.numInstances(); i++) { try { int idx = myAnn.predictClassIndex(myAnn.distributionForInstance(predictData.instance(i))); System.out.println("instance[" + (i) + "]: " + trainData.classAttribute().value(idx)); } catch (Exception ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } } System.out.println("done"); } /* try { File file = new File("/media/yusuf/5652859E52858389/Data/Kuliah/Semester 7/ML/WekaMiddle/weather.nominal.arff"); File unlabel = new File("/media/yusuf/5652859E52858389/Data/Kuliah/Semester 7/ML/WekaMiddle/weather.nominal.unlabeled.arff"); Instances data, test; ConverterUtils.DataSource source = new ConverterUtils.DataSource(file.getPath()); data = source.getDataSet(); if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } source = new ConverterUtils.DataSource(unlabel.getPath()); test = source.getDataSet(); if (test.classIndex() == -1) { test.setClassIndex(data.numAttributes() - 1); } WeightParser wp = new WeightParser("/media/yusuf/5652859E52858389/Data/Kuliah/Semester 7/ML/khaidzir_myANN/initial.weight"); MyANN myANN = new MyANN(); int[] nbLayers = {4, 3, 2}; myANN.setNbLayers(nbLayers); myANN.setDeltaMSE(0.001); //myANN.setMomentum(0.2); myANN.setLearningRate(0.1); myANN.setTopology(MyANN.MULTILAYER_PERCEPTRON); myANN.setLearningRule(MyANN.PERCEPTRON_TRAINING_RULE); 
myANN.setActivationFunction(MyANN.SIGMOID_FUNCTION); myANN.setMaxIteration(10000); myANN.setTerminationCondition(MyANN.TERMINATE_MAX_ITERATION); //myANN.setInitialWeight(wp.weight); myANN.buildClassifier(data); int[][] ev = myANN.evaluate(data); for (int[] ev1 : ev) { for (int ev2 : ev1) { System.out.print(ev2+", "); } System.out.println(""); } System.out.println(""); //ev = myANN.crossValidation(data, 10, new Random(1)); for (int[] ev1 : ev) { for (int ev2 : ev1) { System.out.print(ev2+", "); } System.out.println(""); } System.out.println(""); /* myANN.buildClassifier(data); int[][] cm = myANN.evaluate(data); double accuracy = 0.0; // a+d/total double[] precision = new double[cm.length]; // a/a+c; prec[i] = M[i,i] / sumj(M[j,i]) double[] recall = new double[cm[0].length]; // a/a+b; rec[i] = M[i,i] / sumj(M[i,j]) for (int i = 0; i < cm.length; i++) { for (int j = 0; j < cm[0].length; j++) { System.out.print(cm[i][j] + " "); if (i==j) { accuracy += cm[i][j]; } } System.out.println(""); } // precision for(int i = 0; i < precision.length; i++) { double sum = 0.0; for (int j = 0; j < cm[0].length; j++) { sum += cm[j][i]; } precision[i] = cm[i][i] / sum; } // recall for(int i = 0; i < recall.length; i++) { double sum = 0.0; for (int j = 0; j < cm[0].length; j++) { sum += cm[i][j]; } recall[i] = cm[i][i] / sum; } accuracy /= data.numInstances(); System.out.println("accuracy: "+accuracy); System.out.println("precision: "); for(double p : precision) { System.out.print(p+", "); } System.out.println(""); System.out.println("recall: "); for (double r : recall) System.out.print(r+", "); System.out.println(""); } catch (Exception ex) { Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); } */ }