Java tutorial
/* * EvaluatePrequential.java * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package moa.tasks; import java.io.File; import java.io.FileOutputStream; import java.io.PrintStream; import moa.core.Measurement; import moa.core.ObjectRepository; import moa.core.TimingUtils; import moa.evaluation.ClassificationPerformanceEvaluator; import moa.evaluation.LearningCurve; import moa.evaluation.LearningEvaluation; import moa.learners.ChangeDetectorLearner; import moa.options.ClassOption; import moa.options.FileOption; import moa.options.IntOption; import moa.streams.clustering.ClusterEvent; import moa.streams.generators.cd.ConceptDriftGenerator; import weka.core.Instance; import weka.core.Utils; /** * Task for evaluating a classifier on a stream by testing then training with each example in sequence. * * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) * @version $Revision: 7 $ */ public class EvaluateConceptDrift extends ConceptDriftMainTask { @Override public String getPurposeString() { return "Evaluates a classifier on a stream by testing then training with each example in sequence."; } private static final long serialVersionUID = 1L; public ClassOption learnerOption = new ClassOption("learner", 'l', "Change detector to train.", ChangeDetectorLearner.class, "ChangeDetectorLearner"); public ClassOption streamOption = new ClassOption("stream", 's', "Stream to learn from.", ConceptDriftGenerator.class, "GradualChangeGenerator"); public ClassOption evaluatorOption = new ClassOption("evaluator", 'e', "Classification performance evaluation method.", ClassificationPerformanceEvaluator.class, "BasicConceptDriftPerformanceEvaluator"); public IntOption instanceLimitOption = new IntOption("instanceLimit", 'i', "Maximum number of instances to test/train on (-1 = no limit).", 1000, -1, Integer.MAX_VALUE); public IntOption timeLimitOption = new IntOption("timeLimit", 't', "Maximum number of seconds to test/train for (-1 = no limit).", -1, -1, Integer.MAX_VALUE); public IntOption sampleFrequencyOption = new IntOption("sampleFrequency", 'f', "How many instances between samples of the learning performance.", 10, 0, Integer.MAX_VALUE); /*public IntOption memCheckFrequencyOption = new IntOption( "memCheckFrequency", 'q', "How many instances between memory bound checks.", 100000, 0, Integer.MAX_VALUE);*/ public FileOption dumpFileOption = new FileOption("dumpFile", 'd', "File to append intermediate csv results to.", null, "csv", true); /*public FileOption outputPredictionFileOption = new FileOption("outputPredictionFile", 'o', "File to append output predictions to.", null, "pred", true);*/ @Override public Class<?> getTaskResultType() { return LearningCurve.class; } @Override protected Object doMainTask(TaskMonitor monitor, ObjectRepository repository) { ChangeDetectorLearner learner = (ChangeDetectorLearner) getPreparedClassOption(this.learnerOption); ConceptDriftGenerator stream = (ConceptDriftGenerator) getPreparedClassOption(this.streamOption); this.setEventsList(stream.getEventsList()); ClassificationPerformanceEvaluator evaluator = (ClassificationPerformanceEvaluator) getPreparedClassOption( this.evaluatorOption); LearningCurve learningCurve = new LearningCurve("learning evaluation instances"); learner.setModelContext(stream.getHeader()); int maxInstances = this.instanceLimitOption.getValue(); long instancesProcessed = 0; int maxSeconds = this.timeLimitOption.getValue(); int secondsElapsed = 0; monitor.setCurrentActivity("Evaluating learner...", -1.0); File dumpFile = this.dumpFileOption.getFile(); PrintStream immediateResultStream = null; if (dumpFile != null) { try { if (dumpFile.exists()) { immediateResultStream = new PrintStream(new FileOutputStream(dumpFile, true), true); } else { immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true); } } catch (Exception ex) { throw new RuntimeException("Unable to open immediate result file: " + dumpFile, ex); } } //File for output predictions /* File outputPredictionFile = this.outputPredictionFileOption.getFile(); PrintStream outputPredictionResultStream = null; if (outputPredictionFile != null) { try { if (outputPredictionFile.exists()) { outputPredictionResultStream = new PrintStream( new FileOutputStream(outputPredictionFile, true), true); } else { outputPredictionResultStream = new PrintStream( new FileOutputStream(outputPredictionFile), true); } } catch (Exception ex) { throw new RuntimeException( "Unable to open prediction result file: " + outputPredictionFile, ex); } }*/ boolean firstDump = true; boolean preciseCPUTiming = TimingUtils.enablePreciseTiming(); long evaluateStartTime = TimingUtils.getNanoCPUTimeOfCurrentThread(); long lastEvaluateStartTime = evaluateStartTime; double RAMHours = 0.0; while (stream.hasMoreInstances() && ((maxInstances < 0) || (instancesProcessed < maxInstances)) && ((maxSeconds < 0) || (secondsElapsed < maxSeconds))) { Instance trainInst = stream.nextInstance(); Instance testInst = (Instance) trainInst.copy(); int trueClass = (int) trainInst.classValue(); //testInst.setClassMissing(); double[] prediction = learner.getVotesForInstance(testInst); if (prediction[0] == 1) { //Change detected this.getEventsList().add(new ClusterEvent(this, instancesProcessed, "Detected Change", "Drift")); } // Output prediction /* if (outputPredictionFile != null) { outputPredictionResultStream.println(Utils.maxIndex(prediction) + "," + trueClass); }*/ //evaluator.addClassificationAttempt(trueClass, prediction, testInst.weight()); evaluator.addResult(testInst, prediction); learner.trainOnInstance(trainInst); instancesProcessed++; if (instancesProcessed % this.sampleFrequencyOption.getValue() == 0 || stream.hasMoreInstances() == false) { long evaluateTime = TimingUtils.getNanoCPUTimeOfCurrentThread(); double time = TimingUtils.nanoTimeToSeconds(evaluateTime - evaluateStartTime); double timeIncrement = TimingUtils.nanoTimeToSeconds(evaluateTime - lastEvaluateStartTime); double RAMHoursIncrement = learner.measureByteSize() / (1024.0 * 1024.0 * 1024.0); //GBs RAMHoursIncrement *= (timeIncrement / 3600.0); //Hours RAMHours += RAMHoursIncrement; lastEvaluateStartTime = evaluateTime; learningCurve.insertEntry(new LearningEvaluation( new Measurement[] { new Measurement("learning evaluation instances", instancesProcessed), new Measurement("evaluation time (" + (preciseCPUTiming ? "cpu " : "") + "seconds)", time), new Measurement("model cost (RAM-Hours)", RAMHours) }, evaluator, learner)); if (immediateResultStream != null) { if (firstDump) { immediateResultStream.println(learningCurve.headerToString()); firstDump = false; } immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1)); immediateResultStream.flush(); } } if (instancesProcessed % INSTANCES_BETWEEN_MONITOR_UPDATES == 0) { if (monitor.taskShouldAbort()) { return null; } long estimatedRemainingInstances = stream.estimatedRemainingInstances(); if (maxInstances > 0) { long maxRemaining = maxInstances - instancesProcessed; if ((estimatedRemainingInstances < 0) || (maxRemaining < estimatedRemainingInstances)) { estimatedRemainingInstances = maxRemaining; } } monitor.setCurrentActivityFractionComplete(estimatedRemainingInstances < 0 ? -1.0 : (double) instancesProcessed / (double) (instancesProcessed + estimatedRemainingInstances)); if (monitor.resultPreviewRequested()) { monitor.setLatestResultPreview(learningCurve.copy()); } secondsElapsed = (int) TimingUtils .nanoTimeToSeconds(TimingUtils.getNanoCPUTimeOfCurrentThread() - evaluateStartTime); } } if (immediateResultStream != null) { immediateResultStream.close(); } /* if (outputPredictionResultStream != null) { outputPredictionResultStream.close(); }*/ return learningCurve; } }