Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * WekaRandomSplit.java * Copyright (C) 2009-2018 University of Waikato, Hamilton, New Zealand */ package adams.flow.transformer; import adams.core.QuickInfoHelper; import adams.core.Randomizable; import adams.core.option.OptionUtils; import adams.data.weka.InstancesViewCreator; import adams.flow.container.WekaTrainTestSetContainer; import adams.flow.core.Token; import adams.flow.provenance.ActorType; import adams.flow.provenance.Provenance; import adams.flow.provenance.ProvenanceContainer; import adams.flow.provenance.ProvenanceInformation; import adams.flow.provenance.ProvenanceSupporter; import weka.classifiers.DefaultRandomSplitGenerator; import weka.classifiers.RandomSplitGenerator; import weka.core.Instances; /** <!-- globalinfo-start --> * Splits a dataset into a training and test set according to a specified split percentage. Randomization can be suppressed using with 'preserve order'.<br> * The training set can be accessed in the container with 'Train' and the test set with 'Test'. * <br><br> <!-- globalinfo-end --> * <!-- flow-summary-start --> * Input/output:<br> * - accepts:<br> * weka.core.Instances<br> * - generates:<br> * adams.flow.container.WekaTrainTestSetContainer<br> * <br><br> * Container information:<br> * - adams.flow.container.WekaTrainTestSetContainer: Train, Test, Seed, FoldNumber, FoldCount, Train original indices, Test original indices * <br><br> <!-- flow-summary-end --> * <!-- options-start --> * <pre>-logging-level <OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST> (property: loggingLevel) * The logging level for outputting errors and debugging output. * default: WARNING * </pre> * * <pre>-name <java.lang.String> (property: name) * The name of the actor. * default: WekaRandomSplit * </pre> * * <pre>-annotation <adams.core.base.BaseAnnotation> (property: annotations) * The annotations to attach to this actor. * default: * </pre> * * <pre>-skip <boolean> (property: skip) * If set to true, transformation is skipped and the input token is just forwarded * as it is. * default: false * </pre> * * <pre>-stop-flow-on-error <boolean> (property: stopFlowOnError) * If set to true, the flow execution at this level gets stopped in case this * actor encounters an error; the error gets propagated; useful for critical * actors. * default: false * </pre> * * <pre>-silent <boolean> (property: silent) * If enabled, then no errors are output in the console; Note: the enclosing * actor handler must have this enabled as well. * default: false * </pre> * * <pre>-preserve-order <boolean> (property: preserveOrder) * If set to true, then the order is preserved by suppressing randomization. * default: false * </pre> * * <pre>-seed <long> (property: seed) * The seed value for the randomization. * default: 1 * </pre> * * <pre>-percentage <double> (property: percentage) * The percentage for the split (between 0 and 1). * default: 0.66 * minimum: 1.0E-4 * maximum: 0.9999 * </pre> * * <pre>-create-view <boolean> (property: createView) * If enabled, views of the dataset are created instead of actual copies. * default: false * </pre> * * <pre>-generator <weka.classifiers.RandomSplitGenerator> (property: generator) * The scheme to use for generating the split; the actor options take precedence * over the scheme's ones. * default: weka.classifiers.DefaultRandomSplitGenerator * </pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) */ public class WekaRandomSplit extends AbstractTransformer implements Randomizable, ProvenanceSupporter, InstancesViewCreator { /** for serialization. */ private static final long serialVersionUID = -6447945986570354931L; /** whether to preserve the order. */ protected boolean m_PreserveOrder; /** the seed value. */ protected long m_Seed; /** the percentage for the split (0-1). */ protected double m_Percentage; /** whether to create a view only. */ protected boolean m_CreateView; /** the split generator to use. */ protected RandomSplitGenerator m_Generator; /** * Returns a string describing the object. * * @return a description suitable for displaying in the gui */ @Override public String globalInfo() { return "Splits a dataset into a training and test set according to a " + "specified split percentage. Randomization can be suppressed using " + "with 'preserve order'.\n" + "The training set can be accessed in the container with '" + WekaTrainTestSetContainer.VALUE_TRAIN + "' " + "and the test set with '" + WekaTrainTestSetContainer.VALUE_TEST + "'."; } /** * Adds options to the internal list of options. */ @Override public void defineOptions() { super.defineOptions(); m_OptionManager.add("preserve-order", "preserveOrder", false); m_OptionManager.add("seed", "seed", 1L); m_OptionManager.add("percentage", "percentage", 0.66, 0.0001, 0.9999); m_OptionManager.add("create-view", "createView", false); m_OptionManager.add("generator", "generator", new DefaultRandomSplitGenerator()); } /** * Returns a quick info about the actor, which will be displayed in the GUI. * * @return null if no info available, otherwise short string */ @Override public String getQuickInfo() { String result; String value; result = QuickInfoHelper.toString(this, "percentage", m_Percentage); result += QuickInfoHelper.toString(this, "seed", m_Seed, ", seed: "); result += QuickInfoHelper.toString(this, "preserveOrder", m_PreserveOrder, "order preserved", ", "); value = QuickInfoHelper.toString(this, "createView", m_CreateView, ", view only"); if (value != null) result += value; return result; } /** * Returns the class that the consumer accepts. * * @return <!-- flow-accepts-start -->weka.core.Instances.class<!-- flow-accepts-end --> */ public Class[] accepts() { return new Class[] { Instances.class }; } /** * Returns the class of objects that it generates. * * @return <!-- flow-generates-start -->adams.flow.container.WekaTrainTestSetContainer.class<!-- flow-generates-end --> */ public Class[] generates() { return new Class[] { WekaTrainTestSetContainer.class }; } /** * Sets whether to preserve order and suppress randomization. * * @param value if true then no randomization will happen */ public void setPreserveOrder(boolean value) { m_PreserveOrder = value; reset(); } /** * Returns whether to preserve order and suppress randomization. * * @return true if to preserve order and suppress randomization */ public boolean getPreserveOrder() { return m_PreserveOrder; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String preserveOrderTipText() { return "If set to true, then the order is preserved by suppressing randomization; overrides the value defined by the split generator scheme."; } /** * Sets the seed value. * * @param value the seed */ public void setSeed(long value) { m_Seed = value; reset(); } /** * Returns the seed value. * * @return the seed */ public long getSeed() { return m_Seed; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String seedTipText() { return "The seed value for the randomization; overrides the value defined by the split generator scheme."; } /** * Sets the percentage (0-1). * * @param value the percentage */ public void setPercentage(double value) { if ((value > 0) && (value < 1)) { m_Percentage = value; reset(); } else { getLogger().severe("Percentage must be between 0 and 1 (provided: " + value + ")!"); } } /** * Returns the percentage (0-1). * * @return the percentage */ public double getPercentage() { return m_Percentage; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String percentageTipText() { return "The percentage for the split (between 0 and 1); overrides the value defined by the split generator scheme."; } /** * Sets whether to create a view only. * * @param value true if to create a view only */ public void setCreateView(boolean value) { m_CreateView = value; reset(); } /** * Returns whether to create only a view. * * @return true if to create view only */ public boolean getCreateView() { return m_CreateView; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String createViewTipText() { return "If enabled, views of the dataset are created instead of actual copies; overrides the value defined by the split generator scheme."; } /** * Sets the scheme for generating the split. * * @param value the generator */ public void setGenerator(RandomSplitGenerator value) { m_Generator = value; reset(); } /** * Returns the scheme for generating the split. * * @return the generator */ public RandomSplitGenerator getGenerator() { return m_Generator; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String generatorTipText() { return "The scheme to use for generating the split; the actor options take precedence over the scheme's ones."; } /** * Executes the flow item. * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances inst; RandomSplitGenerator generator; result = null; inst = new Instances((Instances) m_InputToken.getPayload()); try { generator = (RandomSplitGenerator) OptionUtils.shallowCopy(m_Generator); generator.setData(inst); generator.setSeed(m_Seed); generator.setPercentage(m_Percentage); generator.setPreserveOrder(m_PreserveOrder); generator.setUseViews(m_CreateView); } catch (Exception e) { generator = null; result = handleException("Failed to generate split!", e); } if (result == null) m_OutputToken = new Token(generator.next()); updateProvenance(m_OutputToken); return result; } /** * Updates the provenance information in the provided container. * * @param cont the provenance container to update */ public void updateProvenance(ProvenanceContainer cont) { if (Provenance.getSingleton().isEnabled()) { if (m_InputToken.hasProvenance()) cont.setProvenance(m_InputToken.getProvenance().getClone()); cont.addProvenance(new ProvenanceInformation(ActorType.DATAGENERATOR, m_InputToken.getPayload().getClass(), this, m_OutputToken.getPayload().getClass())); } } }