Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find example usage for weka.core Instances add.

Prototype

@Override
public boolean add(Instance instance) 

Document

Adds one instance to the end of the set. The instance is shallow-copied before it is added, so later changes to the passed object do not affect the dataset; compatibility of the instance with the dataset is not checked.
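
A minimal, hedged sketch of the call under the Weka 3.7+ API, where DenseInstance implements the Instance interface (the relation and attribute names below are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesAddExample {
    public static void main(String[] args) {
        // two numeric attributes (names are illustrative)
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));

        // empty dataset with an initial capacity of 10
        Instances data = new Instances("example", atts, 10);

        // append one row: add() shallow-copies the instance and sets the
        // copy's dataset reference, so later changes to the local object
        // do not affect the dataset
        data.add(new DenseInstance(1.0, new double[] { 1.0, 2.0 }));

        System.out.println(data.numInstances()); // prints 1
    }
}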

Usage

From source file:org.pentaho.di.scoring.WekaScoringData.java

License:Open Source License

/**
 * Generates a batch of predictions (more specifically, an array of output
 * rows containing all input Kettle fields plus new fields that hold the
 * prediction(s)) for each incoming Kettle row given a Weka model.
 *
 * @param inputMeta  the meta data for the incoming rows
 * @param outputMeta the meta data for the output rows
 * @param inputRows  the values of the incoming rows
 * @param meta       meta data for this step
 * @return a Kettle row containing all incoming fields along with new ones
 * that hold the prediction(s)
 * @throws Exception if an error occurs
 */
public Object[][] generatePredictions(RowMetaInterface inputMeta, RowMetaInterface outputMeta,
        List<Object[]> inputRows, WekaScoringMeta meta) throws Exception {

    int[] mappingIndexes = m_mappingIndexes;
    WekaScoringModel model = getModel(); // copy of the model for this copy of
    // the step
    boolean outputProbs = meta.getOutputProbabilities();
    boolean supervised = model.isSupervisedLearningModel();

    Attribute classAtt = null;
    if (supervised) {
        classAtt = model.getHeader().classAttribute();
    }

    Instances batch = new Instances(model.getHeader(), inputRows.size());
    for (Object[] r : inputRows) {
        Instance inst = constructInstance(inputMeta, r, mappingIndexes, model, true);
        batch.add(inst);
    }

    double[][] preds = model.distributionsForInstances(batch);

    Object[][] result = new Object[preds.length][];
    for (int i = 0; i < preds.length; i++) {
        // First copy the input data to the new result...
        Object[] resultRow = RowDataUtil.resizeArray(inputRows.get(i), outputMeta.size());
        int index = inputMeta.size();

        double[] prediction = preds[i];

        if (prediction.length == 1 || !outputProbs) {
            if (supervised) {
                if (classAtt.isNumeric()) {
                    Double newVal = new Double(prediction[0]);
                    resultRow[index++] = newVal;
                } else {
                    int maxProb = Utils.maxIndex(prediction);
                    if (prediction[maxProb] > 0) {
                        String newVal = classAtt.value(maxProb);
                        resultRow[index++] = newVal;
                    } else {
                        String newVal = BaseMessages.getString(WekaScoringMeta.PKG,
                                "WekaScoringData.Message.UnableToPredict"); //$NON-NLS-1$
                        resultRow[index++] = newVal;
                    }
                }
            } else {
                int maxProb = Utils.maxIndex(prediction);
                if (prediction[maxProb] > 0) {
                    Double newVal = new Double(maxProb);
                    resultRow[index++] = newVal;
                } else {
                    String newVal = BaseMessages.getString(WekaScoringMeta.PKG,
                            "WekaScoringData.Message.UnableToPredictCluster"); //$NON-NLS-1$
                    resultRow[index++] = newVal;
                }
            }
        } else {
            // output probability distribution
            for (int j = 0; j < prediction.length; j++) {
                Double newVal = new Double(prediction[j]);
                resultRow[index++] = newVal;
            }
        }

        result[i] = resultRow;
    }

    return result;
}
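
The pattern above (create an empty Instances from the model's training header, add one Instance per incoming row, then score the whole batch) carries over to plain Weka classifiers as well. A hedged sketch under the Weka 3.7+ API; 'header' and 'cls' are assumptions standing in for the training header (with its class index set) and a trained classifier:

// Hedged sketch: batch scoring with a plain Weka classifier.
// 'header' is assumed to be an empty copy of the training header with the
// class index set; 'rows' holds the raw attribute values of each incoming row.
public static double[][] scoreBatch(weka.classifiers.Classifier cls, Instances header,
        java.util.List<double[]> rows) throws Exception {
    Instances batch = new Instances(header, rows.size());
    for (double[] r : rows) {
        // add() copies the instance and sets the copy's dataset reference
        batch.add(new weka.core.DenseInstance(1.0, r));
    }
    double[][] preds = new double[batch.numInstances()][];
    for (int i = 0; i < batch.numInstances(); i++) {
        preds[i] = cls.distributionForInstance(batch.instance(i));
    }
    return preds;
}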

From source file:org.processmining.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}
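
Note that this example (like several others on this page) uses the pre-3.7 Weka API, in which weka.core.Instance is a concrete class and attribute containers are FastVectors. Under Weka 3.7+, Instance is an interface and the loop body would build a DenseInstance instead; a rough sketch, with data, wekaAtt, doubleAttValue, and clusterName as in the code above:

// Weka 3.7+ equivalent of the loop body above (variables as in the original)
Instance instance0 = new DenseInstance(data.numAttributes());
instance0.setDataset(data); // attach the header first so the nominal class label resolves
instance0.setValue(wekaAtt, doubleAttValue);
instance0.setClassValue(clusterName);
data.add(instance0); // add() shallow-copies instance0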

From source file:org.processmining.analysis.decisionmining.DecisionAnalyser.java

License:Open Source License

/**
 * Analyses the given list of decision points according to the context
 * specified. Furthermore, the context is provided with some visualization
 * of the analysis result.
 * 
 * @param decisionPoints
 *            the list of decision points to be analysed
 * @param log
 *            the log to be analysed
 * @param highLevelPN
 *            the simulation model to export discovered data dependencies
 */
public void analyse(List<DecisionPoint> decisionPoints, DecisionMiningLogReader log, HLPetriNet highLevelPN) {

    Iterator<DecisionPoint> allDecisionPoints = decisionPoints.iterator();
    while (allDecisionPoints.hasNext()) {
        DecisionPoint currentDP = allDecisionPoints.next();

        // initialize the classifying data structure
        initClassifier();

        // create attribute information
        FastVector attributeInfo = currentDP.getContext().getAttributeInfo();

        // create empty data set with attribute information
        Instances data = new Instances(currentDP.getName(), attributeInfo, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // create learning instances
        List<DecisionCategory> allCategories = currentDP.getTargetConcept();
        Iterator<DecisionCategory> categoryIterator = allCategories.iterator();
        while (categoryIterator.hasNext()) {
            DecisionCategory branch = categoryIterator.next();
            // create all instances for one class at once
            ArrayList belongingTraces = log.getTracesInCategory(branch);
            Iterator traceIterator = belongingTraces.iterator();
            while (traceIterator.hasNext()) {
                DecisionMiningLogTrace trace = (DecisionMiningLogTrace) traceIterator.next();
                // one instance per trace
                // (future work: loops may result in multiple instances per
                // trace!)
                Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log,
                        currentDP.getContext());
                // classify instance
                instance.setClassValue(branch.toString());
                data.add(instance);
            }
        }

        // in case no single learning instance can be provided (as the decision
        // point is never reached, or the decision classes cannot be specified
        // properly) --> do not call the algorithm
        if (data.numInstances() == 0) {
            currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available"));
        }
        // actually solve the classification problem
        else {
            try {
                myClassifier.buildClassifier(data);
                // build up result visualization
                currentDP.getContext().setResultViewPanel(createResultVisualization());
                // create evaluation statistics of classifier for the user
                currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data));
                // only derive discovered data dependencies for decision
                // point if tree is not trivial
                if (((J48) myClassifier).measureNumRules() > 0) {
                    // TODO - derive the rules in a
                    // a) classifier-independent way
                    // b) cpn-independent way
                    currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN);
                }
            } catch (Exception ex) {
                ex.printStackTrace();
                currentDP.getContext().setResultViewPanel(
                        createMessagePanel("Error while solving the classification problem"));
            }
        }
    }
}

From source file:org.processmining.analysis.decisionmining.DecisionAnalyserForAuLdg.java

License:Open Source License

/**
 * Analyses the given list of decision points according to the context
 * specified. Furthermore, the context is provided with some visualization of
 * the analysis result.
 * 
 * @param decisionPoints
 *          the list of decision points to be analysed
 * @param log
 *          the log to be analysed
 * @param highLevelPN
 *          the simulation model to export discovered data dependencies
 */
public void analyse(List<DecisionPointForAuLdg> decisionPoints, DecisionMiningLogReaderForAuLdg log,
        HLPetriNet highLevelPN) {

    Iterator<DecisionPointForAuLdg> allDecisionPoints = decisionPoints.iterator();
    while (allDecisionPoints.hasNext()) {
        DecisionPointForAuLdg currentDP = allDecisionPoints.next();

        // initialize the classifying data structure
        initClassifier();

        // create attribute information
        FastVector attributeInfo = currentDP.getContext().getAttributeInfo();

        // create empty data set with attribute information
        Instances data = new Instances(currentDP.getName(), attributeInfo, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // create learning instances
        List<DecisionCategoryForAuLdg> allCategories = currentDP.getTargetConcept();
        Iterator<DecisionCategoryForAuLdg> categoryIterator = allCategories.iterator();
        while (categoryIterator.hasNext()) {
            DecisionCategoryForAuLdg branch = categoryIterator.next();
            // create all instances for one class at once
            ArrayList belongingTraces = log.getTracesInCategory(branch);
            Iterator traceIterator = belongingTraces.iterator();
            while (traceIterator.hasNext()) {
                DecisionMiningLogTraceForAuLdg trace = (DecisionMiningLogTraceForAuLdg) traceIterator.next();
                // one instance per trace
                // (future work: loops may result in multiple instances per trace!)
                Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log,
                        currentDP.getContext());
                // classify instance
                instance.setClassValue(branch.toString());
                data.add(instance);
            }
        }

        // in case no single learning instance can be provided (as the decision
        // point is never reached, or the decision classes cannot be specified
        // properly) --> do not call the algorithm
        if (data.numInstances() == 0) {
            currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available"));
        }
        // actually solve the classification problem
        else {
            try {
                myClassifier.buildClassifier(data);
                // build up result visualization
                currentDP.getContext().setResultViewPanel(createResultVisualization());
                // create evaluation statistics of classifier for the user
                currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data));
                // only derive discovered data dependencies for decision point if tree
                // is not trivial
                if (((J48) myClassifier).measureNumRules() > 0) {
                    // TODO - derive the rules in a
                    // a) classifier-independent way
                    // b) cpn-independent way
                    currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN);
                }
            } catch (Exception ex) {
                ex.printStackTrace();
                currentDP.getContext().setResultViewPanel(
                        createMessagePanel("Error while solving the classification problem"));
            }
        }
    }
}

From source file:org.processmining.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection
    // scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.prom5.analysis.clusteranalysis.ClusterDecisionAnalyzer.java

License:Open Source License

public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();

    // learning
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);

    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        System.out.println("fail to add");
                    }
                }
            }
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}

From source file:org.prom5.analysis.decisionmining.DecisionAnalyser.java

License:Open Source License

/**
 * Analyses the given list of decision points according to the context specified.
 * Furthermore, the context is provided with some visualization of the analysis result.
 * @param decisionPoints the list of decision points to be analysed
 * @param log the log to be analysed
 * @param highLevelPN the simulation model to export discovered data dependencies
 */
public void analyse(List<DecisionPoint> decisionPoints, DecisionMiningLogReader log, HLPetriNet highLevelPN) {

    Iterator<DecisionPoint> allDecisionPoints = decisionPoints.iterator();
    while (allDecisionPoints.hasNext()) {
        DecisionPoint currentDP = allDecisionPoints.next();

        // initialize the classifying data structure
        initClassifier();

        // create attribute information
        FastVector attributeInfo = currentDP.getContext().getAttributeInfo();

        // create empty data set with attribute information
        Instances data = new Instances(currentDP.getName(), attributeInfo, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // create learning instances
        List<DecisionCategory> allCategories = currentDP.getTargetConcept();
        Iterator<DecisionCategory> categoryIterator = allCategories.iterator();
        while (categoryIterator.hasNext()) {
            DecisionCategory branch = categoryIterator.next();
            // create all instances for one class at once
            ArrayList belongingTraces = log.getTracesInCategory(branch);
            Iterator traceIterator = belongingTraces.iterator();
            while (traceIterator.hasNext()) {
                DecisionMiningLogTrace trace = (DecisionMiningLogTrace) traceIterator.next();
                // one instance per trace
                // (future work: loops may result in multiple instances per trace!)
                Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log,
                        currentDP.getContext());
                // classify instance
                instance.setClassValue(branch.toString());
                data.add(instance);
            }
        }

        // in case no single learning instance can be provided (as the decision point is never
        // reached, or the decision classes cannot be specified properly) --> do not call the algorithm
        if (data.numInstances() == 0) {
            currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available"));
        }
        // actually solve the classification problem
        else {
            try {
                myClassifier.buildClassifier(data);
                // build up result visualization
                currentDP.getContext().setResultViewPanel(createResultVisualization());
                // create evaluation statistics of classifier for the user
                currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data));
                // only derive discovered data dependencies for decision point if tree is not trivial
                if (((J48) myClassifier).measureNumRules() > 0) {
                    // TODO - derive the rules in a
                    // a) classifier-independent way
                    // b) cpn-independent way
                    currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN);
                }
            } catch (Exception ex) {
                ex.printStackTrace();
                currentDP.getContext().setResultViewPanel(
                        createMessagePanel("Error while solving the classification problem"));
            }
        }
    }
}

From source file:org.prom5.analysis.traceclustering.profile.AggregateProfile.java

License:Open Source License

public Instances getWekaData() {
    Instances data = null;

    // create attribute information
    FastVector attributeInfo = new FastVector();
    // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }

    return data;
}

From source file:org.scify.NewSumServer.Server.MachineLearning.dataSets.java

License:Apache License

/**
 * Generates the training dataset.
 *
 * @param file the INSECTDB file to read the class graphs from
 * @return the training dataset as an Instances object
 */
public static Instances trainingSet(INSECTDB file) {
    ArrayList<Attribute> atts;
    ArrayList<String> attVals = new ArrayList<String>();
    ArrayList<String> vectors;
    Instances data;
    double[] vals;

    atts = new ArrayList<Attribute>(); // Set up attributes

    HashSet<String> hasGnames = new HashSet<String>(); //create a HashSet with all class graph names 
    hasGnames.addAll(Arrays.asList(file.getObjectList("cg")));
    for (String index : hasGnames) { // for each class graph name add an attribute
        atts.add(new Attribute(index));
        attVals.add(index);
    }

    atts.add(new Attribute("Class", attVals)); // fill the attribute class with with given  class graph name

    data = new Instances("train Set for Category Classification ", atts, 0);//create Instances object

    vectors = vector.trainingVector(file); // take all instance vectors 

    // fill with data

    for (String vi : vectors) { // for each instance

        String[] vectorTable = vi.trim().split(",");

        vals = new double[data.numAttributes()];

        for (int i = 0; i < vectorTable.length - 2; i++) {
            vals[i] = Double.parseDouble(vectorTable[i]);

        }

        vals[vectorTable.length - 1] = attVals.indexOf(vectorTable[vectorTable.length - 1]); //Class name

        data.add(new DenseInstance(1.0, vals)); // append the row to the dataset

    }

    return data;
}
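
Before this dataset can be used for training, the caller still has to select the class attribute, since trainingSet itself never calls setClassIndex. A hedged usage sketch (J48 stands in for whatever learner the project actually uses; the fragment belongs in a method that declares throws Exception):

// 'file' is an INSECTDB object as in the method above
Instances train = dataSets.trainingSet(file);
train.setClassIndex(train.numAttributes() - 1); // the last attribute is "Class"
weka.classifiers.trees.J48 tree = new weka.classifiers.trees.J48();
tree.buildClassifier(train); // throws Exception on failure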

From source file:org.scify.NewSumServer.Server.MachineLearning.dataSets.java

License:Apache License

/**
 * Generates the labeling dataset.
 *
 * @param file the INSECTDB file
 * @param Ivector the similarity vector between the given mail and all class
 *            graphs
 * @return the labeling dataset as an Instances object
 */
public static Instances labelingSet(INSECTDB file, String Ivector) {

    ArrayList<Attribute> atts;
    ArrayList<String> attVals = new ArrayList<String>();
    Instances data;
    double[] vals;

    atts = new ArrayList<Attribute>(); // Set up attributes

    HashSet<String> hasGnames = new HashSet<String>(); // create a HashSet with all class graph names 
    hasGnames.addAll(Arrays.asList(file.getObjectList("cg")));
    for (String index : hasGnames) {
        atts.add(new Attribute(index));
        attVals.add(index);
    }

    atts.add(new Attribute("Class", attVals)); // fill the attribute with the given class graph name

    data = new Instances("label Set for Category Classification ", atts, 0);//create Instances object

    //fill with data

    String[] vectorTable = Ivector.trim().split(",");

    vals = new double[data.numAttributes()];
    int count = 0;
    for (String value : vectorTable) { // for each vector component
        vals[count] = Double.parseDouble(value);
        count++;
    }

    vals[count] = Utils.missingValue(); // mark the class value as missing (to be predicted)

    data.add(new DenseInstance(1.0, vals)); // append the row to the dataset

    return data;

}
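
A typical next step for such a labeling set is to let an already trained classifier fill in the missing class value. A hedged sketch, where cls is assumed to be a classifier built on the matching trainingSet output (classifyInstance throws a checked Exception):

Instances unlabeled = dataSets.labelingSet(file, Ivector);
unlabeled.setClassIndex(unlabeled.numAttributes() - 1);
// classifyInstance returns the index of the predicted nominal value
double predIndex = cls.classifyInstance(unlabeled.instance(0));
String predictedClass = unlabeled.classAttribute().value((int) predIndex);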