List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
From source file:org.pentaho.di.scoring.WekaScoringData.java
License:Open Source License
/** * Generates a batch of predictions (more specifically, an array of output * rows containing all input Kettle fields plus new fields that hold the * prediction(s)) for each incoming Kettle row given a Weka model. * * @param inputMeta the meta data for the incoming rows * @param outputMeta the meta data for the output rows * @param inputRow the values of the incoming row * @param meta meta data for this step * @return a Kettle row containing all incoming fields along with new ones * that hold the prediction(s)//from w ww .jav a 2 s .com * @throws Exception if an error occurs */ public Object[][] generatePredictions(RowMetaInterface inputMeta, RowMetaInterface outputMeta, List<Object[]> inputRows, WekaScoringMeta meta) throws Exception { int[] mappingIndexes = m_mappingIndexes; WekaScoringModel model = getModel(); // copy of the model for this copy of // the step boolean outputProbs = meta.getOutputProbabilities(); boolean supervised = model.isSupervisedLearningModel(); Attribute classAtt = null; if (supervised) { classAtt = model.getHeader().classAttribute(); } Instances batch = new Instances(model.getHeader(), inputRows.size()); for (Object[] r : inputRows) { Instance inst = constructInstance(inputMeta, r, mappingIndexes, model, true); batch.add(inst); } double[][] preds = model.distributionsForInstances(batch); Object[][] result = new Object[preds.length][]; for (int i = 0; i < preds.length; i++) { // First copy the input data to the new result... 
Object[] resultRow = RowDataUtil.resizeArray(inputRows.get(i), outputMeta.size()); int index = inputMeta.size(); double[] prediction = preds[i]; if (prediction.length == 1 || !outputProbs) { if (supervised) { if (classAtt.isNumeric()) { Double newVal = new Double(prediction[0]); resultRow[index++] = newVal; } else { int maxProb = Utils.maxIndex(prediction); if (prediction[maxProb] > 0) { String newVal = classAtt.value(maxProb); resultRow[index++] = newVal; } else { String newVal = BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringData.Message.UnableToPredict"); //$NON-NLS-1$ resultRow[index++] = newVal; } } } else { int maxProb = Utils.maxIndex(prediction); if (prediction[maxProb] > 0) { Double newVal = new Double(maxProb); resultRow[index++] = newVal; } else { String newVal = BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringData.Message.UnableToPredictCluster"); //$NON-NLS-1$ resultRow[index++] = newVal; } } } else { // output probability distribution for (int j = 0; j < prediction.length; j++) { Double newVal = new Double(prediction[j]); resultRow[index++] = newVal; } } result[i] = resultRow; } return result; }
From source file:org.processmining.analysis.clusteranalysis.ClusterDecisionAnalyzer.java
License:Open Source License
/**
 * Builds the Weka learning data set for cluster analysis: one numeric attribute
 * per selected profile item plus a nominal "Cluster" target attribute, and one
 * instance per trace, labeled with the name of the cluster it belongs to.
 *
 * @return the populated Weka data set with the class index set to "Cluster"
 */
public Instances getDataInfo() {
    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope:
    // only profile items whose checkbox is selected become attributes
    for (int i = 0; i < agProfiles.numberOfItems(); i++) {
        if (checks[i].isSelected()) {
            String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i));
            Attribute wekaAtt = new Attribute(name);
            attributeInfo.addElement(wekaAtt);
        }
    }
    // for target concept: one nominal value per cluster name
    FastVector my_nominal_values = new FastVector(clusters.getClusters().size());
    Attribute targetConcept = null;
    for (Cluster aCluster : clusters.getClusters()) {
        my_nominal_values.addElement(aCluster.getName());
    }
    targetConcept = new Attribute("Cluster", my_nominal_values);
    attributeInfo.addElement(targetConcept);
    attributeInfo.trimToSize();
    // learning: the last attribute ("Cluster") is the class attribute
    Instances data = new Instances("Clustering", attributeInfo, 0);
    data.setClassIndex(data.numAttributes() - 1);
    for (Cluster aCluster : clusters.getClusters()) {
        String clusterName = aCluster.getName();
        for (Integer i : aCluster.getTraceIndices()) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < agProfiles.numberOfItems(); j++) {
                if (checks[j].isSelected()) {
                    String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j));
                    Attribute wekaAtt = data.attribute(name);
                    if (wekaAtt != null) {
                        // NOTE(review): assumes agProfiles.getValue(i, j) yields a value the
                        // Double constructor accepts (double or numeric String) -- confirm
                        double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue();
                        instance0.setValue(wekaAtt, doubleAttValue);
                    } else {
                        // attribute lookup failed (e.g. name clash after character replacement)
                        System.out.println("fail to add");
                    }
                }
            }
            // attach the instance to the data set before assigning the nominal class value
            instance0.setDataset(data);
            instance0.setClassValue(clusterName);
            data.add(instance0);
        }
    }
    return data;
}
From source file:org.processmining.analysis.decisionmining.DecisionAnalyser.java
License:Open Source License
/** * Analyses the given list of decision points according to the context * specified. Furthermore, the context is provided with some visualization * of the analysis result./*from w w w.ja va 2 s. c o m*/ * * @param decisionPoints * the list of decision points to be analysed * @param log * the log to be analysed * @param highLevelPN * the simulation model to export discovered data dependencies */ public void analyse(List<DecisionPoint> decisionPoints, DecisionMiningLogReader log, HLPetriNet highLevelPN) { Iterator<DecisionPoint> allDecisionPoints = decisionPoints.iterator(); while (allDecisionPoints.hasNext()) { DecisionPoint currentDP = allDecisionPoints.next(); // initialize the classifying data structure initClassifier(); // create attribute information FastVector attributeInfo = currentDP.getContext().getAttributeInfo(); // create empty data set with attribute information Instances data = new Instances(currentDP.getName(), attributeInfo, 0); data.setClassIndex(data.numAttributes() - 1); // create learning instances List<DecisionCategory> allCategories = currentDP.getTargetConcept(); Iterator<DecisionCategory> categoryIterator = allCategories.iterator(); while (categoryIterator.hasNext()) { DecisionCategory branch = categoryIterator.next(); // create all instances for one class at once ArrayList belongingTraces = log.getTracesInCategory(branch); Iterator traceIterator = belongingTraces.iterator(); while (traceIterator.hasNext()) { DecisionMiningLogTrace trace = (DecisionMiningLogTrace) traceIterator.next(); // one instance per trace // (future work: loops may result in multiple instances per // trace!) 
Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log, currentDP.getContext()); // classify instance instance.setClassValue(branch.toString()); data.add(instance); } } // in case no single learning instance can be provided (as decision // point is never // reached, or decision classes cannot specified properly) --> do // not call algorithm if (data.numInstances() == 0) { currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available")); } // actually solve the classification problem else { try { myClassifier.buildClassifier(data); // build up result visualization currentDP.getContext().setResultViewPanel(createResultVisualization()); // create evaluation statistics of classifier for the user currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data)); // only derive discovered data dependencies for decision // point if tree is not trivial if (((J48) myClassifier).measureNumRules() > 0) { // TODO - derive the rules in a // a) classifier-independent way // b) cpn-independent way currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN); } } catch (Exception ex) { ex.printStackTrace(); currentDP.getContext().setResultViewPanel( createMessagePanel("Error while solving the classification problem")); } } } }
From source file:org.processmining.analysis.decisionmining.DecisionAnalyserForAuLdg.java
License:Open Source License
/** * Analyses the given list of decision points according to the context * specified. Furthermore, the context is provided with some visualization of * the analysis result.//from w ww . j a v a 2s . c o m * * @param decisionPoints * the list of decision points to be analysed * @param log * the log to be analysed * @param highLevelPN * the simulation model to export discovered data dependencies */ public void analyse(List<DecisionPointForAuLdg> decisionPoints, DecisionMiningLogReaderForAuLdg log, HLPetriNet highLevelPN) { Iterator<DecisionPointForAuLdg> allDecisionPoints = decisionPoints.iterator(); while (allDecisionPoints.hasNext()) { DecisionPointForAuLdg currentDP = allDecisionPoints.next(); // initialize the classifying data structure initClassifier(); // create attribute information FastVector attributeInfo = currentDP.getContext().getAttributeInfo(); // create empty data set with attribute information Instances data = new Instances(currentDP.getName(), attributeInfo, 0); data.setClassIndex(data.numAttributes() - 1); // create learning instances List<DecisionCategoryForAuLdg> allCategories = currentDP.getTargetConcept(); Iterator<DecisionCategoryForAuLdg> categoryIterator = allCategories.iterator(); while (categoryIterator.hasNext()) { DecisionCategoryForAuLdg branch = categoryIterator.next(); // create all instances for one class at once ArrayList belongingTraces = log.getTracesInCategory(branch); Iterator traceIterator = belongingTraces.iterator(); while (traceIterator.hasNext()) { DecisionMiningLogTraceForAuLdg trace = (DecisionMiningLogTraceForAuLdg) traceIterator.next(); // one instance per trace // (future work: loops may result in multiple instances per trace!) 
Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log, currentDP.getContext()); // classify instance instance.setClassValue(branch.toString()); data.add(instance); } } // in case no single learning instance can be provided (as decision point // is never // reached, or decision classes cannot specified properly) --> do not call // algorithm if (data.numInstances() == 0) { currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available")); } // actually solve the classification problem else { try { myClassifier.buildClassifier(data); // build up result visualization currentDP.getContext().setResultViewPanel(createResultVisualization()); // create evaluation statistics of classifier for the user currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data)); // only derive discovered data dependencies for decision point if tree // is not trivial if (((J48) myClassifier).measureNumRules() > 0) { // TODO - derive the rules in a // a) classifier-independent way // b) cpn-independent way currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN); } } catch (Exception ex) { ex.printStackTrace(); currentDP.getContext().setResultViewPanel( createMessagePanel("Error while solving the classification problem")); } } } }
From source file:org.processmining.analysis.traceclustering.profile.AggregateProfile.java
License:Open Source License
public Instances getWekaData() { Instances data = null; // create attribute information FastVector attributeInfo = new FastVector(); // make attribute // clean the relevant attribute list and re-fill based on new selection // scope//from ww w . j a v a 2s . c o m for (int i = 0; i < numberOfItems(); i++) { String name = CpnUtils.replaceSpecialCharacters(getItemKey(i)); Attribute wekaAtt = new Attribute(name); attributeInfo.addElement(wekaAtt); } attributeInfo.trimToSize(); data = new Instances("Clustering", attributeInfo, 0); try { for (int i = 0; i < getLog().numberOfInstances(); i++) { Instance instance0 = new Instance(attributeInfo.size()); for (int j = 0; j < numberOfItems(); j++) { String name = CpnUtils.replaceSpecialCharacters(getItemKey(j)); Attribute wekaAtt = data.attribute(name); if (wekaAtt != null) { double doubleAttValue = (new Double(getValue(i, j))).doubleValue(); instance0.setValue(wekaAtt, doubleAttValue); } else { Message.add("Weka Error: fail to add", Message.ERROR); } } instance0.setDataset(data); data.add(instance0); } } catch (Exception c) { Message.add("Weka Error: " + c.toString(), Message.ERROR); } return data; }
From source file:org.prom5.analysis.clusteranalysis.ClusterDecisionAnalyzer.java
License:Open Source License
public Instances getDataInfo() { // create attribute information FastVector attributeInfo = new FastVector(); // make attribute // clean the relevant attribute list and re-fill based on new selection scope for (int i = 0; i < agProfiles.numberOfItems(); i++) { if (checks[i].isSelected()) { String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(i)); Attribute wekaAtt = new Attribute(name); attributeInfo.addElement(wekaAtt); }/*from w w w.ja va2 s . co m*/ } // for target concept FastVector my_nominal_values = new FastVector(clusters.getClusters().size()); Attribute targetConcept = null; for (Cluster aCluster : clusters.getClusters()) { my_nominal_values.addElement(aCluster.getName()); } targetConcept = new Attribute("Cluster", my_nominal_values); attributeInfo.addElement(targetConcept); attributeInfo.trimToSize(); // learning Instances data = new Instances("Clustering", attributeInfo, 0); data.setClassIndex(data.numAttributes() - 1); for (Cluster aCluster : clusters.getClusters()) { String clusterName = aCluster.getName(); for (Integer i : aCluster.getTraceIndices()) { Instance instance0 = new Instance(attributeInfo.size()); for (int j = 0; j < agProfiles.numberOfItems(); j++) { if (checks[j].isSelected()) { String name = CpnUtils.replaceSpecialCharacters(agProfiles.getItemKey(j)); Attribute wekaAtt = data.attribute(name); if (wekaAtt != null) { double doubleAttValue = (new Double(agProfiles.getValue(i, j))).doubleValue(); instance0.setValue(wekaAtt, doubleAttValue); } else { System.out.println("fail to add"); } } } instance0.setDataset(data); instance0.setClassValue(clusterName); data.add(instance0); } } return data; }
From source file:org.prom5.analysis.decisionmining.DecisionAnalyser.java
License:Open Source License
/** * Analyses the given list of decision points according to the context specified. * Furthermore, the context is provided with some visualization of the analysis result. * @param decisionPoints the list of decision points to be analysed * @param log the log to be analysed/* w w w.ja v a 2s . c o m*/ * @param highLevelPN the simulation model to export discovered data dependencies */ public void analyse(List<DecisionPoint> decisionPoints, DecisionMiningLogReader log, HLPetriNet highLevelPN) { Iterator<DecisionPoint> allDecisionPoints = decisionPoints.iterator(); while (allDecisionPoints.hasNext()) { DecisionPoint currentDP = allDecisionPoints.next(); // initialize the classifying data structure initClassifier(); // create attribute information FastVector attributeInfo = currentDP.getContext().getAttributeInfo(); // create empty data set with attribute information Instances data = new Instances(currentDP.getName(), attributeInfo, 0); data.setClassIndex(data.numAttributes() - 1); // create learning instances List<DecisionCategory> allCategories = currentDP.getTargetConcept(); Iterator<DecisionCategory> categoryIterator = allCategories.iterator(); while (categoryIterator.hasNext()) { DecisionCategory branch = categoryIterator.next(); // create all instances for one class at once ArrayList belongingTraces = log.getTracesInCategory(branch); Iterator traceIterator = belongingTraces.iterator(); while (traceIterator.hasNext()) { DecisionMiningLogTrace trace = (DecisionMiningLogTrace) traceIterator.next(); // one instance per trace // (future work: loops may result in multiple instances per trace!) 
Instance instance = trace.makeInstance(data, attributeInfo.size(), branch, log, currentDP.getContext()); // classify instance instance.setClassValue(branch.toString()); data.add(instance); } } // in case no single learning instance can be provided (as decision point is never // reached, or decision classes cannot specified properly) --> do not call algorithm if (data.numInstances() == 0) { currentDP.getContext().setResultViewPanel(createMessagePanel("No learning instances available")); } // actually solve the classification problem else { try { myClassifier.buildClassifier(data); // build up result visualization currentDP.getContext().setResultViewPanel(createResultVisualization()); // create evaluation statistics of classifier for the user currentDP.getContext().setEvaluationViewPanel(createEvaluationVisualization(data)); // only derive discovered data dependencies for decision point if tree is not trivial if (((J48) myClassifier).measureNumRules() > 0) { // TODO - derive the rules in a // a) classifier-independent way // b) cpn-independent way currentDP.setDataDependencies(((J48) myClassifier).prefix(), highLevelPN); } } catch (Exception ex) { ex.printStackTrace(); currentDP.getContext().setResultViewPanel( createMessagePanel("Error while solving the classification problem")); } } } }
From source file:org.prom5.analysis.traceclustering.profile.AggregateProfile.java
License:Open Source License
/**
 * Converts this aggregate profile into a Weka data set: one numeric attribute
 * per profile item and one instance per log trace, filled with that trace's
 * profile values.
 *
 * @return the populated Weka data set (no class attribute is set)
 */
public Instances getWekaData() {
    Instances data = null;
    // create attribute information
    FastVector attributeInfo = new FastVector(); // make attribute
    // clean the relevant attribute list and re-fill based on new selection scope
    for (int i = 0; i < numberOfItems(); i++) {
        String name = CpnUtils.replaceSpecialCharacters(getItemKey(i));
        Attribute wekaAtt = new Attribute(name);
        attributeInfo.addElement(wekaAtt);
    }
    attributeInfo.trimToSize();
    data = new Instances("Clustering", attributeInfo, 0);
    try {
        for (int i = 0; i < getLog().numberOfInstances(); i++) {
            Instance instance0 = new Instance(attributeInfo.size());
            for (int j = 0; j < numberOfItems(); j++) {
                String name = CpnUtils.replaceSpecialCharacters(getItemKey(j));
                Attribute wekaAtt = data.attribute(name);
                if (wekaAtt != null) {
                    // NOTE(review): assumes getValue(i, j) yields a value the Double
                    // constructor accepts (double or numeric String) -- confirm
                    double doubleAttValue = (new Double(getValue(i, j))).doubleValue();
                    instance0.setValue(wekaAtt, doubleAttValue);
                } else {
                    // attribute lookup failed (e.g. name clash after character replacement)
                    Message.add("Weka Error: fail to add", Message.ERROR);
                }
            }
            instance0.setDataset(data);
            data.add(instance0);
        }
    } catch (Exception c) {
        // best effort: report conversion problems to the ProM message console
        Message.add("Weka Error: " + c.toString(), Message.ERROR);
    }
    return data;
}
From source file:org.scify.NewSumServer.Server.MachineLearning.dataSets.java
License:Apache License
/** * Generate the train dataset/*from www .j a v a2s . co m*/ * * @param file the path for the InsectDB file * @return the train dataset as Instance */ public static Instances trainingSet(INSECTDB file) { ArrayList<Attribute> atts; ArrayList<String> attVals = new ArrayList<String>(); ArrayList<String> vectors; Instances data; double[] vals; atts = new ArrayList<Attribute>(); // Set up attributes HashSet<String> hasGnames = new HashSet<String>(); //create a HashSet with all class graph names hasGnames.addAll(Arrays.asList(file.getObjectList("cg"))); for (String index : hasGnames) { // for each class graph name add a attribute atts.add(new Attribute(index)); attVals.add(index); } atts.add(new Attribute("Class", attVals)); // fill the attribute class with with given class graph name data = new Instances("train Set for Category Classification ", atts, 0);//create Instances object vectors = vector.trainingVector(file); // take all instance vectors // fill with data for (String vi : vectors) { // for each instance String[] vectorTable = vi.trim().split(","); vals = new double[data.numAttributes()]; for (int i = 0; i < vectorTable.length - 2; i++) { vals[i] = Double.parseDouble(vectorTable[i]); } vals[vectorTable.length - 1] = attVals.indexOf(vectorTable[vectorTable.length - 1]); //Class name data.add(new DenseInstance(1.0, vals)); // add data to Instance } return data; }
From source file:org.scify.NewSumServer.Server.MachineLearning.dataSets.java
License:Apache License
/**
 * Generates the labeling data set for a single unlabeled item: the same
 * attribute layout as the training set (one numeric attribute per class graph
 * name plus a nominal "Class" attribute), holding exactly one instance whose
 * class value is left missing so a classifier can predict it.
 *
 * <p>NOTE(review): the original javadoc documented a parameter
 * {@code ClassGname} that does not exist in the signature; removed.
 *
 * @param file the INSECTDB store holding the class graphs
 * @param Ivector comma-separated similarity values between the given item and
 *                all class graphs (one value per class graph attribute)
 * @return the label data set as an Instances object (class value missing)
 */
public static Instances labelingSet(INSECTDB file, String Ivector) {
    ArrayList<Attribute> atts;
    ArrayList<String> attVals = new ArrayList<String>();
    Instances data;
    double[] vals;
    // Set up attributes: one numeric attribute per class graph name
    atts = new ArrayList<Attribute>();
    HashSet<String> hasGnames = new HashSet<String>(); // all class graph names
    hasGnames.addAll(Arrays.asList(file.getObjectList("cg")));
    for (String index : hasGnames) {
        atts.add(new Attribute(index));
        attVals.add(index);
    }
    // nominal class attribute listing every class graph name
    atts.add(new Attribute("Class", attVals));
    data = new Instances("label Set for Category Classification ", atts, 0);
    // fill with data: one similarity value per class graph attribute
    String[] vectorTable = Ivector.trim().split(",");
    vals = new double[data.numAttributes()];
    int count = 0;
    for (String value : vectorTable) {
        vals[count] = Double.parseDouble(value);
        count++;
    }
    // leave the class slot missing so the classifier can fill it in
    // (assumes vectorTable has exactly numAttributes() - 1 entries -- confirm)
    vals[count] = Utils.missingValue();
    data.add(new DenseInstance(1.0, vals));
    return data;
}