List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
/** * Does validation and integrity checks between data set and meta-data. The appropriate exception is * thrown if any inconsistencies of validation rules breached. * The passed data set and meta-data are not modified in any way. */// www . ja va 2 s . c om private void validate(Instances dataSet, LabelsMetaData labelsMetaData) throws InvalidDataFormatException { Set<String> labelNames = labelsMetaData.getLabelNames(); if (labelNames.size() < 2) { throw new InvalidDataFormatException(String.format( "There must be at least 2 label attributes specified, but only '%s' are defined in metadata", labelNames.size())); } int numAttributes = dataSet.numAttributes(); int numMatches = 0; for (int index = 0; index < numAttributes; index++) { Attribute attribute = dataSet.attribute(index); if (labelNames.contains(attribute.name())) { numMatches++; if (!checkLabelAttributeFormat(attribute)) { throw new InvalidDataFormatException( String.format("The format of label attribute '%s' is not valid.", attribute.name())); } } } if (numMatches != labelNames.size()) { throw new InvalidDataFormatException( String.format("Not all labels defined in meta-data are present in ARFF data file.")); } if (labelsMetaData.isHierarchy()) { checkLabelsConsistency(dataSet, labelsMetaData.getRootLabels()); } }
From source file:mulan.data.MultiLabelInstances.java
License:Open Source License
private void checkLabelsConsistency(Instances dataSet, Set<LabelNode> rootLabelNodes) throws InvalidDataFormatException { // create an index for faster access to attribute based on name Map<String, Attribute> attributesIndex = new HashMap<String, Attribute>(); for (int index = 0; index < dataSet.numAttributes(); index++) { Attribute attribute = dataSet.attribute(index); attributesIndex.put(attribute.name(), attribute); }//from ww w . jav a 2 s . c o m int numInstances = dataSet.numInstances(); for (int index = 0; index < numInstances; index++) { Instance instance = dataSet.instance(index); for (LabelNode labelNode : rootLabelNodes) { checkSubtreeConsistency(labelNode, instance, true, attributesIndex); } } }
From source file:mulan.data.Statistics.java
License:Open Source License
/** * calculates various multilabel statistics, such as label cardinality, <br> * label density and the set of distinct labels along with their frequency * // w ww .ja v a 2 s . co m * @param mlData a multi-label dataset */ public void calculateStats(MultiLabelInstances mlData) { // initialize statistics Instances data = mlData.getDataSet(); numLabels = mlData.getNumLabels(); int[] labelIndices = mlData.getLabelIndices(); int[] featureIndices = mlData.getFeatureIndices(); numPredictors = featureIndices.length; labelCardinality = 0; numNominal = 0; numNumeric = 0; examplesPerLabel = new double[numLabels]; cardinalityDistribution = new double[numLabels + 1]; labelsets = new HashMap<LabelSet, Integer>(); // gather statistics for (int i = 0; i < featureIndices.length; i++) { if (data.attribute(featureIndices[i]).isNominal()) { numNominal++; } if (data.attribute(featureIndices[i]).isNumeric()) { numNumeric++; } } numInstances = data.numInstances(); for (int i = 0; i < numInstances; i++) { int exampleCardinality = 0; double[] dblLabels = new double[numLabels]; for (int j = 0; j < numLabels; j++) { if (data.instance(i).stringValue(labelIndices[j]).equals("1")) { dblLabels[j] = 1; exampleCardinality++; labelCardinality++; examplesPerLabel[j]++; } else { dblLabels[j] = 0; } } cardinalityDistribution[exampleCardinality]++; LabelSet labelSet = new LabelSet(dblLabels); if (labelsets.containsKey(labelSet)) { labelsets.put(labelSet, labelsets.get(labelSet) + 1); } else { labelsets.put(labelSet, 1); } } labelCardinality /= numInstances; labelDensity = labelCardinality / numLabels; for (int j = 0; j < numLabels; j++) { examplesPerLabel[j] /= numInstances; } }
From source file:mulan.transformations.IncludeLabelsTransformation.java
License:Open Source License
/** * * @param mlData multi-label data// w ww.j a v a 2s . c o m * @return transformed instances * @throws Exception Potential exception thrown. To be handled in an upper level. */ public Instances transformInstances(MultiLabelInstances mlData) throws Exception { int numLabels = mlData.getNumLabels(); labelIndices = mlData.getLabelIndices(); // remove all labels Instances transformed = RemoveAllLabels.transformInstances(mlData); // add at the end an attribute with values the label names ArrayList<String> labelNames = new ArrayList<String>(numLabels); for (int counter = 0; counter < numLabels; counter++) { labelNames.add(mlData.getDataSet().attribute(labelIndices[counter]).name()); } Attribute attrLabel = new Attribute("Label", labelNames); transformed.insertAttributeAt(attrLabel, transformed.numAttributes()); // and at the end a binary attribute ArrayList<String> binaryValues = new ArrayList<String>(2); binaryValues.add("0"); binaryValues.add("1"); Attribute classAttr = new Attribute("Class", binaryValues); transformed.insertAttributeAt(classAttr, transformed.numAttributes()); // add instances transformed = new Instances(transformed, 0); transformed.setClassIndex(transformed.numAttributes() - 1); Instances data = mlData.getDataSet(); for (int instanceIndex = 0; instanceIndex < data.numInstances(); instanceIndex++) { for (int labelCounter = 0; labelCounter < numLabels; labelCounter++) { Instance temp; temp = RemoveAllLabels.transformInstance(data.instance(instanceIndex), labelIndices); temp.setDataset(null); temp.insertAttributeAt(temp.numAttributes()); temp.insertAttributeAt(temp.numAttributes()); temp.setDataset(transformed); temp.setValue(temp.numAttributes() - 2, (String) labelNames.get(labelCounter)); if (data.attribute(labelIndices[labelCounter]) .value((int) data.instance(instanceIndex).value(labelIndices[labelCounter])).equals("1")) { temp.setValue(temp.numAttributes() - 1, "1"); } else { temp.setValue(temp.numAttributes() - 1, "0"); } transformed.add(temp); 
} } return transformed; }
From source file:mulan.transformations.LabelPowersetTransformation.java
License:Open Source License
public Instances transformInstances(MultiLabelInstances mlData) throws Exception { Instances data = mlData.getDataSet(); int numLabels = mlData.getNumLabels(); int[] labelIndices = mlData.getLabelIndices(); Instances newData = null;/*from ww w .j av a 2 s . c o m*/ // gather distinct label combinations HashSet<LabelSet> labelSets = new HashSet<LabelSet>(); int numInstances = data.numInstances(); for (int i = 0; i < numInstances; i++) { // construct labelset double[] dblLabels = new double[numLabels]; for (int j = 0; j < numLabels; j++) { int index = labelIndices[j]; dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index))); } LabelSet labelSet = new LabelSet(dblLabels); // add labelset if not already present labelSets.add(labelSet); } // create class attribute ArrayList<String> classValues = new ArrayList<String>(labelSets.size()); for (LabelSet subset : labelSets) { classValues.add(subset.toBitString()); } Attribute newClass = new Attribute("class", classValues); // remove all labels newData = RemoveAllLabels.transformInstances(data, labelIndices); // add new class attribute newData.insertAttributeAt(newClass, newData.numAttributes()); newData.setClassIndex(newData.numAttributes() - 1); // add class values for (int i = 0; i < newData.numInstances(); i++) { //System.out.println(newData.instance(i).toString()); String strClass = ""; for (int j = 0; j < numLabels; j++) { int index = labelIndices[j]; strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index)); } //System.out.println(strClass); newData.instance(i).setClassValue(strClass); } transformedFormat = new Instances(newData, 0); return newData; }
From source file:mulan.transformations.PT6Transformation.java
License:Open Source License
public Instances transformInstances(MultiLabelInstances mlData) throws Exception { int numLabels = mlData.getNumLabels(); labelIndices = mlData.getLabelIndices(); // remove all labels Instances transformed = RemoveAllLabels.transformInstances(mlData); // add at the end an attribute with values the label names ArrayList<String> labelNames = new ArrayList<String>(numLabels); for (int counter = 0; counter < numLabels; counter++) { labelNames.add(mlData.getDataSet().attribute(labelIndices[counter]).name()); }/*from w w w.j a v a 2s.co m*/ Attribute attrLabel = new Attribute("Label", labelNames); transformed.insertAttributeAt(attrLabel, transformed.numAttributes()); // and at the end a binary attribute ArrayList<String> binaryValues = new ArrayList<String>(2); binaryValues.add("0"); binaryValues.add("1"); Attribute classAttr = new Attribute("Class", binaryValues); transformed.insertAttributeAt(classAttr, transformed.numAttributes()); // add instances transformed = new Instances(transformed, 0); transformed.setClassIndex(transformed.numAttributes() - 1); Instances data = mlData.getDataSet(); for (int instanceIndex = 0; instanceIndex < data.numInstances(); instanceIndex++) { for (int labelCounter = 0; labelCounter < numLabels; labelCounter++) { Instance temp; temp = RemoveAllLabels.transformInstance(data.instance(instanceIndex), labelIndices); temp.setDataset(null); temp.insertAttributeAt(temp.numAttributes()); temp.insertAttributeAt(temp.numAttributes()); temp.setDataset(transformed); temp.setValue(temp.numAttributes() - 2, (String) labelNames.get(labelCounter)); if (data.attribute(labelIndices[labelCounter]) .value((int) data.instance(instanceIndex).value(labelIndices[labelCounter])).equals("1")) { temp.setValue(temp.numAttributes() - 1, "1"); } else { temp.setValue(temp.numAttributes() - 1, "0"); } transformed.add(temp); } } return transformed; }
From source file:mulan.transformations.regression.ChainTransformation.java
License:Open Source License
/** * Deletes all target attributes that appear after the first targetsToKeep in the chain. The * target attribute at position targetsToKeep in the chain is set as the class attribute. * /*from w ww . j ava 2 s .co m*/ * @param data the input data set * @param chain a chain (permutation) of the indices of the target attributes * @param numTargetsToKeep the number of target attributes from the beginning of the chain that * should be kept, 1<=numTargetsToKeep<=numOfTargets * @return the transformed Instances object. The input object is not modified. * @throws Exception Potential exception thrown. To be handled in an upper level. */ public static Instances transformInstances(Instances data, int[] chain, int numTargetsToKeep) throws Exception { int numOfTargets = chain.length; if (numTargetsToKeep < 1 || numTargetsToKeep > numOfTargets) { throw new Exception("keepFirstKTargets should be between 1 and numOfTargets"); } // Indices of attributes to remove int[] indicesToRemove = new int[numOfTargets - numTargetsToKeep]; // the indices of the target attributes whose position in the chain is // after the first keepFirstKTargets attributes are marked for removal for (int i = 0; i < numOfTargets - numTargetsToKeep; i++) { indicesToRemove[i] = chain[numTargetsToKeep + i]; } Remove remove = new Remove(); remove.setAttributeIndicesArray(indicesToRemove); remove.setInputFormat(data); // get the class attribute name, the name of the target attribute which is placed in the // targetsToKeep position of the chain String classAttributeName = data.attribute(chain[numTargetsToKeep - 1]).name(); Instances transformed = Filter.useFilter(data, remove); transformed.setClass(transformed.attribute(classAttributeName)); return transformed; }
From source file:myclassifier.MyC45.java
/** * Method building ID3 tree.//from www .ja va 2s . com * * @param data the training data * @exception Exception if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = -1; //Instance.missingValue(); m_Distribution = new double[data.numClasses()]; return; } // Compute attribute with maximum information gain. double[] gainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); gainRatios[att.index()] = computeGainRatio(data, att); } m_Attribute = data.attribute(Utils.maxIndex(gainRatios)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(gainRatios[m_Attribute.index()], 0)) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; } Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { Instances[] splitData = splitData(data, m_Attribute); m_Successors = new MyC45[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyC45(); m_Successors[j].makeTree(splitData[j]); } } }
From source file:myclassifier.myC45Pack.SplitModel.java
public void buildClassifier(Instances dataSet) throws Exception { // Initialize the remaining instance variables. numSubsets = 0;/*from www . ja v a 2 s . co m*/ splitPointValue = Double.MAX_VALUE; infoGain = 0; gainRatio = 0; // Different treatment for enumerated and numeric attributes. if (dataSet.attribute(attribIndex).isNominal()) { numOfBranches = dataSet.attribute(attribIndex).numValues(); numOfSplitPoints = dataSet.attribute(attribIndex).numValues(); handleNominalAttribute(dataSet); } else { //attribute numeric numOfBranches = 2; numOfSplitPoints = 0; dataSet.sort(dataSet.attribute(attribIndex)); handleNumericAttribute(dataSet); } }
From source file:myclassifier.myC45Pack.SplitModel.java
public final void setSplitPoint(Instances allInstances) { double newSplitPoint = -Double.MAX_VALUE; double temp;//from w ww.java 2 s . c o m Instance instance; if ((allInstances.attribute(attribIndex).isNumeric()) && (numSubsets > 1)) { Enumeration instancesEnum = allInstances.enumerateInstances(); while (instancesEnum.hasMoreElements()) { instance = (Instance) instancesEnum.nextElement(); if (!instance.isMissing(attribIndex)) { temp = instance.value(attribIndex); if ((temp > newSplitPoint) && (temp <= splitPointValue)) { newSplitPoint = temp; } } } splitPointValue = newSplitPoint; } }