Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

In this page you can find the example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:mulan.classifier.neural.NormalizationFilter.java

License:Open Source License

/**
 * Collects the minimum and maximum value of every numeric feature attribute
 * of the given data and stores them in {@code attStats}, keyed by attribute index.
 * Non-numeric feature attributes are skipped.
 *
 * @param mlData the multi-label data whose feature attributes are inspected
 */
private void Initialize(MultiLabelInstances mlData) {
    final Instances instances = mlData.getDataSet();
    for (int index : mlData.getFeatureIndices()) {
        if (!instances.attribute(index).isNumeric()) {
            continue;
        }
        // Only numeric attributes carry numericStats with min/max.
        Stats numeric = instances.attributeStats(index).numericStats;
        double[] range = { numeric.min, numeric.max };
        attStats.put(index, range);
    }
}

From source file:mulan.classifier.transformation.CalibratedLabelRanking.java

License:Open Source License

/**
 * Trains the calibrated label ranking model.
 * <p>
 * First a {@code BinaryRelevance} learner is built on the full training set (the
 * "virtual label" models). Then one copy of the base classifier is trained for every
 * unordered pair of labels, using only the instances on which the two labels differ.
 * For each pair the emptied data set (structure only) is remembered in
 * {@code metaDataTest}, and {@code nodata} flags pairs for which no training
 * instance was available.
 * <p>
 * Progress messages are emitted through {@code debug(...)} only.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building any of the underlying models or applying the
 *         attribute filter fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // One-vs-one models: one per unordered pair of labels
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    Instances trainingData = trainingSet.getDataSet();

    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        // Attribute of label 1
        Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);

            // initialize training set (same structure, no instances)
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                // Fetch the row once; copy it so the training data is left untouched.
                Instance source = trainingData.instance(i);
                Instance tempInstance;
                if (source instanceof SparseInstance) {
                    tempInstance = new SparseInstance(source);
                } else {
                    tempInstance = new DenseInstance(source);
                }

                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);

                // Only instances on which the two labels disagree express a preference.
                if (!value1.equals(value2)) {
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }

            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors);
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            dataOneVsOne.setClassIndex(numPredictors);

            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            // Drop the instances; the emptied data set is stored for this pair.
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}

From source file:mulan.classifier.transformation.LabelPowerset.java

License:Open Source License

/**
 * Transforms the multi-label training data with a fresh
 * {@code LabelPowersetTransformation} and trains the base classifier on the result.
 * Training is skipped when the last attribute of the transformed data has at most
 * one value.
 *
 * @param mlData the multi-label training data
 * @throws Exception if the transformation or the base classifier fails
 */
protected void buildInternal(MultiLabelInstances mlData) throws Exception {
    transformation = new LabelPowersetTransformation();
    debug("Transforming the training set.");
    final Instances singleLabelData = transformation.transformInstances(mlData);

    // check for unary class: the last attribute must offer more than one value
    final int lastIndex = singleLabelData.numAttributes() - 1;
    final boolean hasSeveralValues = singleLabelData.attribute(lastIndex).numValues() > 1;
    if (hasSeveralValues) {
        baseClassifier.buildClassifier(singleLabelData);
    }
}

From source file:mulan.classifier.transformation.LabelsetPruning.java

License:Open Source License

/**
 * Groups the training instances by the label set they carry, assembles a new
 * training set from the groups and hands it to the parent implementation.
 * Groups with more than {@code p} instances are kept as they are; for every other
 * group the instances returned by {@code processRejected} are added instead.
 *
 * @param mlDataSet the multi-label training data
 * @throws Exception if building on the pruned data fails
 */
@Override
protected void buildInternal(MultiLabelInstances mlDataSet) throws Exception {
    Instances data = mlDataSet.getDataSet();
    format = new Instances(data, 0);
    int numInstances = data.numInstances();

    // Bucket every instance under its label set.
    ListInstancePerLabel = new HashMap<LabelSet, ArrayList<Instance>>();
    for (int row = 0; row < numInstances; row++) {
        Instance current = data.instance(row);
        double[] labelValues = new double[numLabels];
        for (int pos = 0; pos < numLabels; pos++) {
            int attIndex = labelIndices[pos];
            // The nominal value string of the label is parsed as a number.
            String nominal = data.attribute(attIndex).value((int) current.value(attIndex));
            labelValues[pos] = Double.parseDouble(nominal);
        }
        LabelSet key = new LabelSet(labelValues);
        ArrayList<Instance> bucket = ListInstancePerLabel.get(key);
        if (bucket == null) {
            bucket = new ArrayList<Instance>();
            ListInstancePerLabel.put(key, bucket);
        }
        bucket.add(current);
    }

    // Walk the buckets: label sets occurring more than p times are kept,
    // the remaining ones are handled by processRejected.
    Instances newData = new Instances(data, 0);
    for (LabelSet ls : ListInstancePerLabel.keySet()) {
        ArrayList<Instance> members = ListInstancePerLabel.get(ls);
        if (members.size() <= p) {
            ArrayList<Instance> reintroduced = processRejected(ls);
            newData.addAll(reintroduced);
            continue;
        }
        for (Instance member : members) {
            newData.add(member);
        }
    }

    super.buildInternal(new MultiLabelInstances(newData, mlDataSet.getLabelsMetaData()));
}

From source file:mulan.classifier.transformation.MultiLabelStacking.java

License:Open Source License

/**
 * Builds a copy of the given data set in which every instance is preceded by a
 * numeric "Index" attribute holding the position of the instance in the original
 * data set. The class index is shifted by one to account for the new attribute.
 *
 * @param original the data set to copy
 * @return a new data set named "Meta format" with the index attribute in first position
 */
protected Instances attachIndexes(Instances original) {
    final int attributeCount = original.numAttributes();
    ArrayList<Attribute> header = new ArrayList<Attribute>(attributeCount + 1);

    // The index attribute comes first, followed by the original attributes.
    header.add(new Attribute("Index"));
    for (int a = 0; a < attributeCount; a++) {
        header.add(original.attribute(a));
    }
    Instances transformed = new Instances("Meta format", header, 0);

    final int instanceCount = original.numInstances();
    for (int row = 0; row < instanceCount; row++) {
        Instance indexed = (Instance) original.instance(row).copy();
        // Detach the copy from its data set before changing its layout.
        indexed.setDataset(null);
        indexed.insertAttributeAt(0);
        indexed.setValue(0, row);
        transformed.add(indexed);
    }

    transformed.setClassIndex(original.classIndex() + 1);
    return transformed;
}

From source file:mulan.classifier.transformation.TwoStageClassifierChainArchitecture.java

License:Open Source License

/**
 * Trains the two-stage architecture: a {@code BinaryRelevance} learner on the given
 * training set, followed by one copy of the base classifier per unordered pair of
 * labels, trained on the data returned by {@code GenerateChain(trainingSet)}.
 * <p>
 * For every pair only the instances on which the two labels differ are used.
 * {@code nodata} flags pairs without any such instance, and {@code metaDataTest}
 * keeps the emptied, reordered data set of each pair.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building a model or applying the attribute filter fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    //Generate the chain: Test the same dataset
    // NOTE(review): GenerateChain is defined elsewhere; presumably it extends the
    // data with chained label information - confirm.
    MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet);

    // From here on the label/feature indices refer to the chained data set.
    labelIndices = tempTrainingSet.getLabelIndices();
    featureIndices = tempTrainingSet.getFeatureIndices();

    // One-vs-one models
    // One model per unordered pair of labels.
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    Instances trainingData = tempTrainingSet.getDataSet();

    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        // Attribute of label 1
        Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);

            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                // Work on a copy of the row, keeping its sparse/dense representation.
                Instance tempInstance;
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }

                // Read the nominal value strings of both labels for this row.
                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);

                // Keep the row only when the two labels disagree.
                if (!value1.equals(value2)) {
                    // NOTE(review): writes back the value just read for label1;
                    // looks like a no-op - confirm.
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }

            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];

            // First numPredictors feature indices, then the index of label1.
            System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors);
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);

            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            // The data set is emptied before it is stored for this pair.
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}

From source file:mulan.classifier.transformation.TwoStagePrunedClassifierChainArchitecture.java

License:Open Source License

/**
 * Trains the pruned two-stage architecture: a {@code BinaryRelevance} learner on the
 * given training set, followed by one copy of the base classifier per unordered pair
 * of labels. The training data of each pair is produced by
 * {@code GenerateChain(trainingSet, label1, label2, predictions)}, where
 * {@code predictions} is the result of {@code predictLabels(trainingSet)}.
 * <p>
 * For every pair only the instances on which the two labels differ are used.
 * {@code nodata} flags pairs without any such instance, and {@code metaDataTest}
 * keeps the emptied, reordered data set of each pair.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building a model or applying the attribute filter fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // One-vs-one models
    // One model per unordered pair of labels.
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    // NOTE(review): predictLabels is defined elsewhere; presumably it uses the
    // virtual label models built above - confirm.
    ArrayList<MultiLabelOutput> predictions;
    predictions = predictLabels(trainingSet);

    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            //Generate the chain: Test the same dataset
            MultiLabelInstances tempTrainingSet = GenerateChain(trainingSet, label1, label2, predictions);

            Instances trainingData = tempTrainingSet.getDataSet();

            // The index fields are refreshed for every pair from the pair-specific data.
            labelIndices = tempTrainingSet.getLabelIndices();
            featureIndices = tempTrainingSet.getFeatureIndices();

            // Attribute of label 1
            Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);

            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);

            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference
            for (int i = 0; i < trainingData.numInstances(); i++) {
                // Work on a copy of the row, keeping its sparse/dense representation.
                Instance tempInstance;
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }

                // Read the nominal value strings of both labels for this row.
                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);

                // Keep the row only when the two labels disagree.
                if (!value1.equals(value2)) {
                    // NOTE(review): writes back the value just read for label1;
                    // looks like a no-op - confirm.
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }

            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            // First numPredictors feature indices, then the index of label1.
            System.arraycopy(featureIndices, 0, reorderedIndices, 0, numPredictors);
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);

            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            // The data set is emptied before it is stored for this pair.
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}

From source file:mulan.data.ConverterCLUS.java

License:Open Source License

/**
 * Converts the original dataset to mulan compatible dataset.
 *
 * @param sourceFilename the source file name
 * @param arffFilename the converted arff name
 * @param xmlFilename the xml name/*from  www .j a  va 2  s  . c  o  m*/
 * @throws java.lang.Exception
 */
public static void convert(String sourceFilename, String arffFilename, String xmlFilename) throws Exception {
    String line;
    try {
        BufferedReader brInput = new BufferedReader(new FileReader(sourceFilename));

        String relationName = null;
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        Instances data = null;
        int numAttributes = 0;
        String[] labelNames = null;
        while ((line = brInput.readLine()) != null) {
            if (line.startsWith("@RELATION")) {
                relationName = line.replace("@RELATION ", "").replaceAll("'", "").trim();
                continue;
            }
            if (line.startsWith("@ATTRIBUTE ")) {
                String tokens[] = line.split("\\s+");
                Attribute att;
                if (line.startsWith("@ATTRIBUTE class")) {
                    labelNames = tokens[3].split(",");
                    for (int i = 0; i < labelNames.length; i++) {
                        ArrayList<String> labelValues = new ArrayList<String>();
                        labelValues.add("0");
                        labelValues.add("1");
                        att = new Attribute(labelNames[i], labelValues);
                        attInfo.add(att);
                    }
                } else {
                    numAttributes++;
                    if (tokens[2].equals("numeric")) {
                        att = new Attribute(tokens[1]);
                    } else {
                        ArrayList<String> nominalValues = new ArrayList<String>();
                        tokens[2].substring(1, tokens[2].length() - 1);
                        String[] nominalTokens = tokens[2].substring(1, tokens[2].length() - 1).split(",");
                        for (int i = 0; i < nominalTokens.length; i++) {
                            nominalValues.add(nominalTokens[i]);
                        }
                        att = new Attribute(tokens[1], nominalValues);
                    }
                    attInfo.add(att);
                }
                continue;
            }
            if (line.toLowerCase().startsWith("@data")) {
                data = new Instances(relationName, attInfo, 0);
                while ((line = brInput.readLine()) != null) {
                    // fill data
                    String[] tokens = line.split(",");
                    double[] values = new double[attInfo.size()];
                    for (int i = 0; i < numAttributes; i++) {
                        Attribute att = (Attribute) attInfo.get(i);
                        if (att.isNumeric()) {
                            values[i] = Double.parseDouble(tokens[i]);
                        } else {
                            values[i] = att.indexOfValue(tokens[i]);
                        }
                    }
                    String[] labels = tokens[numAttributes].split("@");
                    // fill class values
                    for (int j = 0; j < labels.length; j++) {
                        String[] splitedLabels = labels[j].split("/");
                        String attrName = splitedLabels[0];
                        Attribute att = data.attribute(attrName);
                        values[attInfo.indexOf(att)] = 1;
                        for (int k = 1; k < splitedLabels.length; k++) {
                            attrName = attrName + "/" + splitedLabels[k];
                            att = data.attribute(attrName);
                            values[attInfo.indexOf(att)] = 1;
                        }
                    }
                    Instance instance = new DenseInstance(1, values);
                    data.add(instance);
                }
            }
        }
        BufferedWriter writer;
        writer = new BufferedWriter(new FileWriter(arffFilename));
        writer.write(data.toString());
        writer.close();

        // write xml file
        writer = new BufferedWriter(new FileWriter(xmlFilename));
        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");
        writer.write("<labels xmlns=\"http://mulan.sourceforge.net/labels\">\n");
        writer.write("<label name=\"" + labelNames[0] + "\">");
        int depth = 0;
        for (int i = 1; i < labelNames.length; i++) {
            int difSlashes = countSlashes(labelNames[i]) - countSlashes(labelNames[i - 1]);
            // child
            if (difSlashes == 1) {
                depth++;
                writer.write("\n");
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
            // sibling
            if (difSlashes == 0) {
                writer.write("</label>\n");
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
            // ancestor
            if (difSlashes < 0) {
                writer.write("</label>\n");
                for (int j = 0; j < Math.abs(difSlashes); j++) {
                    depth--;
                    for (int k = 0; k < depth; k++) {
                        writer.write("\t");
                    }
                    writer.write("</label>\n");
                }
                for (int j = 0; j < depth; j++) {
                    writer.write("\t");
                }
                writer.write("<label name=\"" + labelNames[i] + "\">");
            }
        }
        writer.write("</label>\n");
        while (depth > 0) {
            for (int k = 0; k < depth; k++) {
                writer.write("\t");
            }
            writer.write("</label>\n");
            depth--;
        }
        writer.write("</labels>");
        writer.close();

    } catch (IOException ioEx) {
        ioEx.printStackTrace();
    }
}

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * If {@link Instances} data set are retrieved from {@link MultiLabelInstances} and
 * post-processed, modified by custom code, it can be again reintegrated into
 * {@link MultiLabelInstances} if needed. The underlying {@link LabelsMetaData} are
 * modified to reflect changes in data set. The method creates new instance of
 * {@link MultiLabelInstances} with modified data set and new meta-data.
 * <br></br>/*  w  w  w. ja v  a 2 s  .com*/
 * The supported changes are:<br></br>
 * - remove of label {@link Attribute} to the existing {@link Instances}<br></br>
 * - add/remove of {@link Instance} from the existing {@link Instances}<br></br>
 * - add/remove of feature/predictor {@link Attribute} to the existing {@link Instances}<br></br>
 *
 * @param modifiedDataSet the modified data set
 * @return the modified data set
 * @throws IllegalArgumentException if specified modified data set is null
 * @throws InvalidDataFormatException if multi-label data format with specified modifications is not valid
 */
public MultiLabelInstances reintegrateModifiedDataSet(Instances modifiedDataSet)
        throws InvalidDataFormatException {
    if (modifiedDataSet == null) {
        throw new IllegalArgumentException("The modified data set is null.");
    }

    //TODO: add support for addition of label attributes to modified data set if necessary

    LabelsMetaDataImpl newMetaData = (LabelsMetaDataImpl) labelsMetaData.clone();
    Set<String> origLabelNames = labelsMetaData.getLabelNames();
    for (String labelName : origLabelNames) {
        if (modifiedDataSet.attribute(labelName) == null) {
            newMetaData.removeLabelNode(labelName);
        }
    }

    return new MultiLabelInstances(modifiedDataSet, newMetaData);
}

From source file:mulan.data.MultiLabelInstances.java

License:Open Source License

/**
 * Creates flat labels meta-data from the last {@code numLabels} attributes of the
 * data set: each of those attributes is registered as a root label node under its
 * attribute name.
 *
 * @param data the data set whose trailing attributes are the labels
 * @param numLabels the number of label attributes at the end of the data set
 * @return the meta-data with one root node per label attribute
 * @throws InvalidDataFormatException if fewer than {@code numLabels} labels end up in
 *         the meta-data, which the message attributes to non-unique label names
 */
private LabelsMetaData loadLabesMeta(Instances data, int numLabels) throws InvalidDataFormatException {
    final LabelsMetaDataImpl metaData = new LabelsMetaDataImpl();
    final int firstLabelIndex = data.numAttributes() - numLabels;
    for (int offset = 0; offset < numLabels; offset++) {
        String labelName = data.attribute(firstLabelIndex + offset).name();
        metaData.addRootNode(new LabelNodeImpl(labelName));
    }

    boolean labelsMissing = metaData.getNumLabels() < numLabels;
    if (labelsMissing) {
        throw new InvalidDataFormatException("The names of label attributes are not unique.");
    }

    return metaData;
}