Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usages of the attribute method of weka.core.Instances.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:cn.edu.xmu.dm.d3c.clustering.SimpleKMeans.java

License:Open Source License

/**
 * Move the centroid to it's new coordinates. Generate the centroid coordinates based 
 * on it's  members (objects assigned to the cluster of the centroid) and the distance 
 * function being used./*from w  w w . j a v  a  2 s .c o  m*/
 * @param centroidIndex index of the centroid which the coordinates will be computed
 * @param members the objects that are assigned to the cluster of this centroid
 * @param updateClusterInfo if the method is supposed to update the m_Cluster arrays
 * @return the centroid coordinates
 */
protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) {
    double[] vals = new double[members.numAttributes()];

    //used only for Manhattan Distance
    Instances sortedMembers = null;
    int middle = 0;
    boolean dataIsEven = false;

    if (m_DistanceFunction instanceof ManhattanDistance) {
        middle = (members.numInstances() - 1) / 2;
        dataIsEven = ((members.numInstances() % 2) == 0);
        if (m_PreserveOrder) {
            sortedMembers = members;
        } else {
            sortedMembers = new Instances(members);
        }
    }

    for (int j = 0; j < members.numAttributes(); j++) {

        //in case of Euclidian distance the centroid is the mean point
        //in case of Manhattan distance the centroid is the median point
        //in both cases, if the attribute is nominal, the centroid is the mode
        if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) {
            vals[j] = members.meanOrMode(j);
        } else if (m_DistanceFunction instanceof ManhattanDistance) {
            //singleton special case
            if (members.numInstances() == 1) {
                vals[j] = members.instance(0).value(j);
            } else {
                sortedMembers.kthSmallestValue(j, middle + 1);
                vals[j] = sortedMembers.instance(middle).value(j);
                if (dataIsEven) {
                    sortedMembers.kthSmallestValue(j, middle + 2);
                    vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2;
                }
            }
        }

        if (updateClusterInfo) {
            m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount;
            m_ClusterNominalCounts[centroidIndex][j] = members.attributeStats(j).nominalCounts;
            if (members.attribute(j).isNominal()) {
                if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils
                        .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) {
                    vals[j] = Utils.missingValue(); // mark mode as missing
                }
            } else {
                if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) {
                    vals[j] = Utils.missingValue(); // mark mean as missing
                }
            }
        }
    }
    if (updateClusterInfo)
        m_ClusterCentroids.add(new DenseInstance(1.0, vals));
    return vals;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Runs the experiments for the given sample points, evicts every result whose
 * performance value is {@code -1}, and accumulates the remaining results in
 * {@code allInstances}.
 *
 * <p>Eviction now happens even when writing the debugging ARFF dump fails; the dump
 * is documented below as being for debugging and testing only.
 *
 * @param samplePoints the configurations to test
 * @param round the round number, used only in the name of the debugging ARFF file
 * @param postfix suffix used only in the name of the debugging ARFF file
 * @param resuming if {@code true}, previously recorded performances are collected
 *            through the manager before the experiments are run
 * @return the tested instances without the evicted rows, or {@code null} if the
 *         experiments failed before producing any result
 */
protected Instances runExp(Instances samplePoints, int round, String postfix, boolean resuming) {
    Instances retval = null;
    try {
        if (resuming) {
            samplePoints = manager.collectPerfs(samplePoints, perfAttName);
        }

        retval = manager.runExp(samplePoints, perfAttName);
        // we output the result set for future debugging and testing purposes
        DataIOFile.saveDataToArffFile("data/trainingBestConf" + round + "_" + postfix + ".arff", samplePoints);
    } catch (IOException e) {
        e.printStackTrace();
    }

    if (retval == null) {
        // nothing was produced; avoid the NullPointerException the accumulation
        // below would otherwise raise
        return null;
    }

    // evict all bad configurations; walk backwards so that deleting a row never
    // shifts the index of a row that still has to be inspected
    Attribute perfAtt = retval.attribute(perfAttName);
    for (int i = retval.numInstances() - 1; i >= 0; i--) {
        if (retval.instance(i).value(perfAtt) == -1) {
            // delete(int) removes by position; passing a boxed Integer to remove(...)
            // would select remove(Object) and delete nothing
            retval.delete(i);
        }
    }

    if (allInstances == null) {
        allInstances = new Instances(retval);
    } else {
        allInstances.addAll(retval);
    }

    return retval;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Builds, for every attribute except the performance attribute, a new numeric
 * attribute whose "range" property is narrowed around {@code center}.
 *
 * <p>For each dimension the smallest absolute distance (truncated to two decimals)
 * between {@code center} and any other instance of {@code previousSet} is used to
 * form a candidate interval. The candidate ends and the attribute's existing numeric
 * bounds are then sorted, and the inner values of that sorted set become the new
 * range.
 *
 * @param previousSet the previously sampled instances; also supplies attribute names
 *            and the existing numeric bounds
 * @param center the instance the new ranges are centred on
 * @return the narrowed attributes, in attribute order, without the performance
 *         attribute
 */
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();

    // tolerate a data set without the performance attribute instead of failing
    // with a NullPointerException; -1 never matches an attribute index
    Attribute perfAtt = previousSet.attribute(PerformanceAttName);
    int pos = (perfAtt == null) ? -1 : perfAtt.index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos) {
            continue;
        }

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;

        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                // absolute distance, truncated (not rounded) to two decimals
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0,
                        minDis);
            }
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;

        // sort the candidate ends together with the existing bounds; duplicates
        // collapse, so the set holds between one and four values
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            // only the middle value remains
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default://case 4:
            // drop the two extremes and keep the inner pair
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }

        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);

        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }

    return localAtts;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Removes strongly correlated attributes from {@code retval} in place, using the
 * correlation matrix of a {@code PrincipalComponents} evaluator.
 *
 * <p>For every attribute pair {@code (i, j)} with {@code i < j} whose correlation is
 * at or beyond {@code correlationFactorThreshold} in absolute value, attribute
 * {@code j} is scheduled for deletion unless it is the last attribute. The names of
 * the attributes {@code i} that are correlated with the last attribute are returned.
 * Any exception is printed and the names gathered so far are returned.
 *
 * @param retval the data set to analyse; its attributes are deleted in place
 * @return names of the attributes strongly correlated with the last attribute
 */
public static ArrayList<String> preprocessInstances(Instances retval) {
    ArrayList<String> result = new ArrayList<String>();
    ArrayList<String> deleteAttNames = new ArrayList<String>();
    PrincipalComponents pc = new PrincipalComponents();
    HashMap<Integer, ArrayList<Integer>> filter = new HashMap<Integer, ArrayList<Integer>>();
    try {
        pc.buildEvaluator(retval);
        double[][] cMatrix = pc.getCorrelationMatrix();
        final int dim = cMatrix.length;
        final int lastIdx = dim - 1;

        // upper triangle only: record, per row, the later columns it correlates with
        for (int row = 0; row < dim; row++) {
            ArrayList<Integer> correlated = new ArrayList<Integer>();
            for (int col = row + 1; col < dim; col++) {
                double corr = cMatrix[row][col];
                if (corr >= correlationFactorThreshold || corr <= -correlationFactorThreshold) {
                    correlated.add(col);
                }
            }
            if (!correlated.isEmpty()) {
                filter.put(row, correlated);
            }
        }

        for (Map.Entry<Integer, ArrayList<Integer>> entry : filter.entrySet()) {
            ArrayList<Integer> correlated = entry.getValue();
            for (Integer boxedIdx : correlated) {
                int attIdx = boxedIdx;
                if (attIdx == lastIdx) {
                    // the last attribute is never scheduled for deletion
                    continue;
                }
                String attName = retval.attribute(attIdx).name();
                if (!deleteAttNames.contains(attName)) {
                    deleteAttNames.add(attName);
                }
            }
            if (correlated.contains(lastIdx)) {
                int keyIdx = entry.getKey();
                result.add(retval.attribute(keyIdx).name());
            }
        }

        // delete by name so that earlier deletions cannot invalidate later positions
        for (String attName : deleteAttNames) {
            retval.deleteAttributeAt(retval.attribute(attName).index());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java

License:Open Source License

/**
 * Builds, for every attribute except the performance attribute (if present), a new
 * numeric attribute whose "range" property is narrowed around {@code center}.
 *
 * <p>For each dimension the signed offsets (truncated to three decimals) from
 * {@code center} to its nearest neighbour below and to its nearest neighbour at or
 * above are determined. The resulting candidate ends and the attribute's existing
 * numeric bounds are sorted, and the inner values of that sorted set become the new
 * range.
 *
 * @param previousSet the previously sampled instances; also supplies attribute names
 *            and the existing numeric bounds
 * @param center the instance the new ranges are centred on
 * @return the narrowed attributes, in attribute order, without the performance
 *         attribute
 */
private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) {
    ArrayList<Attribute> narrowed = new ArrayList<Attribute>();
    final int attNum = center.numAttributes();

    // -1 never matches an attribute index, so nothing is skipped when absent
    Attribute perfAtt = previousSet.attribute(PerformanceAttName);
    final int perfPos = (perfAtt != null) ? perfAtt.index() : -1;

    // traverse each dimension
    for (int i = 0; i < attNum; i++) {
        if (i == perfPos) {
            continue;
        }

        // largest negative offset and smallest non-negative offset seen so far
        double below = 1 - Double.MAX_VALUE;
        double above = Double.MAX_VALUE;

        for (Enumeration<Instance> enu = previousSet.enumerateInstances(); enu.hasMoreElements();) {
            Instance ins = enu.nextElement();
            if (ins.equals(center)) {
                continue;
            }
            double offset = ins.value(i) - center.value(i);
            // truncated (not rounded) to three decimals
            int scaled = (int) (offset * 1000);
            double truncated = scaled / 1000.0;
            if (offset < 0) {
                below = Math.max(truncated, below);
            } else {
                above = Math.min(truncated, above);
            }
        }

        // now we set the range
        double upper = center.value(i) + above;
        double lower = center.value(i) + below;

        // duplicates collapse, so the set holds between one and four values
        TreeSet<Double> candidates = new TreeSet<Double>();
        candidates.add(upper);
        candidates.add(lower);
        candidates.add(previousSet.attribute(i).getUpperNumericBound());
        candidates.add(previousSet.attribute(i).getLowerNumericBound());

        final int distinct = candidates.size();
        if (distinct == 1) {
            upper = lower = candidates.first();
        } else if (distinct == 2) {
            upper = candidates.last();
            lower = candidates.first();
        } else if (distinct == 3) {
            // only the middle value remains
            upper = lower = candidates.higher(candidates.first());
        } else {
            // four values: drop the two extremes and keep the inner pair
            upper = candidates.lower(candidates.last());
            lower = candidates.higher(candidates.first());
        }

        Properties rangeProps = new Properties();
        rangeProps.setProperty("range", "[" + lower + "," + upper + "]");

        narrowed.add(new Attribute(previousSet.attribute(i).name(), new ProtectedProperties(rangeProps)));
    }

    return narrowed;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java

License:Open Source License

/**
 * Builds, for every attribute except the performance attribute (if present), a new
 * numeric attribute whose "range" property is narrowed around {@code center}.
 *
 * <p>For each dimension the smallest absolute distance (truncated to three decimals)
 * between {@code center} and any other instance of {@code previousSet} is used to
 * form a symmetric candidate interval. The candidate ends and the attribute's
 * existing numeric bounds are then sorted, and the inner values of that sorted set
 * become the new range.
 *
 * @param previousSet the previously sampled instances; also supplies attribute names
 *            and the existing numeric bounds
 * @param center the instance the new ranges are centred on
 * @return the narrowed attributes, in attribute order, without the performance
 *         attribute
 */
private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) {
    ArrayList<Attribute> localAtts = new ArrayList<Attribute>();
    int attNum = center.numAttributes();

    // same guard as scaleDownNeighbordists: a data set without the performance
    // attribute must not raise a NullPointerException; -1 never matches an index
    Attribute perfAtt = previousSet.attribute(PerformanceAttName);
    int pos = (perfAtt == null) ? -1 : perfAtt.index();

    // traverse each dimension
    Enumeration<Instance> enu;
    double minDis;
    for (int i = 0; i < attNum; i++) {
        if (i == pos) {
            continue;
        }

        enu = previousSet.enumerateInstances();
        minDis = Double.MAX_VALUE;

        while (enu.hasMoreElements()) {
            Instance ins = enu.nextElement();
            if (!ins.equals(center)) {
                // absolute distance, truncated (not rounded) to three decimals
                minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0,
                        minDis);
            }
        }

        // now we set the range
        Properties p1 = new Properties();
        double upper = center.value(i) + minDis, lower = center.value(i) - minDis;

        // sort the candidate ends together with the existing bounds; duplicates
        // collapse, so the set holds between one and four values
        TreeSet<Double> detourSet = new TreeSet<Double>();
        detourSet.add(upper);
        detourSet.add(lower);
        detourSet.add(previousSet.attribute(i).getUpperNumericBound());
        detourSet.add(previousSet.attribute(i).getLowerNumericBound());
        switch (detourSet.size()) {
        case 1:
            upper = lower = detourSet.first();
            break;
        case 2:
            upper = detourSet.last();
            lower = detourSet.first();
            break;
        case 3:
            // only the middle value remains
            upper = lower = detourSet.higher(detourSet.first());
            break;
        default://case 4:
            // drop the two extremes and keep the inner pair
            upper = detourSet.lower(detourSet.last());
            lower = detourSet.higher(detourSet.first());
            break;
        }

        p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
        ProtectedProperties prop1 = new ProtectedProperties(p1);

        localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1));
    }

    return localAtts;
}

From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java

License:Open Source License

/**
 * Tests every sample point whose last attribute value is still NaN and stores the
 * measured performance in that last attribute.
 *
 * <p>The performance attribute is appended first if the data set does not have it.
 * A failed test is recorded as {@code -1}. Once the number of consecutive failures
 * reaches {@code targetTestErrorNum}, a message is printed and the JVM exits with
 * status 1. Each tested instance is passed to {@code writePerfstoFile}.
 *
 * @param samplePoints the configurations to test; modified in place
 * @param perfAttName name of the performance attribute
 * @return {@code samplePoints} itself, with its class index set to the last attribute
 */
public Instances runExp(Instances samplePoints, String perfAttName) {
    if (samplePoints.attribute(perfAttName) == null) {
        samplePoints.insertAttributeAt(new Attribute(perfAttName), samplePoints.numAttributes());
    }

    final int total = samplePoints.numInstances();
    int consecutiveFailures = 0;
    for (int i = 0; i < total; i++) {
        Instance ins = samplePoints.get(i);

        // attribute name -> value, handed to startTest
        HashMap hm = new HashMap();
        for (int j = 0; j < ins.numAttributes(); j++) {
            Attribute att = ins.attribute(j);
            hm.put(att.name(), ins.value(att));
        }

        // a value is already present in the last attribute: nothing to test
        if (!Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) {
            continue;
        }

        double y;
        if (this.startTest(hm, i, isInterrupt)) {
            y = getPerformanceByType(performanceType);
            consecutiveFailures = 0;
        } else {
            // the setting does not work, we skip it
            y = -1;
            consecutiveFailures++;
            if (consecutiveFailures >= targetTestErrorNum) {
                System.out.println(
                        "There must be somthing wrong with the system. Please check and restart.....");
                System.exit(1);
            }
        }

        ins.setValue(samplePoints.numAttributes() - 1, y);
        writePerfstoFile(ins);
    }

    samplePoints.setClassIndex(samplePoints.numAttributes() - 1);
    return samplePoints;
}

From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java

License:Open Source License

/**
 * Fills the last attribute of the sample points with performance values found in
 * the files of the {@code perfsfilepath} folder.
 *
 * <p>The performance attribute is appended first if the data set does not have it.
 * Each sample point is identified by {@code getMD5}; a file is matched to a sample
 * point when its file name equals that identifier. If at least one file is listed,
 * {@code isInterrupt} is set to {@code true}.
 *
 * @param samplePoints the configurations to fill; modified in place
 * @param perfAttName name of the performance attribute
 * @return {@code samplePoints} itself, with its class index set to the last attribute
 */
@Override
public Instances collectPerfs(Instances samplePoints, String perfAttName) {
    if (samplePoints.attribute(perfAttName) == null) {
        Attribute performance = new Attribute(perfAttName);
        samplePoints.insertAttributeAt(performance, samplePoints.numAttributes());
    }

    File perfFolder = new File(perfsfilepath);
    int tot = 0;
    if (perfFolder.exists()) {
        // let's get all the name set for the sample points
        TreeSet<String> insNameSet = new TreeSet<String>();
        HashMap<String, Integer> mapping = new HashMap<String, Integer>();
        int pos = 0;
        Iterator<Instance> itr = samplePoints.iterator();
        while (itr.hasNext()) {
            String mdstr = getMD5(itr.next());
            insNameSet.add(mdstr);
            mapping.put(mdstr, Integer.valueOf(pos++));
        }

        // now we collect; listFiles returns null when the path is not a directory
        // or an I/O error occurs
        File[] perfFiles = perfFolder.listFiles(new PerfsFileFilter(insNameSet));
        if (perfFiles != null) {
            tot = perfFiles.length;
            if (tot > 0) {
                isInterrupt = true;
            }
            for (File perfFile : perfFiles) {
                Integer index = mapping.get(perfFile.getName());
                if (index == null) {
                    // the filter accepted a file whose name is not a known
                    // identifier; skip it instead of failing on unboxing
                    continue;
                }
                Instance ins = samplePoints.get(index.intValue());
                double[] results = getPerf(perfFile.getAbsolutePath());
                if (results != null && results.length > 0) {
                    ins.setValue(samplePoints.numAttributes() - 1, results[0]);
                }
            }
        }
    }

    Instances retVal = samplePoints;
    retVal.setClassIndex(retVal.numAttributes() - 1);
    System.out.println("Total number of collected performances is : " + tot);
    return retVal;
}

From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java

License:Apache License

/**
 * Wraps a text message in a two-attribute instance that belongs to the given
 * data set.
 *
 * @param text the message content to convert
 * @param data the header information; its "content" attribute receives the text
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    Attribute contentAtt = data.attribute("content");
    // register the text with the attribute and keep the index it was stored under
    int textIndex = contentAtt.addStringValue(text);

    Instance converted = new Instance(2);
    converted.setValue(contentAtt, textIndex);
    converted.setDataset(data);
    return converted;
}

From source file:com.emar.recsys.user.model.WekaExperiment.java

License:Open Source License

/**
 * Expects the following parameters:/* w w  w .  j  av  a  2s . c o m*/
 * <ul>
 * <li>-classifier "classifier incl. parameters"</li>
 * <li>-exptype "classification|regression"</li>
 * <li>-splittype "crossvalidation|randomsplit"</li>
 * <li>-runs "# of runs"</li>
 * <li>-folds "# of cross-validation folds"</li>
 * <li>-percentage "percentage for randomsplit"</li>
 * <li>-result "arff file for storing the results"</li>
 * <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args
 *            the commandline arguments
 * @throws Exception
 *             if something goes wrong
 */
public static void main(String[] args) throws Exception {
    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: ExperimentDemo\n" + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for reach dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}