Example usage for weka.core Instances setClass

List of usage examples for weka.core.Instances.setClass

Introduction

This page collects example usages of the weka.core.Instances.setClass method.

Prototype

public void setClass(Attribute att) 

Source Link

Document

Sets the class attribute.

Usage

From source file:DocClassifier.java

/**
 * Builds a dataset that contains one instance per input file.
 *
 * <p>The class attribute is the last element of {@code attrList}. It is assigned once, before
 * any instance is added, so the returned dataset has its class attribute set even when
 * {@code files} is empty (it used to be re-assigned on every loop iteration and was never
 * assigned for an empty array).
 *
 * @param files the files to convert; each one is passed to {@code createInstance}
 * @return a dataset holding the instances created from {@code files}
 */
public Instances createInstances(File[] files) {
    Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);
    // The class attribute does not depend on the individual file, so set it once up front.
    // The size check keeps the previous behaviour of not failing for an empty input array
    // when attrList itself has no elements.
    if (attrList.size() > 0) {
        instances.setClass((Attribute) attrList.lastElement());
    }
    for (File file : files) {
        Instance inst = createInstance(file);
        inst.setDataset(instances);
        instances.add(inst);
    }
    return instances;
}

From source file:asap.PostProcess.java

/**
 * Loads the training data from the given stream and registers one {@code NLPSystem} for each
 * classifier command returned by {@code Config.getWekaModelsCmd()}.
 *
 * <p>Failures are logged instead of propagated: if the data set cannot be read the method
 * returns without registering any system, and a classifier command that is empty, cannot be
 * parsed or cannot be instantiated is skipped.
 *
 * @param pposTrainingData stream the training data set is read from
 */
public void loadTrainingDataStream(PreProcessOutputStream pposTrainingData) {
    Instances instancesTrainingSet;

    DataSource source = new DataSource(pposTrainingData);
    try {
        instancesTrainingSet = source.getDataSet();
    } catch (Exception ex) {
        Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE,
                "Could not load the training data set", ex);
        return;
    }
    // setting class attribute if the data format does not provide this information
    if (instancesTrainingSet.classIndex() == -1) {
        instancesTrainingSet.setClass(instancesTrainingSet.attribute("gold_standard"));
    }

    for (String wekaModelsCmd : Config.getWekaModelsCmd()) {
        String[] classifierCmd;
        try {
            classifierCmd = Utils.splitOptions(wekaModelsCmd);
        } catch (Exception ex) {
            Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE,
                    "Could not parse classifier command: " + wekaModelsCmd, ex);
            continue;
        }
        // A command without any token has no class name at index 0; skip it instead of
        // letting an ArrayIndexOutOfBoundsException abort the remaining commands.
        if (classifierCmd.length == 0) {
            Logger.getLogger(PostProcess.class.getName()).log(Level.WARNING,
                    "Skipping empty classifier command");
            continue;
        }
        // The first token is the classifier class name; it is blanked so that only the
        // remaining tokens are passed on as options.
        String classname = classifierCmd[0];
        classifierCmd[0] = "";
        try {
            AbstractClassifier cl = (AbstractClassifier) Utils.forName(Classifier.class, classname,
                    classifierCmd);
            systems.add(new NLPSystem(cl, instancesTrainingSet, null));
            System.out.println("\tAdded system " + systems.get(systems.size() - 1).shortName());
        } catch (Exception ex) {
            Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE,
                    "Could not create classifier " + classname, ex);
        }
    }

}

From source file:asap.PostProcess.java

/**
 * Reads the evaluation data set from the given stream and assigns it to every registered
 * system.
 *
 * <p>If the data set cannot be read, the failure is logged and no system is touched.
 *
 * @param pposEvaluationData stream the evaluation data set is read from
 */
public void loadEvaluationDataStream(PreProcessOutputStream pposEvaluationData) {
    final DataSource evaluationSource = new DataSource(pposEvaluationData);

    final Instances evaluationData;
    try {
        evaluationData = evaluationSource.getDataSet();
    } catch (Exception ex) {
        Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex);
        return;
    }

    // Fall back to the "gold_standard" attribute when the data does not declare a class.
    final boolean classMissing = evaluationData.classIndex() == -1;
    if (classMissing) {
        evaluationData.setClass(evaluationData.attribute("gold_standard"));
    }

    for (NLPSystem registered : systems) {
        registered.setEvaluationSet(evaluationData);
    }
}

From source file:classifiers.ComplexClassifier.java

/**
 * Runs {@code Classify} on every instance of the given data set.
 *
 * <p>The last attribute of {@code testinst} is set as its class attribute before the
 * instances are processed.
 *
 * @param testinst the instances to test
 * @return a 1x2 array: {@code [0][0]} is the percentage of instances for which
 *         {@code Classify} returned {@code false}, {@code [0][1]} the elapsed time in
 *         milliseconds; both stay 0 when the data set has no attributes
 */
@Override
public double[][] test(Instances testinst) {
    final long startMillis = System.currentTimeMillis();
    final double[][] result = new double[1][2];

    // Nothing to do without attributes: hand back the zero-initialised result.
    if (testinst.numAttributes() == 0) {
        // System.out.println(testinst);
        return result;
    }

    testinst.setClass(testinst.attribute(testinst.numAttributes() - 1));

    double rejected = 0;
    for (int idx = 0; idx < testinst.numInstances(); idx++) {
        final boolean accepted = Classify(testinst.instance(idx));
        if (!accepted) {
            rejected++;
        }
    }

    final long endMillis = System.currentTimeMillis();
    result[0][0] = (rejected / testinst.numInstances()) * 100;
    result[0][1] = endMillis - startMillis;
    // System.out.println(testinst);
    return result;
}

From source file:classifiers.ComplexClassifierZufall.java

/**
 * Applies {@code Classify} to each instance of the given data set and reports the outcome.
 *
 * <p>Before the instances are processed, the last attribute of {@code testinst} becomes its
 * class attribute.
 *
 * @param testinst the instances to test
 * @return a 1x2 array whose first entry is the percentage of instances for which
 *         {@code Classify} returned {@code false} and whose second entry is the elapsed time
 *         in milliseconds; both entries remain 0 if the data set has no attributes
 */
@Override
@SuppressWarnings("empty-statement")
public double[][] test(Instances testinst) {
    final long begin = System.currentTimeMillis();
    final double[][] outcome = new double[1][2];

    final boolean hasAttributes = testinst.numAttributes() != 0;
    if (hasAttributes) {
        final int lastAttribute = testinst.numAttributes() - 1;
        testinst.setClass(testinst.attribute(lastAttribute));

        double failures = 0;
        int position = 0;
        while (position < testinst.numInstances()) {
            if (!Classify(testinst.instance(position))) {
                failures++;
            }
            position++;
        }

        final long finish = System.currentTimeMillis();
        outcome[0][0] = (failures / testinst.numInstances()) * 100;
        outcome[0][1] = finish - begin;
    }
    // System.out.println(testinst);
    return outcome;
}

From source file:controller.MineroControler.java

/**
 * Builds a {@code RandomTree} on the data read from {@code fuente_arff}, using the named
 * attribute as the class attribute.
 *
 * <p>If building the classifier fails, the problem is reported on stderr and the string form
 * of the tree object is still returned.
 *
 * @param atributo name of the attribute to use as the class attribute
 * @return {@code arbol.toString()}, or {@code null} if the data cannot be loaded or no
 *         attribute with the given name exists
 */
public String clasificardorArbolAleat(String atributo) {
    // NOTE(review): the reader is not closed here because it wraps the fuente_arff field,
    // whose ownership is not visible from this method - confirm who closes it.
    BufferedReader breader = new BufferedReader(fuente_arff);
    Instances datos;
    try {
        datos = new Instances(breader);
    } catch (IOException ex) {
        System.err.println("Problemas al intentar cargar los datos");
        return null;
    }

    // attribute(String) yields null for an unknown name; passing that on to setClass would
    // fail with a NullPointerException, so report it like a load failure instead.
    Attribute atr = datos.attribute(atributo);
    if (atr == null) {
        System.err.println("No existe el atributo de clase: " + atributo);
        return null;
    }
    datos.setClass(atr);

    RandomTree arbol = new RandomTree(); // Class for constructing a tree that considers K randomly chosen attributes at each node.

    try {

        arbol.setNumFolds(100);
        arbol.setKValue(0);
        arbol.setMinNum(1);
        arbol.setMaxDepth(0);
        arbol.setSeed(1);
        arbol.buildClassifier(datos);

    } catch (Exception ex) {
        System.err.println("Problemas al ejecutar algorimo de clasificacion" + ex.getLocalizedMessage());
    }
    return arbol.toString();
}

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Loads change-based instances from a metrics file and its companion bug file.
 *
 * <p>The bug file path is derived from the metrics file path by replacing its last 14
 * characters with {@code "repro.csv"}. Both files are split on {@code ';'} and their first
 * line is treated as a header. For every pair of consecutive entries (in the iteration order
 * of the sorted map) that belong to the same entity, one instance is created that holds the
 * metric deltas, the absolute metric values and a nominal {@code bug} class. The instance is
 * only added if at least one delta is greater than zero.
 *
 * @param file the metrics file
 * @return the created instances, with the {@code bug} attribute as class attribute
 * @throws RuntimeException if one of the files cannot be read, or if the header columns
 *         {@code svnrev}, {@code num_bugs_trace}, {@code lm_LOC} or {@code h_E} are missing
 */
@Override
public Instances load(File file) {
    final String[] lines;
    String[] lineSplit;
    String[] lineSplitBug;

    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs are in another file
    // NOTE(review): the fixed 14-character suffix presumably matches the metrics file naming
    // scheme - confirm against the data set layout.
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // locate the revision and bug-count columns in the header of the bug file
    int revisionIndex = -1;
    int bugIndex = -1;
    lineSplitBug = linesBug[0].split(";");
    for (int j = 0; j < lineSplitBug.length; j++) {
        if (lineSplitBug[j].equals("svnrev")) {
            revisionIndex = j;
        }
        if (lineSplitBug[j].equals("num_bugs_trace")) {
            bugIndex = j;
        }
    }
    if (revisionIndex < 0) {
        throw new RuntimeException("could not find SVN revisions");
    }
    if (bugIndex < 0) {
        throw new RuntimeException("could not find bug information");
    }

    // locate the first and last metric column in the header of the metrics file
    int metricsStartIndex = -1;
    int metricsEndIndex = -1;
    lineSplit = lines[0].split(";");
    for (int j = 0; j < lineSplit.length; j++) {
        if (lineSplit[j].equals("lm_LOC")) {
            metricsStartIndex = j;
        }
        if (lineSplit[j].equals("h_E")) {
            metricsEndIndex = j;
        }
    }
    if (metricsStartIndex < 0) {
        throw new RuntimeException("could not find first metric, i.e., lm_LOC");
    }
    if (metricsEndIndex < 0) {
        throw new RuntimeException("could not find last metric, i.e., h_E");
    }
    int numMetrics = metricsEndIndex - metricsStartIndex + 1;

    // create sets of all filenames and revisions
    // maps (entity, revision) to the line index; the same index is later used for both the
    // metrics file and the bug file
    SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>();
    for (int i = 1; i < linesBug.length; i++) {
        lineSplitBug = linesBug[i].split(";");
        entityRevisionPairs
                .put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i);
    }

    // prepare weka instances
    // attribute layout: all "<metric>_delta" columns, then all "<metric>_abs" columns, then
    // the nominal class "bug" with the values "0" and "1"
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    lineSplit = lines[0].split(";");
    for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
        atts.add(new Attribute(lineSplit[j] + "_delta"));
    }
    for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
        atts.add(new Attribute(lineSplit[j] + "_abs"));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // create data
    // state carried over from the previously visited entry
    String lastFile = null;
    double[] lastValues = null;
    int lastNumBugs = 0;
    for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) {
        try {
            // first get values
            lineSplit = lines[entry.getValue()].split(";");
            lineSplitBug = linesBug[entry.getValue()].split(";");
            int i = 0;
            double[] values = new double[numMetrics];
            for (int j = metricsStartIndex; j <= metricsEndIndex; j++) {
                values[i] = Double.parseDouble(lineSplit[j]);
                i++;
            }
            int numBugs = Integer.parseInt(lineSplitBug[bugIndex]);

            // then check if an entity must be created
            // an instance is only built when the previous entry belongs to the same entity,
            // which also guarantees that lastValues is non-null here
            if (entry.getKey().entity.equals(lastFile)) {
                // create new instance
                double[] instanceValues = new double[2 * numMetrics + 1];
                for (int j = 0; j < numMetrics; j++) {
                    instanceValues[j] = values[j] - lastValues[j];
                    instanceValues[j + numMetrics] = values[j];
                }
                // check if any value>0
                // only the delta columns are inspected; negative deltas do not count
                boolean changeOccured = false;
                for (int j = 0; j < numMetrics; j++) {
                    if (instanceValues[j] > 0) {
                        changeOccured = true;
                    }
                }
                if (changeOccured) {
                    // class is 1 only if the bug count increased compared to the previous entry
                    instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1;
                    data.add(new DenseInstance(1.0, instanceValues));
                }
            }
            lastFile = entry.getKey().entity;
            lastValues = values;
            lastNumBugs = numBugs;
        } catch (IllegalArgumentException e) {
            // covers NumberFormatException from the parse calls above; the carried-over
            // state is left unchanged for such a line
            System.err.println("error in line " + entry.getValue() + ": " + e.getMessage());
            System.err.println("metrics line: " + lines[entry.getValue()]);
            System.err.println("bugs line: " + linesBug[entry.getValue()]);
            System.err.println("line is ignored");
        }
    }

    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java

License:Apache License

/**
 * Loads instances from a metrics file and its companion bug file.
 *
 * <p>The bug file path is the metrics file path with its last 14 characters replaced by
 * {@code "repro.csv"}. Both files are split on {@code ';'}; the first metrics line is the
 * header. The first three columns (four if the fourth header is {@code "project_rev"}) and
 * the last two columns are not used as attributes. A row with an empty metric cell is
 * reported on stdout and left out.
 *
 * @param file  the metrics file
 * @param dummy not used by this method
 * @return the loaded instances, with the nominal {@code bug} attribute as class attribute
 * @throws RuntimeException if one of the two files cannot be read
 */
public Instances load(File file, String dummy) {
    final String[] lines;
    try {
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // The bug information lives in a sibling file.
    final String metricsPath = file.getAbsolutePath();
    final String bugPath = metricsPath.substring(0, metricsPath.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(bugPath);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // Header layout: skip the leading three/four columns and the trailing two.
    final String[] header = lines[0].split(";");
    final int offset = header[3].equals("project_rev") ? 4 : 3;
    // The bug column in the bug file moves together with the offset.
    final int bugColumn = (offset == 3) ? 7 : 8;

    final ArrayList<Attribute> atts = new ArrayList<Attribute>();
    for (int col = offset; col < header.length - 2; col++) {
        atts.add(new Attribute(header[col]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // One instance per data row; row 0 is the header.
    for (int row = 1; row < lines.length; row++) {
        final String[] cells = lines[row].split(";");
        final String[] bugCells = linesBug[row].split(";");
        final double[] values = new double[data.numAttributes()];

        boolean validInstance = true;
        for (int j = 0; j < values.length - 1; j++) {
            final String cell = cells[j + offset].trim();
            if (cell.isEmpty()) {
                // Stop at the first empty cell; the row is dropped below.
                validInstance = false;
                break;
            }
            values[j] = Double.parseDouble(cell);
        }
        values[values.length - 1] = bugCells[bugColumn].equals("0") ? 0 : 1;

        if (!validInstance) {
            System.out.println("instance " + row + " is invalid");
            continue;
        }
        data.add(new DenseInstance(1.0, values));
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.AUDIDataLoader.java

License:Apache License

@Override
public Instances load(File file) {
    final String[] lines;
    try {//from  w  w w. java  2s .  co m
        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // information about bugs are in another file
    String path = file.getAbsolutePath();
    path = path.substring(0, path.length() - 14) + "repro.csv";
    final String[] linesBug;
    try {
        linesBug = FileTools.getLinesFromFile(path);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(";");
    // ignore first three/four and last two columns
    int offset;
    if (lineSplit[3].equals("project_rev")) {
        offset = 4;
    } else {
        offset = 3;
    }
    for (int j = 0; j < lineSplit.length - (offset + 2); j++) {
        atts.add(new Attribute(lineSplit[j + offset]));
    }
    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        boolean validInstance = true;
        lineSplit = lines[i].split(";");
        String[] lineSplitBug = linesBug[i].split(";");
        double[] values = new double[data.numAttributes()];
        for (int j = 0; validInstance && j < values.length - 1; j++) {
            if (lineSplit[j + offset].trim().isEmpty()) {
                validInstance = false;
            } else {
                values[j] = Double.parseDouble(lineSplit[j + offset].trim());
            }
        }
        if (offset == 3) {
            values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1;
        } else {
            values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1;
        }

        if (validInstance) {
            data.add(new DenseInstance(1.0, values));
        } else {
            System.out.println("instance " + i + " is invalid");
        }
    }
    return data;
}

From source file:de.ugoe.cs.cpdp.loader.CSVMockusDataLoader.java

License:Apache License

@Override
public Instances load(File file) {
    final String[] lines;
    try {//from  ww  w  .j  a va  2s .c  o m

        lines = FileTools.getLinesFromFile(file.getAbsolutePath());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // configure Instances
    final ArrayList<Attribute> atts = new ArrayList<Attribute>();

    String[] lineSplit = lines[0].split(",");
    for (int j = 0; j < lineSplit.length - 3; j++) {
        atts.add(new Attribute(lineSplit[j + 2]));
    }

    final ArrayList<String> classAttVals = new ArrayList<String>();
    classAttVals.add("0");
    classAttVals.add("1");
    final Attribute classAtt = new Attribute("bug", classAttVals);
    atts.add(classAtt);

    final Instances data = new Instances(file.getName(), atts, 0);
    data.setClass(classAtt);

    // fetch data
    for (int i = 1; i < lines.length; i++) {
        lineSplit = lines[i].split(",");
        double[] values = new double[lineSplit.length - 2];
        for (int j = 0; j < values.length - 1; j++) {
            values[j] = Double.parseDouble(lineSplit[j + 2].trim());
        }
        values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1;
        data.add(new DenseInstance(1.0, values));
    }

    return data;
}