List of usage examples for the weka.core.Instances method setClass
public void setClass(Attribute att)
From source file:DocClassifier.java
public Instances createInstances(File[] files) { Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length); for (File file : files) { Instance inst = createInstance(file); inst.setDataset(instances);//from ww w .j ava2 s. c o m instances.add(inst); instances.setClass((Attribute) attrList.lastElement()); } return instances; }
From source file:asap.PostProcess.java
public void loadTrainingDataStream(PreProcessOutputStream pposTrainingData) { Instances instancesTrainingSet; DataSource source = new DataSource(pposTrainingData); try {//from ww w . j a v a2s . co m instancesTrainingSet = source.getDataSet(); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); return; } // setting class attribute if the data format does not provide this information if (instancesTrainingSet.classIndex() == -1) { instancesTrainingSet.setClass(instancesTrainingSet.attribute("gold_standard")); } for (String wekaModelsCmd : Config.getWekaModelsCmd()) { String[] classifierCmd; try { classifierCmd = Utils.splitOptions(wekaModelsCmd); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); continue; } String classname = classifierCmd[0]; classifierCmd[0] = ""; try { AbstractClassifier cl = (AbstractClassifier) Utils.forName(Classifier.class, classname, classifierCmd); // String modelName = String.format("%s%s%s%s.model", modelDirectory, File.separatorChar, i, classname); // System.out.println(String.format("\tBuilding model %s (%s) and doing cross-validation...", i++, modelName)); // System.out.println(CrossValidation.performCrossValidationMT(trainSet, cl, Config.getCrossValidationSeed(), Config.getCrossValidationFolds(), modelName)); systems.add(new NLPSystem(cl, instancesTrainingSet, null)); System.out.println("\tAdded system " + systems.get(systems.size() - 1).shortName()); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:asap.PostProcess.java
public void loadEvaluationDataStream(PreProcessOutputStream pposEvaluationData) { Instances instancesEvaluationSet; DataSource source = new DataSource(pposEvaluationData); try {/*from ww w. j av a2s . c om*/ instancesEvaluationSet = source.getDataSet(); } catch (Exception ex) { Logger.getLogger(PostProcess.class.getName()).log(Level.SEVERE, null, ex); return; } // setting class attribute if the data format does not provide this information if (instancesEvaluationSet.classIndex() == -1) { instancesEvaluationSet.setClass(instancesEvaluationSet.attribute("gold_standard")); } for (NLPSystem system : systems) { system.setEvaluationSet(instancesEvaluationSet); } }
From source file:classifiers.ComplexClassifier.java
@Override public double[][] test(Instances testinst) { double count = 0; long anfangszeit = System.currentTimeMillis(); ;/*from w w w . ja v a 2s . co m*/ long endzeit; double[][] ausgabe = new double[1][2]; if (testinst.numAttributes() != 0) { testinst.setClass(testinst.attribute(testinst.numAttributes() - 1)); for (int i = 0; i < testinst.numInstances(); i++) { if (!Classify(testinst.instance(i))) { count++; } else { } } endzeit = System.currentTimeMillis(); ausgabe[0][0] = (count / testinst.numInstances()) * 100; ausgabe[0][1] = ((endzeit - anfangszeit)); // System.out.println(testinst); return ausgabe; } else { // System.out.println(testinst); return ausgabe; } }
From source file:classifiers.ComplexClassifierZufall.java
@Override @SuppressWarnings("empty-statement") public double[][] test(Instances testinst) { double count = 0; long anfangszeit = System.currentTimeMillis(); ;// ww w. j a v a 2 s. c o m long endzeit; double[][] ausgabe = new double[1][2]; if (testinst.numAttributes() != 0) { testinst.setClass(testinst.attribute(testinst.numAttributes() - 1)); for (int i = 0; i < testinst.numInstances(); i++) { if (!Classify(testinst.instance(i))) { count++; } else { } } endzeit = System.currentTimeMillis(); ausgabe[0][0] = (count / testinst.numInstances()) * 100; ausgabe[0][1] = ((endzeit - anfangszeit)); // System.out.println(testinst); return ausgabe; } else { // System.out.println(testinst); return ausgabe; } }
From source file:controller.MineroControler.java
public String clasificardorArbolAleat(String atributo) { BufferedReader breader = null; Instances datos = null; breader = new BufferedReader(fuente_arff); try {/* ww w. j a va 2 s . co m*/ datos = new Instances(breader); Attribute atr = datos.attribute(atributo); datos.setClass(atr); //datos.setClassIndex(0); } catch (IOException ex) { System.err.println("Problemas al intentar cargar los datos"); return null; } RandomTree arbol = new RandomTree(); // Class for constructing a tree that considers K randomly chosen attributes at each node. try { arbol.setNumFolds(100); arbol.setKValue(0); arbol.setMinNum(1); arbol.setMaxDepth(0); arbol.setSeed(1); arbol.buildClassifier(datos); } catch (Exception ex) { System.err.println("Problemas al ejecutar algorimo de clasificacion" + ex.getLocalizedMessage()); } return arbol.toString(); }
From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; String[] lineSplit;//from ww w .j a va2 s .co m String[] lineSplitBug; try { lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } int revisionIndex = -1; int bugIndex = -1; lineSplitBug = linesBug[0].split(";"); for (int j = 0; j < lineSplitBug.length; j++) { if (lineSplitBug[j].equals("svnrev")) { revisionIndex = j; } if (lineSplitBug[j].equals("num_bugs_trace")) { bugIndex = j; } } if (revisionIndex < 0) { throw new RuntimeException("could not find SVN revisions"); } if (bugIndex < 0) { throw new RuntimeException("could not find bug information"); } int metricsStartIndex = -1; int metricsEndIndex = -1; lineSplit = lines[0].split(";"); for (int j = 0; j < lineSplit.length; j++) { if (lineSplit[j].equals("lm_LOC")) { metricsStartIndex = j; } if (lineSplit[j].equals("h_E")) { metricsEndIndex = j; } } if (metricsStartIndex < 0) { throw new RuntimeException("could not find first metric, i.e., lm_LOC"); } if (metricsEndIndex < 0) { throw new RuntimeException("could not find last metric, i.e., h_E"); } int numMetrics = metricsEndIndex - metricsStartIndex + 1; // create sets of all filenames and revisions SortedMap<EntityRevisionPair, Integer> entityRevisionPairs = new TreeMap<>(); for (int i = 1; i < linesBug.length; i++) { lineSplitBug = linesBug[i].split(";"); entityRevisionPairs .put(new EntityRevisionPair(lineSplitBug[0], Integer.parseInt(lineSplitBug[revisionIndex])), i); } // prepare weka instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); lineSplit = lines[0].split(";"); for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { 
atts.add(new Attribute(lineSplit[j] + "_delta")); } for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { atts.add(new Attribute(lineSplit[j] + "_abs")); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // create data String lastFile = null; double[] lastValues = null; int lastNumBugs = 0; for (Entry<EntityRevisionPair, Integer> entry : entityRevisionPairs.entrySet()) { try { // first get values lineSplit = lines[entry.getValue()].split(";"); lineSplitBug = linesBug[entry.getValue()].split(";"); int i = 0; double[] values = new double[numMetrics]; for (int j = metricsStartIndex; j <= metricsEndIndex; j++) { values[i] = Double.parseDouble(lineSplit[j]); i++; } int numBugs = Integer.parseInt(lineSplitBug[bugIndex]); // then check if an entity must be created if (entry.getKey().entity.equals(lastFile)) { // create new instance double[] instanceValues = new double[2 * numMetrics + 1]; for (int j = 0; j < numMetrics; j++) { instanceValues[j] = values[j] - lastValues[j]; instanceValues[j + numMetrics] = values[j]; } // check if any value>0 boolean changeOccured = false; for (int j = 0; j < numMetrics; j++) { if (instanceValues[j] > 0) { changeOccured = true; } } if (changeOccured) { instanceValues[instanceValues.length - 1] = numBugs <= lastNumBugs ? 0 : 1; data.add(new DenseInstance(1.0, instanceValues)); } } lastFile = entry.getKey().entity; lastValues = values; lastNumBugs = numBugs; } catch (IllegalArgumentException e) { System.err.println("error in line " + entry.getValue() + ": " + e.getMessage()); System.err.println("metrics line: " + lines[entry.getValue()]); System.err.println("bugs line: " + linesBug[entry.getValue()]); System.err.println("line is ignored"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.AUDIChangeLoader.java
License:Apache License
public Instances load(File file, String dummy) { final String[] lines; try {/* ww w . j a v a 2s.co m*/ lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(";"); // ignore first three/four and last two columns int offset; if (lineSplit[3].equals("project_rev")) { offset = 4; } else { offset = 3; } for (int j = 0; j < lineSplit.length - (offset + 2); j++) { atts.add(new Attribute(lineSplit[j + offset])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { boolean validInstance = true; lineSplit = lines[i].split(";"); String[] lineSplitBug = linesBug[i].split(";"); double[] values = new double[data.numAttributes()]; for (int j = 0; validInstance && j < values.length - 1; j++) { if (lineSplit[j + offset].trim().isEmpty()) { validInstance = false; } else { values[j] = Double.parseDouble(lineSplit[j + offset].trim()); } } if (offset == 3) { values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; } else { values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; } if (validInstance) { data.add(new DenseInstance(1.0, values)); } else { System.out.println("instance " + i + " is invalid"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.AUDIDataLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; try {//from w w w. java 2s . co m lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // information about bugs are in another file String path = file.getAbsolutePath(); path = path.substring(0, path.length() - 14) + "repro.csv"; final String[] linesBug; try { linesBug = FileTools.getLinesFromFile(path); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(";"); // ignore first three/four and last two columns int offset; if (lineSplit[3].equals("project_rev")) { offset = 4; } else { offset = 3; } for (int j = 0; j < lineSplit.length - (offset + 2); j++) { atts.add(new Attribute(lineSplit[j + offset])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { boolean validInstance = true; lineSplit = lines[i].split(";"); String[] lineSplitBug = linesBug[i].split(";"); double[] values = new double[data.numAttributes()]; for (int j = 0; validInstance && j < values.length - 1; j++) { if (lineSplit[j + offset].trim().isEmpty()) { validInstance = false; } else { values[j] = Double.parseDouble(lineSplit[j + offset].trim()); } } if (offset == 3) { values[values.length - 1] = lineSplitBug[7].equals("0") ? 0 : 1; } else { values[values.length - 1] = lineSplitBug[8].equals("0") ? 0 : 1; } if (validInstance) { data.add(new DenseInstance(1.0, values)); } else { System.out.println("instance " + i + " is invalid"); } } return data; }
From source file:de.ugoe.cs.cpdp.loader.CSVMockusDataLoader.java
License:Apache License
@Override public Instances load(File file) { final String[] lines; try {//from ww w .j a va 2s .c o m lines = FileTools.getLinesFromFile(file.getAbsolutePath()); } catch (IOException e) { throw new RuntimeException(e); } // configure Instances final ArrayList<Attribute> atts = new ArrayList<Attribute>(); String[] lineSplit = lines[0].split(","); for (int j = 0; j < lineSplit.length - 3; j++) { atts.add(new Attribute(lineSplit[j + 2])); } final ArrayList<String> classAttVals = new ArrayList<String>(); classAttVals.add("0"); classAttVals.add("1"); final Attribute classAtt = new Attribute("bug", classAttVals); atts.add(classAtt); final Instances data = new Instances(file.getName(), atts, 0); data.setClass(classAtt); // fetch data for (int i = 1; i < lines.length; i++) { lineSplit = lines[i].split(","); double[] values = new double[lineSplit.length - 2]; for (int j = 0; j < values.length - 1; j++) { values[j] = Double.parseDouble(lineSplit[j + 2].trim()); } values[values.length - 1] = lineSplit[lineSplit.length - 1].trim().equals("0") ? 0 : 1; data.add(new DenseInstance(1.0, values)); } return data; }