List of usage examples for weka.core Instances setRelationName
public void setRelationName(String newName)
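Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself (not taken from any of the listed source files; the attribute and relation names are illustrative, and the ArrayList-based constructor assumes Weka 3.7 or later):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class SetRelationNameExample {
    public static void main(String[] args) {
        // build a tiny empty dataset with two numeric attributes
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x1"));
        atts.add(new Attribute("x2"));
        Instances data = new Instances("original-name", atts, 0);

        // rename the relation; this is the name written to the "@relation" line of the ARFF header
        data.setRelationName("renamed-dataset");
        System.out.println(data.relationName()); // prints: renamed-dataset
    }
}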
From source file:adams.opt.optimise.genetic.AbstractGeneticAlgorithm.java
License:Open Source License
/**
 * Updates the dataset, using the stored setup as the new relation name.
 *
 * @param data the dataset whose relation name gets replaced with the setup
 * @return the updated dataset
 */
protected Instances updateHeader(Instances data) {
    Properties props;

    props = storeSetup(data);
    data.setRelationName(props.toString());

    return data;
}
From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java
License:Open Source License
/**
 * Callback for best measure so far
 */
@Override
public void newBest(double val, OptData opd) {
    int cnt = 0;
    int[] weights = getWeights(opd);
    Instances newInstances = new Instances(getInstances());
    for (int i = 0; i < getInstances().numInstances(); i++) {
        Instance in = newInstances.instance(i);
        cnt = 0;
        for (int a = 0; a < getInstances().numAttributes(); a++) {
            if (a == getInstances().classIndex())
                continue;
            if (weights[cnt++] == 0) {
                in.setValue(a, 0);
            } else {
                in.setValue(a, in.value(a));
            }
        }
    }
    try {
        File file = new File(getOutputDirectory().getAbsolutePath() + File.separator
                + Double.toString(getMeasure().adjust(val)) + ".arff");
        file.createNewFile();
        Writer writer = new BufferedWriter(new FileWriter(file));
        Instances header = new Instances(newInstances, 0);
        // remove filter setup
        Remove remove = new Remove();
        remove.setAttributeIndices(getRemoveAsString(weights));
        remove.setInvertSelection(true);
        header.setRelationName(OptionUtils.getCommandLine(remove));
        writer.write(header.toString());
        writer.write("\n");
        for (int i = 0; i < newInstances.numInstances(); i++) {
            writer.write(newInstances.instance(i).toString());
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:de.ugoe.cs.cpdp.execution.ClassifierCreationExperiment.java
License:Apache License
/**
 * Executes the experiment with the steps as described in the class comment.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();
    boolean writeHeader = true;

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    File resultsDir = new File(config.getResultsPath());
    if (!resultsDir.exists()) {
        resultsDir.mkdir();
    }

    int versionCount = 1;
    for (SoftwareVersion testVersion : versions) {

        // At first: traindata == testdata
        Instances testdata = testVersion.getInstances();
        Instances traindata = new Instances(testdata);
        List<Double> efforts = testVersion.getEfforts();

        // Give the dataset a new name
        testdata.setRelationName(testVersion.getProject());

        for (IProcessesingStrategy processor : config.getPreProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
                            config.getExperimentName(), versionCount, versions.size(),
                            testVersion.getProject(), processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                            config.getExperimentName(), versionCount, versions.size(),
                            testVersion.getProject(), dataselector.getClass().getName()));
            traindata = dataselector.apply(testdata, traindata);
        }

        for (IProcessesingStrategy processor : config.getPostProcessors()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                            config.getExperimentName(), versionCount, versions.size(),
                            testVersion.getProject(), processor.getClass().getName()));
            processor.apply(testdata, traindata);
        }

        // Trainerlist for evaluation later on
        List<ITrainer> allTrainers = new LinkedList<>();

        for (ITrainingStrategy trainer : config.getTrainers()) {

            // Add trainer to list for evaluation
            allTrainers.add(trainer);

            // Train classifier
            trainer.apply(traindata);

            if (config.getSaveClassifier()) {
                // If the classifier should be saved, serialize it to the results directory
                // be careful with typecasting here!
                IWekaCompatibleTrainer trainerToSave = (IWekaCompatibleTrainer) trainer;
                // Console.println(trainerToSave.getClassifier().toString());
                try {
                    weka.core.SerializationHelper.write(resultsDir.getAbsolutePath() + "/" + trainer.getName()
                            + "-" + testVersion.getProject(), trainerToSave.getClassifier());
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }

        for (IEvaluationStrategy evaluator : config.getEvaluators()) {
            Console.traceln(Level.FINE,
                    String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
                            config.getExperimentName(), versionCount, versions.size(),
                            testVersion.getProject(), evaluator.getClass().getName()));

            if (writeHeader) {
                evaluator.setParameter(config.getResultsPath() + "/" + config.getExperimentName() + ".csv");
            }
            evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
                    config.getResultStorages());
            writeHeader = false;
        }

        versionCount++;

        Console.traceln(Level.INFO,
                String.format("[%s] [%02d/%02d] %s: finished", config.getExperimentName(), versionCount,
                        versions.size(), testVersion.getProject()));
    }
}
From source file:de.ugoe.cs.cpdp.loader.NetgeneLoader.java
License:Apache License
@Override
public Instances load(File fileMetricsFile) {
    // first determine all files
    String path = fileMetricsFile.getParentFile().getAbsolutePath();
    String project = fileMetricsFile.getName().split("_")[0];
    File bugsFile = new File(path + "/" + project + "_bugs_per_file.csv");
    File networkMetrics = new File(path + "/" + project + "_network_metrics.csv");

    Instances metricsData = null;

    try {
        CSVLoader wekaCsvLoader = new CSVLoader();
        wekaCsvLoader.setSource(fileMetricsFile);
        metricsData = wekaCsvLoader.getDataSet();
        wekaCsvLoader.setSource(bugsFile);
        Instances bugsData = wekaCsvLoader.getDataSet();
        wekaCsvLoader.setSource(networkMetrics);
        Instances networkData = wekaCsvLoader.getDataSet();

        metricsData.setRelationName(project);

        // fix nominal attributes (i.e., NA values)
        for (int j = 2; j < networkData.numAttributes(); j++) {
            if (networkData.attribute(j).isNominal()) {
                String attributeName = networkData.attribute(j).name();
                double[] tmpVals = new double[networkData.size()];
                // get temporary values
                for (int i = 0; i < networkData.size(); i++) {
                    Instance inst = networkData.instance(i);
                    if (!inst.isMissing(j)) {
                        String val = networkData.instance(i).stringValue(j);
                        try {
                            tmpVals[i] = Double.parseDouble(val);
                        } catch (NumberFormatException e) {
                            // not a number, using 0.0;
                            tmpVals[i] = 0.0;
                        }
                    } else {
                        tmpVals[i] = 0.0;
                    }
                }
                // replace attribute
                networkData.deleteAttributeAt(j);
                networkData.insertAttributeAt(new Attribute(attributeName), j);
                for (int i = 0; i < networkData.size(); i++) {
                    networkData.instance(i).setValue(j, tmpVals[i]);
                }
            }
        }

        // fix string attributes
        for (int j = 2; j < networkData.numAttributes(); j++) {
            if (networkData.attribute(j).isString()) {
                String attributeName = networkData.attribute(j).name();
                double[] tmpVals = new double[networkData.size()];
                // get temporary values
                for (int i = 0; i < networkData.size(); i++) {
                    Instance inst = networkData.instance(i);
                    if (!inst.isMissing(j)) {
                        String val = networkData.instance(i).stringValue(j);
                        try {
                            tmpVals[i] = Double.parseDouble(val);
                        } catch (NumberFormatException e) {
                            // not a number, using 0.0;
                            tmpVals[i] = 0.0;
                        }
                    } else {
                        tmpVals[i] = 0.0;
                    }
                }
                // replace attribute
                networkData.deleteAttributeAt(j);
                networkData.insertAttributeAt(new Attribute(attributeName), j);
                for (int i = 0; i < networkData.size(); i++) {
                    networkData.instance(i).setValue(j, tmpVals[i]);
                }
            }
        }

        Map<String, Integer> filenames = new HashMap<>();
        for (int j = 0; j < metricsData.size(); j++) {
            filenames.put(metricsData.instance(j).stringValue(0), j);
        }

        // merge with network data
        int attributeIndex;
        for (int j = 2; j < networkData.numAttributes(); j++) {
            attributeIndex = metricsData.numAttributes();
            metricsData.insertAttributeAt(networkData.attribute(j), attributeIndex);
            for (int i = 0; i < networkData.size(); i++) {
                Integer instanceIndex = filenames.get(networkData.instance(i).stringValue(1));
                if (instanceIndex != null) {
                    metricsData.instance(instanceIndex).setValue(attributeIndex,
                            networkData.instance(i).value(j));
                }
            }
        }

        // add bug information
        attributeIndex = metricsData.numAttributes();
        final ArrayList<String> classAttVals = new ArrayList<String>();
        classAttVals.add("0");
        classAttVals.add("1");
        final Attribute classAtt = new Attribute("bug", classAttVals);
        metricsData.insertAttributeAt(classAtt, attributeIndex);
        for (int i = 0; i < bugsData.size(); i++) {
            if (bugsData.instance(i).value(2) > 0.0d) {
                Integer instanceIndex = filenames.get(bugsData.instance(i).stringValue(1));
                if (instanceIndex != null) {
                    metricsData.instance(instanceIndex).setValue(attributeIndex, 1.0);
                }
            }
        }

        // remove filenames
        metricsData.deleteAttributeAt(0);
        Attribute eigenvector = metricsData.attribute("eigenvector");
        if (eigenvector != null) {
            for (int j = 0; j < metricsData.numAttributes(); j++) {
                if (metricsData.attribute(j) == eigenvector) {
                    metricsData.deleteAttributeAt(j);
                }
            }
        }

        metricsData.setClassIndex(metricsData.numAttributes() - 1);

        // set all missing values to 0
        for (int i = 0; i < metricsData.size(); i++) {
            for (int j = 0; j < metricsData.numAttributes(); j++) {
                if (metricsData.instance(i).isMissing(j)) {
                    metricsData.instance(i).setValue(j, 0.0d);
                }
            }
        }
    } catch (IOException e) {
        Console.traceln(Level.SEVERE, "failure reading file: " + e.getMessage());
        metricsData = null;
    }
    return metricsData;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Converts the instances in the given dataset to binary, setting the specified labels to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that data is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex();

    // set the instances classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
From source file:jjj.asap.sas.parser.job.ImportParserData.java
License:Open Source License
private void process(final String parent, int essaySet, Map<Double, List<String>> tags,
        Map<Double, List<String>> parseTrees, Map<Double, List<String>> depends) {

    // check if output exists
    boolean any = false;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-extra-stats.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-pos-tags.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-parse-tree.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends0.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends1.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends2.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends3.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends4.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends5.arff"))
        any = true;
    if (!IOUtils.exists("work/datasets/" + parent + "/" + essaySet + "-depends6.arff"))
        any = true;

    if (!any) {
        Job.log("NOTE", "work/datasets/" + parent + "/" + essaySet
                + "-*.arff returns all required datasets - nothing to do");
        return;
    }

    // Load an existing dataset to use as a template.
    Instances dataset = Dataset.load("work/datasets/" + parent + "/" + essaySet + "-spell-checked.arff");

    // create the output datasets here. except for the extra statistics,
    // the format is the same as 'dataset'.

    Instances tagsData = new Instances(dataset, 0);
    tagsData.setRelationName(essaySet + "-pos-tags.arff");

    Instances treeData = new Instances(dataset, 0);
    treeData.setRelationName(essaySet + "-parse-tree.arff");

    Instances dependsData[] = new Instances[7];
    for (int j = 0; j < 7; j++) {
        dependsData[j] = new Instances(dataset, 0);
        dependsData[j].setRelationName(essaySet + "-depends" + j + ".arff");
    }

    // extra stats
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    if (Contest.isMultiChoice(essaySet)) {
        builder.addNominalVariable("color", Contest.COLORS);
    }
    builder.addVariable("x_sent");
    builder.addVariable("x_para");
    builder.addVariable("x_length");
    builder.addVariable("x_words");
    builder.addVariable("x_unique_words");
    builder.addNominalVariable("score", Contest.getRubrics(essaySet));

    Instances extraStats = builder.getDataset(essaySet + "-extra-stats.arff");

    // now add rows for each instance
    for (int i = 0; i < dataset.numInstances(); i++) {

        // common variables
        Instance ob = dataset.instance(i);
        double id = ob.value(0);
        String y = ob.isMissing(dataset.numAttributes() - 1) ? null
                : ob.stringValue(dataset.numAttributes() - 1);
        String color = Contest.isMultiChoice(essaySet) ? ob.stringValue(dataset.attribute("color")) : null;
        String str = ob.stringValue(dataset.attribute("text"));

        //
        // Extra stats
        //
        int nSent = tags.containsKey(id) ? tags.get(id).size() : 0;
        int nPara = 0;
        for (int a = 0; a < str.length(); a++) {
            if (str.charAt(a) == '^')
                nPara++;
        }
        int nLength = str.length();
        int nWords = 0;
        int nUniqueWords = 0;
        String[] words = str.toLowerCase().split(" ");
        nWords = words.length;
        Set<String> u = new HashSet<String>();
        for (String w : words) {
            u.add(w);
        }
        nUniqueWords = u.size();

        extraStats.add(new DenseInstance(extraStats.numAttributes()));
        Instance extra = extraStats.lastInstance();
        extra.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            extra.setValue(1, color);
        }
        extra.setValue(extraStats.attribute("x_sent"), nSent);
        extra.setValue(extraStats.attribute("x_para"), nPara);
        extra.setValue(extraStats.attribute("x_length"), nLength);
        extra.setValue(extraStats.attribute("x_words"), nWords);
        extra.setValue(extraStats.attribute("x_unique_words"), nUniqueWords);
        if (y == null)
            extra.setValue(extraStats.numAttributes() - 1, Utils.missingValue());
        else
            extra.setValue(extraStats.numAttributes() - 1, y);

        //
        // POS tags
        //
        String tagsText = "";
        List<String> tagsList = tags.get(id);
        if (tagsList == null || tagsList.isEmpty()) {
            Job.log("WARNING", "no tags for " + id);
            tagsText = "x";
        } else {
            for (String tagsItem : tagsList) {
                tagsText += tagsItem;
            }
        }

        tagsData.add(new DenseInstance(ob.numAttributes()));
        Instance tagsOb = tagsData.lastInstance();
        tagsOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            tagsOb.setValue(1, color);
            tagsOb.setValue(2, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(3, Utils.missingValue());
            } else {
                tagsOb.setValue(3, y);
            }
        } else {
            tagsOb.setValue(1, tagsText.trim());
            if (y == null) {
                tagsOb.setValue(2, Utils.missingValue());
            } else {
                tagsOb.setValue(2, y);
            }
        }

        //
        // Parse Tree
        //
        String treeText = "";
        List<String> treeList = parseTrees.get(id);
        if (treeList == null || treeList.isEmpty()) {
            Job.log("WARNING", "no parse tree for " + id);
            treeText = "x";
        } else {
            for (String treeItem : treeList) {
                treeText += treeItem;
            }
        }

        treeData.add(new DenseInstance(ob.numAttributes()));
        Instance treeOb = treeData.lastInstance();
        treeOb.setValue(0, id);
        if (Contest.isMultiChoice(essaySet)) {
            treeOb.setValue(1, color);
            treeOb.setValue(2, treeText.trim());
            if (y == null) {
                treeOb.setValue(3, Utils.missingValue());
            } else {
                treeOb.setValue(3, y);
            }
        } else {
            treeOb.setValue(1, treeText.trim());
            if (y == null) {
                treeOb.setValue(2, Utils.missingValue());
            } else {
                treeOb.setValue(2, y);
            }
        }

        //
        // Depends data
        //
        for (int j = 0; j < 7; j++) {

            String text = "";
            List<String> list = depends.get(id);
            if (list == null || list.isEmpty()) {
                Job.log("WARNING", "no depends for " + id);
                text = "x";
            } else {
                for (String item : list) {
                    String[] term = StringUtils.safeSplit(item, "/", 3);
                    switch (j) {
                    case 0:
                        text += item;
                        break;
                    case 1:
                        text += term[1] + "/" + term[2];
                        break;
                    case 2:
                        text += term[0] + "/" + term[2];
                        break;
                    case 3:
                        text += term[0] + "/" + term[1];
                        break;
                    case 4:
                        text += term[0];
                        break;
                    case 5:
                        text += term[1];
                        break;
                    case 6:
                        text += term[2];
                        break;
                    }
                    text += " ";
                }
            }

            dependsData[j].add(new DenseInstance(ob.numAttributes()));
            Instance dependsOb = dependsData[j].lastInstance();
            dependsOb.setValue(0, id);
            if (Contest.isMultiChoice(essaySet)) {
                dependsOb.setValue(1, color);
                dependsOb.setValue(2, text.trim());
                if (y == null) {
                    dependsOb.setValue(3, Utils.missingValue());
                } else {
                    dependsOb.setValue(3, y);
                }
            } else {
                dependsOb.setValue(1, text.trim());
                if (y == null) {
                    dependsOb.setValue(2, Utils.missingValue());
                } else {
                    dependsOb.setValue(2, y);
                }
            }
        } // j

    } // dataset

    // Now save the new datasets
    Dataset.save("work/datasets/" + parent + "/" + tagsData.relationName(), tagsData);
    Dataset.save("work/datasets/" + parent + "/" + treeData.relationName(), treeData);
    for (int j = 0; j < 7; j++) {
        Dataset.save("work/datasets/" + parent + "/" + dependsData[j].relationName(), dependsData[j]);
    }
    Dataset.save("work/datasets/" + parent + "/" + extraStats.relationName(), extraStats);
}
From source file:jwebminer2.FeatureValueFileSaver.java
/**
 * Save the given text to the given location in the given format or
 * save the stored feature values, depending on the chosen_file_extension.
 * A progress bar is displayed (although not incremented).
 *
 * @param chosen_file_extension The file extension (corresponding to one
 *                              of the extensions published by the
 *                              getFileFormatExtension method) to use when
 *                              saving data_to_save, and the corresponding
 *                              file format.
 * @param data_to_save          The HTML code displayed on-screen. May be
 *                              null for non-HTML saving.
 * @param save_location         The file to save data_to_save to.
 * @throws Exception            Throws an Exception if the file cannot be
 *                              saved.
 */
public void saveContents(String chosen_file_extension, String data_to_save, File save_location)
        throws Exception {
    // Prepare the progress bar
    SimpleProgressBarDialog progress_bar = new SimpleProgressBarDialog(1, results_panel);

    // Write the whole contents of data_to_save verbatim as an HTML file
    // if an HTML file is to be saved
    if (chosen_file_extension.equals("HTML")) {
        DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                .getDataOutputStream(save_location);
        writer.writeBytes(data_to_save);
        writer.close();
    }

    // Only save the table of final feature values itself if a non-HTML
    // file format is to be saved
    else {
        // Access information to store
        double[][] feature_table = results_panel.feature_values;
        String[] column_labels = results_panel.column_labels;
        String[] row_labels = results_panel.row_labels;

        String[] orig_column_labels = column_labels;
        if (AnalysisProcessor.lastfm_enabled && AnalysisProcessor.is_cross_tabulation
                && (AnalysisProcessor.yahoo_application_id != null
                        || AnalysisProcessor.google_license_key != null)) {
            String[] column_labels_lastfm_websearch = new String[2 * column_labels.length];
            for (int i = 0; i < column_labels.length; i++) {
                column_labels_lastfm_websearch[i] = column_labels[i] + "_WS";
                column_labels_lastfm_websearch[i + column_labels.length] = column_labels[i] + "_LastFM";
            }
            column_labels = column_labels_lastfm_websearch;
        } else {
            column_labels = orig_column_labels;
        }

        // Save as tab delimited text file
        if (chosen_file_extension.equals("TXT")) {
            // Calculate the table to save
            String[][] results_table = new String[row_labels.length + 1][column_labels.length + 1];
            results_table[0][0] = "";
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    if (i == 0) {
                        if (j != 0)
                            results_table[i][j] = column_labels[j - 1];
                    } else {
                        if (j == 0)
                            results_table[i][j] = row_labels[i - 1];
                        else
                            results_table[i][j] = String.valueOf(feature_table[i - 1][j - 1]);
                    }
                }
            }

            // Save the table
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            for (int i = 0; i < results_table.length; i++) {
                for (int j = 0; j < results_table[i].length; j++) {
                    // Write the table entry
                    writer.writeBytes(results_table[i][j]);

                    // Add a tab or a line break
                    if (j == results_table[i].length - 1)
                        writer.writeBytes("\n");
                    else
                        writer.writeBytes("\t");
                }
            }

            // Close the writing stream
            writer.close();
        }

        // Save as ACE XML file
        else if (chosen_file_extension.equals("ACE XML")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Prepare feature definitions and store feature names to
            // put in DataSets
            FeatureDefinition[] feature_definitions = new FeatureDefinition[column_labels.length];
            String[] feature_names = new String[column_labels.length];
            for (int feat = 0; feat < feature_definitions.length; feat++) {
                feature_definitions[feat] = new FeatureDefinition(column_labels[feat], "", false, 1);
                feature_names[feat] = column_labels[feat];
            }

            // Prepare the DataSets to write
            DataSet[] data_sets = new DataSet[row_labels.length];
            for (int instance = 0; instance < data_sets.length; instance++) {
                // Instantiate the DataSet
                data_sets[instance] = new DataSet();

                // Store the instance names
                data_sets[instance].identifier = row_labels[instance];

                // Store the names of the features
                data_sets[instance].feature_names = feature_names;

                // Store the features for this DataSet as well as the
                // feature names
                double[][] these_feature_values = new double[feature_table[instance].length][1];
                for (int feat = 0; feat < these_feature_values.length; feat++)
                    these_feature_values[feat][0] = feature_table[instance][feat];
                data_sets[instance].feature_values = these_feature_values;

                // Validate, order and compact the DataSet
                data_sets[instance].orderAndCompactFeatures(feature_definitions, true);
            }

            // Save the feature values
            DataSet.saveDataSets(data_sets, feature_definitions, save_location,
                    "Features extracted with jWebMiner 2.0");
        }

        // Save as Weka ARFF file
        else if (chosen_file_extension.equals("Weka ARFF")) {
            // Set the name of the dataset to the name of the file
            // that is to be saved
            String data_set_name = mckay.utilities.staticlibraries.StringMethods
                    .removeExtension(save_location.getName());

            // Set the Attributes (feature names and class names)
            FastVector attributes_vector = new FastVector(column_labels.length + 1); // extra 1 is for class name
            for (int feat = 0; feat < column_labels.length; feat++)
                attributes_vector.addElement(new Attribute(column_labels[feat]));
            FastVector class_names_vector = new FastVector(column_labels.length);
            for (int cat = 0; cat < orig_column_labels.length; cat++)
                class_names_vector.addElement(orig_column_labels[cat]);
            attributes_vector.addElement(new Attribute("Class", class_names_vector));

            // Store attributes in an Instances object
            Instances instances = new Instances(data_set_name, attributes_vector, row_labels.length);
            instances.setClassIndex(instances.numAttributes() - 1);

            // Store the feature values and model classifications
            for (int inst = 0; inst < row_labels.length; inst++) {
                // Initialize an instance
                Instance this_instance = new Instance(instances.numAttributes());
                this_instance.setDataset(instances);
                int current_attribute = 0;

                // Set feature values for the instance
                for (int feat = 0; feat < column_labels.length; feat++)
                    this_instance.setValue(feat, feature_table[inst][feat]);

                // Set the class value for the instance
                // this_instance.setClassValue("a");
                instances.setRelationName("jWebMiner2");

                // Add this instance to instances
                instances.add(this_instance);
            }

            // Prepare the buffer to save to and add comments indicating
            // the names of the rows
            DataOutputStream writer = mckay.utilities.staticlibraries.FileMethods
                    .getDataOutputStream(save_location);
            writer.writeBytes("% INSTANCES (DATA ROWS) BELOW CORRESPOND TO:\n%\n");
            for (int inst = 0; inst < row_labels.length; inst++)
                writer.writeBytes("% " + (inst + 1) + ") " + row_labels[inst] + "\n");
            writer.writeBytes("%\n");

            // Save the ARFF file
            ArffSaver arff_saver = new ArffSaver();
            arff_saver.setInstances(instances);
            arff_saver.setFile(save_location);
            arff_saver.setDestination(writer);
            try {
                arff_saver.writeBatch();
            } catch (Exception e) {
                throw new Exception(
                        "File only partially saved.\n\nTry resaving the file with a .arff extension.");
            }

            // Close the writer
            writer.close();
        }
    }

    // Terminate the progress bar
    progress_bar.done();
}
From source file:lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java
License:Open Source License
private static Instances showDialog(final Component parent, final boolean setClass) throws Exception {
    final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild");
    final String path = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR);

    final JFileChooser fc = new JFileChooser();
    fc.setCurrentDirectory(new File(path));

    final int returnVal = fc.showOpenDialog(parent);
    if (returnVal == JFileChooser.APPROVE_OPTION) {
        final File file = fc.getSelectedFile();
        if (file != null) {
            prefs.put(REG_KEY, file.getPath());

            final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(file);
            final Attribute defaultClassAttr = ds.classIndex() >= 0 ? ds.classAttribute() : ds.attribute(0);
            ds.setClassIndex(-1);
            ds.setRelationName(file.getPath());

            final List<String> attributesNames = new ArrayList<String>();
            final Enumeration<?> e = ds.enumerateAttributes();
            while (e.hasMoreElements()) {
                final Attribute attr = (Attribute) e.nextElement();
                attributesNames.add(attr.name());
            }

            if (setClass) {
                final String s = (String) JOptionPane.showInputDialog(parent,
                        "Select the class attribute for '" + file.getName() + "' (default:'"
                                + defaultClassAttr.name() + "'): ",
                        "Class selection", JOptionPane.QUESTION_MESSAGE, null, // icon
                        attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1));
                if (s != null) {
                    ds.setClass(ds.attribute(s));
                } else {
                    // Otherwise no class defined and CACHE attributeClass => No class index defined after cancel + retry
                    ds.setClass(defaultClassAttr);
                    return null;
                }
            } else {
                ds.setClass(defaultClassAttr);
            }
            return ds;
        } else
            throw new Exception();
    } else
        return null;
}
From source file:mao.datamining.RemoveUselessColumnsByMissingValues.java
License:Open Source License
/**
 * Signify that this batch of input to the filter is finished.
 *
 * @return true if there are instances pending output
 * @throws Exception if no input format defined
 */
public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }

    if (m_removeFilter == null) {

        // establish attributes to remove from first batch
        Instances toFilter = getInputFormat();
        int[] attsToDelete = new int[toFilter.numAttributes()];
        int numToDelete = 0;
        for (int i = 0; i < toFilter.numAttributes(); i++) {
            if (i == toFilter.classIndex())
                continue; // skip class

            AttributeStats stats = toFilter.attributeStats(i);

            // remove those attributes that have a high ratio of missing values
            if ((stats.missingCount * 100) / stats.totalCount > m_maxMissingPercentage) {
                // System.out.println("stats.missingPercentage: " + (stats.missingCount*100)/stats.totalCount+"%");
                attsToDelete[numToDelete++] = i;
            }

            // remove those columns defined in the list by manual check
            if (this.column2DeleteSet.contains(toFilter.attribute(i).name())) {
                attsToDelete[numToDelete++] = i;
            }
        }

        int[] finalAttsToDelete = new int[numToDelete];
        System.arraycopy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);

        m_removeFilter = new Remove();
        m_removeFilter.setAttributeIndicesArray(finalAttsToDelete);
        m_removeFilter.setInvertSelection(false);
        m_removeFilter.setInputFormat(toFilter);

        for (int i = 0; i < toFilter.numInstances(); i++) {
            m_removeFilter.input(toFilter.instance(i));
        }
        m_removeFilter.batchFinished();

        Instance processed;
        Instances outputDataset = m_removeFilter.getOutputFormat();

        // restore old relation name to hide attribute filter stamp
        outputDataset.setRelationName(toFilter.relationName());

        setOutputFormat(outputDataset);
        while ((processed = m_removeFilter.output()) != null) {
            processed.setDataset(outputDataset);
            push(processed);
        }
    }
    flushInput();

    m_NewBatch = true;
    return (numPendingOutput() != 0);
}
From source file:meka.core.MLUtils.java
License:Open Source License
/**
 * Fixes the relation name by adding the "-C" attribute to it if necessary.
 *
 * @param data the dataset to fix
 * @param numClassAtts the number of class attributes (0 for none, >0 for attributes at start, <0 for attributes at end)
 */
public static void fixRelationName(Instances data, int numClassAtts) {
    if (data.relationName().indexOf(":") == -1)
        data.setRelationName(data.relationName() + ": -C " + numClassAtts);
}