List of usage examples for weka.core.Instances insertAttributeAt
public void insertAttributeAt(Attribute att, int position)
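Before the individual examples, here is a minimal self-contained sketch of the call itself (assuming Weka 3.7 or later; the relation name "demo" and the attribute names "x", "y" and "cls" are purely illustrative). Inserting at position numAttributes() appends the attribute, every existing instance receives a missing value for it, and inserting at an earlier position shifts the attributes at and after that position one index to the right.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InsertAttributeAtSketch {
    public static void main(String[] args) {
        // A tiny dataset with a single numeric attribute and one instance.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        Instances data = new Instances("demo", atts, 0);
        data.add(new DenseInstance(1.0, new double[] { 1.5 }));

        // Append a numeric attribute "y" at the end; the existing instance
        // gets a missing value for it until one is set explicitly.
        data.insertAttributeAt(new Attribute("y"), data.numAttributes());
        data.instance(0).setValue(data.numAttributes() - 1, 3.0);

        // Insert a nominal attribute "cls" at the front; "x" and "y" shift
        // one position to the right.
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("a");
        labels.add("b");
        data.insertAttributeAt(new Attribute("cls", labels), 0);
        data.setClassIndex(0);

        System.out.println(data);
    }
}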
From source file:distributed.core.DistributedUtils.java
License:Open Source License
public static Instances makeHeaderWithSummaryAtts(Instances denormalized, boolean treatZerosAsMissing) {
    Instances header = new Instances(denormalized, 0);
    for (int i = 0; i < denormalized.numAttributes(); i++) {
        AttributeStats stats = denormalized.attributeStats(i);
        if (denormalized.attribute(i).isNumeric()) {
            NumericStats ns = new NumericStats(denormalized.attribute(i).name());
            if (!treatZerosAsMissing) {
                ns.getStats()[ArffSummaryNumericMetric.MIN.ordinal()] = stats.numericStats.min;
                ns.getStats()[ArffSummaryNumericMetric.MAX.ordinal()] = stats.numericStats.max;
                ns.getStats()[ArffSummaryNumericMetric.COUNT.ordinal()] = stats.numericStats.count;
                ns.getStats()[ArffSummaryNumericMetric.SUM.ordinal()] = stats.numericStats.sum;
                ns.getStats()[ArffSummaryNumericMetric.SUMSQ.ordinal()] = stats.numericStats.sumSq;
                ns.getStats()[ArffSummaryNumericMetric.MISSING.ordinal()] = stats.missingCount;
                ns.computeDerived();
            } else {
                ns = getNumericAttributeStatsSparse(denormalized, i);
            }
            Attribute newAtt = ns.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        } else if (denormalized.attribute(i).isNominal()) {
            NominalStats nom = new NominalStats(denormalized.attribute(i).name());
            nom.setNumMissing(stats.missingCount);
            double[] labelFreqs = stats.nominalWeights;
            for (int j = 0; j < denormalized.attribute(i).numValues(); j++) {
                nom.add(denormalized.attribute(i).value(j), labelFreqs[j]);
            }
            Attribute newAtt = nom.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        }
    }
    return header;
}
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/** Converts the instances in the given dataset to binary, setting the specified label to positive.
 * Note this method is destructive to data, directly modifying its contents.
 * @param data the multiclass dataset to be converted to binary.
 * @param positiveClassValue the class value to treat as positive.
 */
public static void convertMulticlassToBinary(Instances data, String positiveClassValue) {

    // ensure that the class attribute is nominal
    if (!data.classAttribute().isNominal())
        throw new IllegalArgumentException("Instances must have a nominal class.");

    // create the new class attribute
    FastVector newClasses = new FastVector(2);
    newClasses.addElement("Y");
    newClasses.addElement("N");
    Attribute newClassAttribute = new Attribute("class", newClasses);

    // alter the class attribute to be binary
    int newClassAttIdx = data.classIndex();
    data.insertAttributeAt(newClassAttribute, newClassAttIdx);
    int classAttIdx = data.classIndex(); // the old class attribute has shifted one position to the right

    // set the instances' classes to be binary, with the labels [Y,N] (indices 0 and 1 respectively)
    int numInstances = data.numInstances();
    for (int instIdx = 0; instIdx < numInstances; instIdx++) {
        Instance inst = data.instance(instIdx);
        if (inst.stringValue(classAttIdx).equals(positiveClassValue)) {
            inst.setValue(newClassAttIdx, 0); // set it to the first class, which will be Y
        } else {
            inst.setValue(newClassAttIdx, 1); // set it to the second class, which will be N
        }
    }

    // switch the class index to the new class and delete the old class
    data.setClassIndex(newClassAttIdx);
    data.deleteAttributeAt(classAttIdx);

    // alter the dataset name
    data.setRelationName(data.relationName() + "-" + positiveClassValue);
}
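A note on the example above: it relies on insertAttributeAt adjusting the class index when the insertion position is at or before it, so after the call the old class attribute sits one position to the right of the newly inserted binary one. A small sketch of that behaviour under the same Weka 3.7+ assumption (the attribute names "x" and "colour" and the label values are illustrative):

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.Instances;

public class ClassIndexShiftSketch {
    public static void main(String[] args) {
        // Two-attribute dataset whose second attribute is the nominal class.
        ArrayList<String> colours = new ArrayList<String>();
        colours.add("red");
        colours.add("blue");
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("colour", colours));
        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(1);

        // Insert a new binary attribute at the current class index.
        ArrayList<String> binary = new ArrayList<String>();
        binary.add("Y");
        binary.add("N");
        int pos = data.classIndex(); // 1
        data.insertAttributeAt(new Attribute("class", binary), pos);

        System.out.println(data.classIndex());          // 2: the old class attribute moved right
        System.out.println(data.attribute(pos).name()); // "class": the newly inserted attribute
    }
}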
From source file:edu.utexas.cs.tactex.utils.RegressionUtils.java
License:Open Source License
/**
 * Adds a y attribute without giving it values.
 */
public static Instances addYforWeka(Instances xInsts) {
    // add another column for y
    int n = xInsts.numAttributes();
    xInsts.insertAttributeAt(new Attribute(Integer.toString(n)), n);
    // the last attribute is the y value, the class 'label'
    xInsts.setClassIndex(n);
    return xInsts;
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/**
 * Train one vs. all models over the given training data.
 *
 * @param modelpath directory to store each model for the one vs. all method
 * @param prefix prefix the models should have (each model will have the name of its class appended)
 * @throws Exception
 */
public void trainOneVsAll(String modelpath, String prefix) throws Exception {

    Instances orig = new Instances(traindata);
    Enumeration<Object> classValues = traindata.classAttribute().enumerateValues();
    String classAtt = traindata.classAttribute().name();

    while (classValues.hasMoreElements()) {
        String v = (String) classValues.nextElement();
        System.err.println("trainer onevsall for class " + v + " classifier");

        // needed because of weka's sparse data format problems THIS IS TROUBLE! ...
        if (v.equalsIgnoreCase("dummy")) {
            continue;
        }

        // copy instances and set the same class value
        Instances ovsa = new Instances(orig);

        // create a new class attribute and declare its values
        ArrayList<String> classVal = new ArrayList<String>();
        classVal.add("dummy"); // needed because of weka's sparse data format problems...
        classVal.add(v);
        classVal.add("UNKNOWN");
        ovsa.insertAttributeAt(new Attribute(classAtt + "2", classVal), ovsa.numAttributes());

        // relabel every instance that does not have the current class value as "UNKNOWN"
        for (int i = 0; i < ovsa.numInstances(); i++) {
            Instance inst = ovsa.instance(i);
            String instClass = inst.stringValue(ovsa.attribute(classAtt).index());
            if (instClass.equalsIgnoreCase(v)) {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), v);
            } else {
                inst.setValue(ovsa.attribute(classAtt + "2").index(), "UNKNOWN");
            }
        }

        // delete the old class attribute and set the new one
        ovsa.setClassIndex(ovsa.attribute(classAtt + "2").index());
        ovsa.deleteAttributeAt(ovsa.attribute(classAtt).index());
        ovsa.renameAttribute(ovsa.attribute(classAtt + "2").index(), classAtt);
        ovsa.setClassIndex(ovsa.attribute(classAtt).index());

        // build the classifier, cross-validate and store the model
        setTraindata(ovsa);
        saveModel(modelpath + File.separator + prefix + "_" + v + ".model");
        setTestdata(ovsa);
        testModel(modelpath + File.separator + prefix + "_" + v + ".model");
        System.err.println("trained onevsall " + v + " classifier");
    }
    setTraindata(orig);
}
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param mlData
 * @return the transformed instances
 * @throws Exception
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();
    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY
    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();
    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);
    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            dblLabels[l] = (digit & i) / digit;
        }
        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    // for (int i = 0; i < numInstances; i++) {
    //     // construct labelset
    //     double[] dblLabels = new double[numLabels];
    //     for (int j = 0; j < numLabels; j++) {
    //         int index = labelIndices[j];
    //         dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
    //     }
    //     LabelSet labelSet = new LabelSet(dblLabels);
    //
    //     // add labelset if not already present
    //     labelSets.add(labelSet);
    // }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    // for (String s : classValues) {
    //     System.out.print(s + ", ");
    // }
    // System.out.println();

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        //System.out.println(newData.instance(i).toString());
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        //System.out.println(strClass);
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}
From source file:etc.aloe.filters.AbstractRegexFilter.java
License:Open Source License
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (stringAttributeName == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    Instances outputFormat = new Instances(inputFormat, 0);
    Attribute stringAttr = inputFormat.attribute(stringAttributeName);
    stringAttributeIndex = stringAttr.index();

    // Add the new columns. There is one for each regex feature.
    NamedRegex[] regexFeatures = getRegexFeatures();
    for (int i = 0; i < regexFeatures.length; i++) {
        String name = regexFeatures[i].getName();
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());

        if (countRegexLengths) {
            name = name + "_L";
            attr = new Attribute(name);
            outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
        }
    }
    return outputFormat;
}
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (getStringAttribute() == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    stringAttributeIndex = inputFormat.attribute(getStringAttribute()).index();
    inputFormat = getInputFormat();

    // This generates m_selectedTerms and m_DocsCounts
    int[] docsCountsByTermIdx = determineDictionary(inputFormat);

    // Initialize the output format to be just like the input
    Instances outputFormat = new Instances(inputFormat, 0);

    // Set up the map from attr index to document frequency
    m_DocsCounts = new int[m_selectedTerms.size()];

    // And add the new attributes
    for (int i = 0; i < m_selectedTerms.size(); i++) {
        int attrIdx = outputFormat.numAttributes();
        int docsCount = docsCountsByTermIdx[i];
        m_DocsCounts[i] = docsCount;
        outputFormat.insertAttributeAt(new Attribute(m_Prefix + m_selectedTerms.get(i)), attrIdx);
    }
    return outputFormat;
}
From source file:etc.aloe.filters.WordFeaturesExtractor.java
License:Open Source License
private Instances generateOutputFormat(Instances inputFormat) {
    Instances outputFormat = new Instances(inputFormat, 0);

    // Add the new columns. There is one for each unigram and each bigram.
    for (int i = 0; i < unigrams.size(); i++) {
        String name = "uni_" + unigrams.get(i);
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
    }
    for (int i = 0; i < bigrams.size(); i++) {
        String name = "bi_" + bigrams.get(i);
        Attribute attr = new Attribute(name);
        outputFormat.insertAttributeAt(attr, outputFormat.numAttributes());
    }
    return outputFormat;
}
From source file:examples.TrainerFrame.java
private void jButtonTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButtonTrainActionPerformed
    // This is a temporary fix to make it appear like it's finished
    pBar.setMaximum(7);
    pBar.setValue(0);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Target Features");

    // Generate target features
    String featuresTarget = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresTarget = GlobalData.getFeatures(jTextFieldCallDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(1);
    pBar.repaint();
    jLabelTrainerStatus.setText("Extracting Other Features");

    // Generate non-target features
    String featuresOther = null;
    new Thread(new TrainerFrame.thread1()).start();
    try {
        featuresOther = GlobalData.getFeatures(jTextFieldOtherSoundDirectory.getText());
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(2);
    pBar.repaint();
    jLabelTrainerStatus.setText("Parsing Features");

    // Load target ARFF file
    BufferedReader readerTarget;
    Instances dataTarget = null;
    try {
        readerTarget = new BufferedReader(new FileReader(featuresTarget));
        dataTarget = new Instances(readerTarget);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(3);
    pBar.repaint();

    // Load other ARFF file
    BufferedReader readerOther;
    Instances dataOther = null;
    try {
        readerOther = new BufferedReader(new FileReader(featuresOther));
        dataOther = new Instances(readerOther);
    } catch (FileNotFoundException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(4);
    pBar.repaint();
    jLabelTrainerStatus.setText("Training Classifier");

    Instances newData = new Instances(dataTarget);
    FastVector typeList = new FastVector() {
    };
    typeList.add("target");
    typeList.add("other");

    newData.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            newData.numAttributes());
    for (Instance instance : newData) {
        instance.setValue(newData.numAttributes() - 1, "target");
    }

    dataOther.insertAttributeAt(new Attribute("NewNominal", (java.util.List<String>) typeList),
            dataOther.numAttributes());
    for (Instance instance : dataOther) {
        instance.setValue(newData.numAttributes() - 1, "other");
        newData.add(instance);
    }

    newData.setClassIndex(newData.numAttributes() - 1);
    pBar.setValue(5);
    pBar.repaint();

    ArffSaver saver = new ArffSaver();
    saver.setInstances(newData);
    try {
        saver.setFile(new File("AnimalCallTrainingFile.arff"));
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        saver.writeBatch();
    } catch (IOException ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    pBar.setValue(6);
    pBar.repaint();

    // Train a classifier
    String[] options = new String[1];
    options[0] = "-U";
    J48 tree = new J48();
    try {
        tree.setOptions(options);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    try {
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        Logger.getLogger(TrainerFrame.class.getName()).log(Level.SEVERE, null, ex);
    }
    Debug.saveToFile("Classifiers/" + jTextFieldClassifierName.getText(), tree);
    System.out.println("classifier saved");

    MyClassifier tempClass = new MyClassifier(jTextFieldClassifierName.getText());
    GlobalData.classifierList.addElement(tempClass.name);
    pBar.setValue(7);
    pBar.repaint();
    jLabelTrainerStatus.setText("Finished");
}
From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License:Open Source License
/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided into as many partitions as there are distinct label combinations.
 *
 * @param file
 *            the .arff file
 * @throws Exception
 */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);
    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels.
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";
    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*
     * attributesIndicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels.
     * It starts from 8 rather than 7 because the Remove filter expects 1-based attribute indices. See the API.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
        //System.out.println(partitions[i]);
    }
    // partitions now contains only attributes

    /*
     * Delete the attributes from partitionsWithCLasses.
     */
    String labelsIndicesForDeletion = "";
    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*
     * labelsIndicesForDeletion = 1,2,3,4,5,6,7 e.g. for 7 attributes and 7 labels (again 1-based).
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize,
            new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {
        try {
            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*
            int k = 0;
            for (int j = 0; j < assignments.length; j++) {
                System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                k++;
                System.out.println();
            }
            System.out.println();
            */

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids at this stage hold only attributes. To continue, we need to provide them with the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for the label attributes.
             */
            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);
                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}