List of usage examples for the `weka.core.Instances.add(Instance)` method, collected from open-source projects.
@Override public boolean add(Instance instance)
From source file:meka.core.MLUtils.java
License:Open Source License
/** * Stack two Instances together row-wise. *///from w w w . ja v a 2 s . c om public static final Instances combineInstances(Instances D1, Instances D2) { Instances D = new Instances(D1); for (int i = 0; i < D2.numInstances(); i++) { D.add(D2.instance(i)); } return D; }
From source file:meka.core.PSUtils.java
License:Open Source License
/**
 * Pruned Sets (PS) transformation: turn a multi-label dataset into a
 * single-target multi-class dataset whose class values are label combinations.
 * Rare combinations are pruned; instances whose combination was pruned can be
 * "restored" as up to n copies labelled with frequent subsets of their labelset.
 *
 * @param D original multi-label dataset (a copy is made; caller's data untouched)
 * @param L number of labels in the original dataset
 * @param cname class name for the new dataset (may want to encode the list of
 *              indices here for RAkEL-like methods)
 * @param p pruning value: combinations counted fewer than p times are dropped
 * @param n restoration value: max number of frequent subsets used to re-introduce
 *          an instance whose combination was pruned (0 = discard such instances)
 * @return transformed dataset, class index set to 0
 */
public static Instances PSTransformation(Instances D, int L, String cname, int p, int n) {
    // Work on a copy so the caller's dataset is not modified.
    D = new Instances(D);

    // Gather combinations: count each distinct labelset occurring in D.
    HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L);

    // Prune combinations occurring fewer than p times.
    if (p > 0)
        MLUtils.pruneCountHashMap(distinctCombinations, p);

    // Check there are > 2 ... or try again with a relaxed pruning value if not.
    if (distinctCombinations.size() <= 1 && p > 0) {
        System.err.println("[Warning] You did too much pruning, setting P = P-1");
        return PSTransformation(D, L, cname, p - 1, n);
    }

    // Create class attribute: one nominal value per surviving combination.
    ArrayList<String> ClassValues = new ArrayList<String>();
    for (LabelSet y : distinctCombinations.keySet())
        ClassValues.add(y.toString());
    Attribute C = new Attribute(cname, ClassValues);

    // Insert new special attribute (which has all possible combinations of labels)
    // at position L and make it the class.
    D.insertAttributeAt(C, L);
    D.setClassIndex(L);

    // Add class values. N is fixed up front so rows appended below (restored
    // copies) are not themselves re-processed by this loop.
    int N = D.numInstances();
    for (int i = 0; i < N; i++) {
        Instance x = D.instance(i);
        LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L));
        String y_string = y.toString();
        if (ClassValues.contains(y_string)) // if its class value exists, just set it
            x.setClassValue(y_string);
        else if (n > 0) {
            // Decomposition: relabel with the most frequent subset, and append
            // up to n-1 extra copies labelled with the next-best subsets.
            LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n);
            if (d_subsets.length > 0) {
                // fast path: reuse the existing row for the best subset
                x.setClassValue(d_subsets[0].toString());
                // additional copies for the remaining subsets
                if (d_subsets.length > 1) {
                    for (int s_i = 1; s_i < d_subsets.length; s_i++) {
                        Instance x_ = (Instance) (x).copy();
                        x_.setClassValue(d_subsets[s_i].toString());
                        D.add(x_);
                    }
                }
            } else {
                // No usable subset: mark missing so it is deleted below.
                x.setClassMissing();
            }
        }
        // NOTE(review): when n == 0 and the combination was pruned, the class is
        // left missing (insertAttributeAt default) and the row is deleted below.
    }

    // Remove rows with missing class (pruned and unrestorable instances).
    D.deleteWithMissingClass();

    try {
        // Strip the original L label attributes, leaving only the new class.
        D = F.removeLabels(D, L);
    } catch (Exception e) {
        // should never happen
    }
    D.setClassIndex(0);

    return D;
}
From source file:meka.core.PSUtils.java
License:Open Source License
/** * Transform instances into a multi-class representation. * @param D original dataset/*from w w w . j av a 2s.c om*/ * @param L number of labels in that dataset * @param cname class name for the new dataset (may want to encode the list of indices here for RAkEL-like methods) * @param p pruning value * @param n restoration value * @return transformed dataset */ public static Instances SLTransformation(Instances D, int L, String cname, int p, int n) { D = new Instances(D); // Gather combinations HashMap<LabelSet, Integer> distinctCombinations = PSUtils.countCombinationsSparse(D, L); // Prune combinations if (p > 0) MLUtils.pruneCountHashMap(distinctCombinations, p); // Check there are > 2 if (distinctCombinations.size() <= 1 && p > 0) { // ... or try again if not ... System.err.println("[Warning] You did too much pruning, setting P = P-1"); return PSTransformation(D, L, cname, p - 1, n); } // Create class attribute ArrayList<String> ClassValues = new ArrayList<String>(); for (LabelSet y : distinctCombinations.keySet()) ClassValues.add(y.toString()); Attribute C = new Attribute(cname, ClassValues); // Insert new special attribute (which has all possible combinations of labels) D.insertAttributeAt(C, L); D.setClassIndex(L); //Add class values int N = D.numInstances(); for (int i = 0; i < N; i++) { Instance x = D.instance(i); LabelSet y = new LabelSet(MLUtils.toSparseIntArray(x, L)); String y_string = y.toString(); // add it if (ClassValues.contains(y_string)) //if its class value exists x.setClassValue(y_string); // decomp else if (n > 0) { //String d_subsets[] = getTopNSubsets(comb,distinctCombinations,n); LabelSet d_subsets[] = PSUtils.getTopNSubsets(y, distinctCombinations, n); //LabelSet d_subsets[] = PSUtils.cover(y,distinctCombinations); if (d_subsets.length > 0) { // fast x.setClassValue(d_subsets[0].toString()); // additional if (d_subsets.length > 1) { for (int s_i = 1; s_i < d_subsets.length; s_i++) { Instance x_ = (Instance) (x).copy(); 
x_.setClassValue(d_subsets[s_i].toString()); D.add(x_); } } } else { x.setClassMissing(); } } } // remove with missing class D.deleteWithMissingClass(); try { D = F.removeLabels(D, L); } catch (Exception e) { // should never happen } D.setClassIndex(0); return D; }
From source file:meka.core.Result.java
License:Open Source License
/**
 * Convert a list of Results (metric maps) into an Instances object.
 * The FIRST map defines the attribute set: only its Double-valued keys
 * become (numeric) attributes.
 *
 * @param metrics an ArrayList of metric-name-to-value maps; must be non-empty,
 *                and every map is expected to contain the same Double-valued keys
 * @return Instances named "Results" with one row per map
 */
public static Instances getResultsAsInstances(ArrayList<HashMap<String, Object>> metrics) {
    // The first map acts as the schema template.
    HashMap<String, Object> o_master = metrics.get(0);
    ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
    for (String key : o_master.keySet()) {
        if (o_master.get(key) instanceof Double) {
            attInfo.add(new Attribute(key));
        }
    }
    Instances resultInstances = new Instances("Results", attInfo, metrics.size());
    for (HashMap<String, Object> o : metrics) {
        Instance rx = new DenseInstance(attInfo.size());
        for (Attribute att : attInfo) {
            String name = att.name();
            // NOTE(review): assumes every map holds a Double for each attribute
            // taken from the first map — a missing key would NPE on unboxing here.
            rx.setValue(att, (double) o.get(name));
        }
        resultInstances.add(rx);
    }
    return resultInstances;
}
From source file:meka.core.SuperLabelUtils.java
License:Open Source License
/** * Super Label Transformation - transform dataset D into a dataset with <code>k</code> multi-class target attributes. * Use the NSR/PS-style pruning and recomposition, according to partition 'indices', and pruning values 'p' and 'n'. * @see PSUtils.PSTransformation/*from w ww. j a va 2 s . com*/ * @param indices m by k: m super variables, each relating to k original variables * @param D either multi-label or multi-target dataset * @param p pruning value * @param n subset relpacement value * @return a multi-target dataset */ public static Instances SLTransformation(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ // F.removeLabels(D_,L); for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); 
//System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! return D_; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Turns the statistics into Instances.
 *
 * @param stats the statistics to convert
 * @return the generated data: one row per EvaluationStatistics, with string
 *         attributes for classifier/relation and numeric attributes otherwise
 */
protected Instances toInstances(List<EvaluationStatistics> stats) {
    Instances result;
    ArrayList<Attribute> atts;
    List<String> headers;
    Instance inst;
    double[] values;
    int i;

    // header: classifier and relation become string attributes, all other
    // headers become numeric attributes
    headers = EvaluationStatisticsUtils.headers(stats, true, true);
    atts = new ArrayList<>();
    for (String header : headers) {
        if (header.equals(EvaluationStatistics.KEY_CLASSIFIER)
                || header.equals(EvaluationStatistics.KEY_RELATION))
            atts.add(new Attribute(header, (List) null)); // (List) null => string attribute
        else
            atts.add(new Attribute(header));
    }
    result = new Instances("stats", atts, stats.size());

    // data
    for (EvaluationStatistics stat : stats) {
        values = new double[result.numAttributes()];
        for (i = 0; i < values.length; i++) {
            if (headers.get(i).equals(EvaluationStatistics.KEY_CLASSIFIER))
                // string attributes store an index into the attribute's string pool
                values[i] = result.attribute(i).addStringValue(stat.getCommandLine());
            else if (headers.get(i).equals(EvaluationStatistics.KEY_RELATION))
                values[i] = result.attribute(i).addStringValue(stat.getRelation());
            else if (stat.containsKey(headers.get(i)))
                values[i] = stat.get(headers.get(i)).doubleValue();
            else
                values[i] = Utils.missingValue(); // statistic absent for this entry
        }
        inst = new DenseInstance(1.0, values);
        result.add(inst);
    }

    return result;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels - Make a new 'D', with labels made into superlabels, according to partition 'indices', and pruning values 'p' and 'n'. * @param D assume attributes in D labeled by original index * @return Instances with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */// w w w . j a va2s . c om public static Instances mergeLabels(Instances D, int indices[][], int p, int n) { int L = D.classIndex(); int K = indices.length; ArrayList<String> values[] = new ArrayList[K]; HashMap<String, Integer> counts[] = new HashMap[K]; // create D_ Instances D_ = new Instances(D); // clear D_ for (int j = 0; j < L; j++) { D_.deleteAttributeAt(0); } // create atts for (int j = 0; j < K; j++) { int att[] = indices[j]; //int values[] = new int[2]; //getValues(indices,D,p); counts[j] = getCounts(D, att, p); Set<String> vals = counts[j].keySet(); //getValues(D,att,p); values[j] = new ArrayList(vals); D_.insertAttributeAt(new Attribute(encodeClass(att), new ArrayList(vals)), j); } // copy over values ArrayList<Integer> deleteList = new ArrayList<Integer>(); for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); for (int j = 0; j < K; j++) { String y = encodeValue(x, indices[j]); try { D_.instance(i).setValue(j, y); // y = } catch (Exception e) { // value not allowed deleteList.add(i); // mark it for deletion String y_close[] = NSR.getTopNSubsets(y, counts[j], n); // get N subsets for (int m = 0; m < y_close.length; m++) { //System.out.println("add "+y_close[m]+" "+counts[j]); Instance x_copy = (Instance) D_.instance(i).copy(); x_copy.setValue(j, y_close[m]); x_copy.setWeight(1.0 / y_close.length); D_.add(x_copy); } } } } // clean up Collections.sort(deleteList, Collections.reverseOrder()); //System.out.println("Deleting "+deleteList.size()+" defunct instances."); for (int i : deleteList) { D_.delete(i); } // set class D_.setClassIndex(K); // done! D = null; return D_; }
From source file:meka.gui.dataviewer.DataTableModel.java
License:Open Source License
/**
 * Sorts the instances via the given attribute.
 *
 * @param columnIndex the index of the column (column 0 is the row-number
 *                    column, so data columns start at 1)
 * @param ascending ascending if true, otherwise descending
 */
public void sortInstances(int columnIndex, boolean ascending) {
    if ((columnIndex > 0) && (columnIndex < getColumnCount())) {
        addUndoPoint();
        // stableSort keeps the relative order of rows with equal values
        m_Data.stableSort(columnIndex - 1);
        if (!ascending) {
            // Reverse the ascending order, but keep each run of EQUAL values
            // in its original (stable) order instead of reversing it too.
            Instances reversedData = new Instances(m_Data, m_Data.numInstances());
            int i = m_Data.numInstances();
            while (i > 0) {
                i--;
                // walk back to the start of the run of values equal to row i
                int equalCount = 1;
                while ((i > 0) && (m_Data.instance(i).value(columnIndex - 1) == m_Data.instance(i - 1)
                        .value(columnIndex - 1))) {
                    equalCount++;
                    i--;
                }
                // copy the whole run in forward order
                int j = 0;
                while (j < equalCount) {
                    reversedData.add(m_Data.instance(i + j));
                    j++;
                }
            }
            m_Data = reversedData;
        }
        notifyListener(new TableModelEvent(this));
    }
}
From source file:meka.gui.explorer.classify.PredictionsOnTestset.java
License:Open Source License
/**
 * Returns the action listener to use in the menu. Clicking the menu item runs
 * the selected model over the tab's current test set (on a background thread)
 * and shows the predictions in a data-viewer dialog.
 *
 * @param history the current history
 * @param index the selected history item
 * @return the listener
 */
@Override
public ActionListener getActionListener(final ResultHistoryList history, final int index) {
    final MultiLabelClassifier classifier = (MultiLabelClassifier) getClassifier(history, index);
    final Instances header = getHeader(history, index);
    return new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            Runnable run = new Runnable() {
                @Override
                public void run() {
                    ClassifyTab owner = (ClassifyTab) getOwner();
                    Instances test;
                    owner.startBusy("Predictions on test...");
                    try {
                        MLUtils.prepareData(owner.getTestData());
                        test = new Instances(owner.getTestData());
                        test.setClassIndex(owner.getTestData().classIndex());
                        // the model's training header must match the test set
                        String msg = header.equalHeadersMsg(test);
                        if (msg != null)
                            throw new IllegalArgumentException(
                                    "Model's training set and current test set are not compatible:\n" + msg);
                        // collect predictions: copy each test row and overwrite
                        // its label columns with the predicted distribution
                        Instances predicted = new Instances(test, 0);
                        for (int i = 0; i < test.numInstances(); i++) {
                            double pred[] = classifier.distributionForInstance(test.instance(i));
                            // Cut off any [no-longer-needed] probabalistic information from MT classifiers.
                            if (classifier instanceof MultiTargetClassifier)
                                pred = Arrays.copyOf(pred, test.classIndex());
                            Instance predInst = (Instance) test.instance(i).copy();
                            for (int j = 0; j < pred.length; j++)
                                predInst.setValue(j, pred[j]);
                            predicted.add(predInst);
                            // progress feedback every 100 instances
                            if ((i + 1) % 100 == 0)
                                owner.showStatus(
                                        "Predictions on test (" + (i + 1) + "/" + test.numInstances() + ")...");
                        }
                        owner.finishBusy();
                        // display predictions in a modeless viewer dialog
                        DataViewerDialog dialog = new DataViewerDialog(GUIHelper.getParentFrame(owner),
                                ModalityType.MODELESS);
                        dialog.setDefaultCloseOperation(DataViewerDialog.DISPOSE_ON_CLOSE);
                        dialog.setInstances(predicted);
                        dialog.setSize(800, 600);
                        dialog.setLocationRelativeTo(owner);
                        dialog.setVisible(true);
                    } catch (Exception e) {
                        // surface the failure in status bar, log, and a dialog
                        owner.handleException("Predictions failed on test set:", e);
                        owner.finishBusy("Predictions failed: " + e);
                        JOptionPane.showMessageDialog(owner, "Predictions failed:\n" + e, "Error",
                                JOptionPane.ERROR_MESSAGE);
                    }
                }
            };
            ((ClassifyTab) getOwner()).start(run);
        }
    };
}
From source file:MetaBlocking.EnhancedMetaBlocking.FastImplementations.RedefinedCardinalityNodePruning.java
License:Open Source License
protected void verifyValidEntities(int entityId, Instances trainingInstances) { if (validEntities.isEmpty()) { return;//from w w w . ja v a 2 s. co m } topKEdges.clear(); minimumWeight = Double.MIN_VALUE; Iterator<Integer> it = validEntitiesNeighbor.iterator(); for (int neighborId : validEntities) { // System.out.println("comparison A" + entityId +" "+ neighborId); // if(entityId==2516) // System.out.println("2516 ---"); double weight = getWeight(entityId, neighborId); int blockId = it.next(); if (neighborId == 6792) System.out.println("ok"); if (weight < minimumWeight) { continue; } Comparison comparison = getComparison(entityId, neighborId); comparison.setUtilityMeasure(weight); comparison.blockId = blockId; topKEdges.add(comparison); if (threshold < topKEdges.size()) { Comparison lastComparison = topKEdges.poll(); minimumWeight = lastComparison.getUtilityMeasure(); } } nearestEntities[entityId] = new HashSet<Comparison>(topKEdges); Iterator<Comparison> itb = nearestEntities[entityId].iterator(); while (itb.hasNext()) { Comparison c = itb.next(); int neighborId_clean; int neighborId = c.getEntityId1() == entityId ? c.getEntityId2() : c.getEntityId1(); neighborId_clean = neighborId; if (neighborId_clean == 6792 || neighborId == 6792) System.out.println("ok"); if (cleanCleanER && entityId < datasetLimit) { neighborId += datasetLimit; } // // if (nearestEntities[neighborId] == null) { // continue; // } // // if (nearestEntities[neighborId].contains(c)) { // if(! 
(entityId < neighborId)) // continue; // } // System.out.println(entityId +" "+ neighborId); // if(entityId>datasetLimit){ // int temp=neighborId_clean; // neighborId=entityId; // entityId=temp; // } Comparison comp = new Comparison(true, entityId, neighborId_clean); final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(c.blockId, comp); if (commonBlockIndices == null) continue; double[] instanceValues = new double[8]; double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(entityId, 0)); double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(neighborId, 0)); instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2; double raccb = 0; for (Integer index1 : commonBlockIndices) { raccb += 1.0 / comparisonsPerBlock[index1]; } if (raccb < 1.0E-6) { raccb = 1.0E-6; } instanceValues[1] = raccb; instanceValues[2] = commonBlockIndices.size() / (redundantCPE[entityId] + redundantCPE[neighborId] - commonBlockIndices.size()); instanceValues[3] = nonRedundantCPE[entityId]; instanceValues[4] = nonRedundantCPE[neighborId]; // instanceValues[5] = ebc.getSimilarityAttribute(c.getEntityId1(), c.getEntityId2()); instanceValues[5] = neighborId; instanceValues[6] = entityId;//c.getUtilityMeasure(); //(Math.sqrt(Math.pow(averageWeight[entityId], 2) + Math.pow(averageWeight[neighborId], 2)) / 4) * getWeight(c.getEntityId1(), c.getEntityId2()+datasetLimit); instanceValues[7] = adp.isSuperfluous(c) == true ? 0 : 1;//adp.isSuperfluous(getComparison(c.getEntityId1(), c.getEntityId2()+datasetLimit))?1:0; Instance newInstance = new DenseInstance(1.0, instanceValues); newInstance.setDataset(trainingInstances); trainingInstances.add(newInstance); // for (int i = 5; i < instanceValues.length-1; i++) { // System.out.print(instanceValues[i] +" "); // } // System.out.println(); // if(instanceValues[6]!=instanceValues[5]) // System.out.println("erro"); // else // System.out.print("..."); } }