List of usage examples for weka.filters.unsupervised.attribute AddValues AddValues
AddValues
From source file:gov.va.chir.tagline.dao.DatasetUtil.java
License:Open Source License
@SuppressWarnings("unchecked") public static Instances createDataset(final Instances header, final Collection<Document> documents) throws Exception { // Update header to include all docIDs from the passed in documents // (Weka requires all values for nominal features) final Set<String> docIds = new TreeSet<String>(); for (Document document : documents) { docIds.add(document.getName());//from ww w . j a v a2 s . c om } final AddValues avf = new AddValues(); avf.setLabels(StringUtils.join(docIds, ",")); // Have to add 1 because SingleIndex.setValue() has a bug, expecting // the passed in index to be 1-based rather than 0-based. Why? I have // no idea. // Calling path: AddValues.setInputFormat() --> // SingleIndex.setUpper() --> // SingleIndex.setValue() avf.setAttributeIndex(String.valueOf(header.attribute(DOC_ID).index() + 1)); avf.setInputFormat(header); final Instances newHeader = Filter.useFilter(header, avf); final Instances instances = new Instances(newHeader, documents.size()); // Map attributes final Map<String, Attribute> attrMap = new HashMap<String, Attribute>(); final Enumeration<Attribute> en = newHeader.enumerateAttributes(); while (en.hasMoreElements()) { final Attribute attr = en.nextElement(); attrMap.put(attr.name(), attr); } attrMap.put(newHeader.classAttribute().name(), newHeader.classAttribute()); final Attribute docId = attrMap.get(DOC_ID); final Attribute lineId = attrMap.get(LINE_ID); final Attribute classAttr = attrMap.get(LABEL); // Add data for (Document document : documents) { final Map<String, Object> docFeatures = document.getFeatures(); for (Line line : document.getLines()) { final Instance instance = new DenseInstance(attrMap.size()); final Map<String, Object> lineFeatures = line.getFeatures(); lineFeatures.putAll(docFeatures); instance.setValue(docId, document.getName()); instance.setValue(lineId, line.getLineId()); if (line.getLabel() == null) { instance.setMissing(classAttr); } else { instance.setValue(classAttr, line.getLabel()); } for (Attribute attribute : attrMap.values()) { if (!attribute.equals(docId) && !attribute.equals(lineId) && !attribute.equals(classAttr)) { final String name = attribute.name(); final Object obj = lineFeatures.get(name); if (obj instanceof Double) { instance.setValue(attribute, ((Double) obj).doubleValue()); } else if (obj instanceof Integer) { instance.setValue(attribute, ((Integer) obj).doubleValue()); } else { instance.setValue(attribute, obj.toString()); } } } instances.add(instance); } } // Set last attribute as class instances.setClassIndex(attrMap.size() - 1); return instances; }
From source file:wekimini.DataManager.java
private void updateInstancesForNewHigherMaxClass(int index, int newNumClasses) { //Change allInstances, dummyInstances // dummyInstances.attribute(numMetaData + numInputs + index). AddValues a = new AddValues(); int oldMaxClasses = numClasses[index]; StringBuilder sb = new StringBuilder(); for (int i = oldMaxClasses + 1; i < newNumClasses; i++) { sb.append(Integer.toString(i)).append(","); }//from w w w . j a va2 s. c om sb.append(Integer.toString(newNumClasses)); Instances newAll; try { a.setAttributeIndex(Integer.toString(numMetaData + numInputs + index + 1)); //Weka indexing stupidity a.setLabels(sb.toString()); a.setSort(false); a.setInputFormat(allInstances); newAll = Filter.useFilter(allInstances, a); } catch (Exception ex) { Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, ex); return; } if (newAll.numInstances() != allInstances.numInstances()) { logger.log(Level.SEVERE, "Problem: deleted instances when removing class attribute"); } allInstances = newAll; Instances newD; try { newD = Filter.useFilter(dummyInstances, a); } catch (Exception ex) { Logger.getLogger(DataManager.class.getName()).log(Level.SEVERE, null, ex); return; } dummyInstances = newD; }