List of usage examples for weka.core Attribute isString
public final boolean isString()
From source file:adams.flow.sink.WekaInstanceViewer.java
License:Open Source License
/** * Displays the token (the panel and dialog have already been created at * this stage)./*from w ww . j av a 2 s. co m*/ * * @param token the token to display */ @Override protected void display(Token token) { InstanceContainerManager manager; InstanceContainer cont; weka.core.Instance winst; weka.core.Attribute att; String id; adams.data.instance.Instance inst; if (token.getPayload() instanceof weka.core.Instance) { winst = (weka.core.Instance) token.getPayload(); inst = new adams.data.instance.Instance(); inst.set(winst); if (!m_ID.isEmpty()) { att = winst.dataset().attribute(m_ID); if (att != null) { if (att.isNominal() || att.isString()) id = winst.stringValue(att.index()); else id = "" + winst.value(att.index()); inst.setID(id); } } } else { inst = (adams.data.instance.Instance) token.getPayload(); if (inst.hasReport() && inst.getReport().hasValue(m_ID)) inst.setID("" + inst.getReport().getValue(new Field(m_ID, DataType.UNKNOWN))); } manager = m_InstancePanel.getContainerManager(); cont = manager.newContainer(inst); manager.startUpdate(); manager.add(cont); m_Updater.update(m_InstancePanel, cont); }
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/**
 * Makes a decision about a single example and returns it wrapped in a feature vector.
 * A nominal or string class attribute yields a discrete feature holding the most probable
 * label; a numeric class attribute yields a real-valued feature.
 *
 * @param exampleFeatures The example's array of feature indices.
 * @param exampleValues The example's array of feature values.
 * @return A feature vector with a single feature containing the prediction for this example
 *         (empty if the predicted label is not in the label lexicon).
 **/
public FeatureVector classify(int[] exampleFeatures, double[] exampleValues) {
    if (!trained) {
        System.err.println(
                "WekaWrapper: Error - Cannot make a classification with an untrained classifier.");
        new Exception().printStackTrace();
        System.exit(1);
    }

    // The first entry of attributeInfo is taken to be the class attribute.
    Attribute classAttribute = (Attribute) attributeInfo.elementAt(0);
    boolean discrete = classAttribute.isNominal() || classAttribute.isString();

    if (!discrete) {
        if (classAttribute.isNumeric()) {
            double prediction = getDistribution(exampleFeatures, exampleValues)[0];
            return new FeatureVector(
                    new RealPrimitiveStringFeature(containingPackage, name, "", prediction));
        }

        System.err.println("WekaWrapper: Error - illegal class type.");
        new Exception().printStackTrace();
        System.exit(1);
        return new FeatureVector();
    }

    // Discrete case: pick the first index holding the largest probability.
    double[] distribution = getDistribution(exampleFeatures, exampleValues);
    int argMax = 0;
    int candidate = 1;
    while (candidate < distribution.length) {
        if (distribution[candidate] > distribution[argMax]) {
            argMax = candidate;
        }
        ++candidate;
    }

    Feature label = labelLexicon.lookupKey(argMax);
    if (label == null) {
        return new FeatureVector();
    }

    String value = label.getStringValue();
    short valueCount = (short) allowableValues().length;
    return new FeatureVector(new DiscretePrimitiveStringFeature(containingPackage, name, "",
            value, valueIndexOf(value), valueCount));
}
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/**
 * Builds the set of scores that relate each possible discrete classification value to the
 * given example. Only meaningful for nominal or string class attributes; any other class
 * type is reported as an error and terminates the JVM.
 *
 * @param exampleFeatures The example's array of feature indices.
 * @param exampleValues The example's array of feature values.
 * @return The scores, keyed by class value.
 **/
public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
    double[] probabilities = getDistribution(exampleFeatures, exampleValues);

    // The first entry of attributeInfo is taken to be the class attribute.
    Attribute classAttribute = (Attribute) attributeInfo.elementAt(0);
    ScoreSet result = new ScoreSet();

    if (!classAttribute.isNominal() && !classAttribute.isString()) {
        if (classAttribute.isNumeric()) {
            System.err.println("WekaWrapper: Error - The 'scores' function should not be called "
                    + "when the class attribute is numeric.");
        } else {
            System.err.println("WekaWrapper: Error - ScoreSet: Class Types must be either "
                    + "Nominal, String, or Numeric.");
        }
        new Exception().printStackTrace();
        System.exit(1);
        return result;
    }

    Enumeration classValues = classAttribute.enumerateValues();
    for (int position = 0; classValues.hasMoreElements(); ++position) {
        if (position >= probabilities.length) {
            System.err.println("WekaWrapper: Error - scores found more possible values than "
                    + "probabilities.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        double score = probabilities[position];
        String classValue = (String) classValues.nextElement();
        result.put(classValue, score);
    }
    return result;
}
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Convert a single instance over if the class is nominal. The converted * instance is added to the end of the output queue. * * @param instance the instance to convert *///w ww . j ava 2s.c om private void convertInstance(Instance instance) { // create a copy of the input instance Instance inst = null; if (instance instanceof SparseInstance) { inst = new SparseInstance(instance.weight(), instance.toDoubleArray()); } else { inst = new DenseInstance(instance.weight(), instance.toDoubleArray()); } // copy the string values from this instance as well (only the existing ones) inst.setDataset(getOutputFormat()); copyValues(inst, false, instance.dataset(), getOutputFormat()); // beware of weird behavior of this function (see source)!! inst.setDataset(getOutputFormat()); // find the missing values to be filled + the double values for the new "missing" label and store it double[] vals = instance.toDoubleArray(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = instance.attribute(j); if (m_Columns.isInRange(j) && instance.isMissing(j)) { // find the "missing" value in the output nominal attribute if (att.isNominal()) { vals[j] = inst.dataset().attribute(j).indexOfValue(m_ReplVal); } // add a string value for the new "missing" label else if (att.isString()) { vals[j] = inst.dataset().attribute(j).addStringValue(m_ReplVal); } } } // fill in the missing values found inst.replaceMissingValues(vals); push(inst); }
From source file:org.opentox.jaqpot3.qsar.util.AttributeCleanup.java
License:Open Source License
private Instances remove(Instances input) throws QSARException { Remove remove = new Remove(); ArrayList<Integer> attributeList = new ArrayList<Integer>(); for (int i = 0; i < input.numAttributes(); i++) { Attribute attribute = input.attribute(i); if ((attribute.name().equals("compound_uri") || attribute.name().equalsIgnoreCase("uri")) && isKeepCompoundURI()) { continue; } else if (!isKeepCompoundURI() && (attribute.name().equals("compound_uri") || attribute.name().equalsIgnoreCase("uri"))) { attributeList.add(i);//from w ww. j a v a 2 s .co m } if (attribute.isNominal() && toBeRemoved.contains(AttributeType.nominal)) { attributeList.add(i); continue; } else if (attribute.isString() && toBeRemoved.contains(AttributeType.string)) { attributeList.add(i); continue; } else if (attribute.isNumeric() && toBeRemoved.contains(AttributeType.numeric)) { attributeList.add(i); continue; } } int[] attributeIndices = new int[attributeList.size()]; for (int i = 0; i < attributeList.size(); i++) { attributeIndices[i] = attributeList.get(i).intValue(); } remove.setAttributeIndicesArray(attributeIndices); try { remove.setInputFormat(input); } catch (Exception ex) { throw new QSARException("FilteringError: Invalid input format " + "for attribute-type removing filter", ex); } Instances output; try { output = Remove.useFilter(input, remove); } catch (Exception ex) { throw new QSARException("FilteringError: The filter is unable to " + "remove the specified types :" + toBeRemoved.toString(), ex); } return output; }
From source file:org.opentox.toxotis.factory.DatasetFactory.java
License:Open Source License
/** * Create a {@link DataEntry data entry} from a single instance. * @param instance/*from w w w.jav a2 s .c om*/ * @return * A Data Entry that corresponds to the provided instance. * @throws ToxOtisException */ public DataEntry createDataEntry(Instance instance) throws ToxOtisException { Enumeration attributes = instance.enumerateAttributes(); DataEntry de = new DataEntry(); try { while (attributes.hasMoreElements()) { Attribute attribute = (Attribute) attributes.nextElement(); if (attribute.name().equals(Dataset.COMPOUND_URI) || attribute.name().equals("URI")) { de.setConformer(new Compound(new VRI(instance.stringValue(attribute)))); } else { FeatureValue fv = new FeatureValue(); Feature feature = new Feature(new VRI(attribute.name())); LiteralValue value = null; if (attribute.isNumeric()) { value = new LiteralValue<Double>(instance.value(attribute), XSDDatatype.XSDdouble); feature.getOntologicalClasses().add(OTClasses.numericFeature()); } else if (attribute.isString() || attribute.isDate()) { value = new LiteralValue<String>(instance.stringValue(attribute), XSDDatatype.XSDstring); feature.getOntologicalClasses().add(OTClasses.stringFeature()); } else if (attribute.isNominal()) { value = new LiteralValue<String>(instance.stringValue(attribute), XSDDatatype.XSDstring); Enumeration nominalValues = attribute.enumerateValues(); feature.getOntologicalClasses().add(OTClasses.nominalFeature()); while (nominalValues.hasMoreElements()) { String nomValue = (String) nominalValues.nextElement(); feature.getAdmissibleValues() .add(new LiteralValue<String>(nomValue, XSDDatatype.XSDstring)); } } fv.setFeature(feature); fv.setValue(value); de.addFeatureValue(fv); } } } catch (URISyntaxException ex) { throw new ToxOtisException(ex); } return de; }
From source file:org.packDataMining.SMOTE.java
License:Open Source License
/** * The procedure implementing the SMOTE algorithm. The output * instances are pushed onto the output queue for collection. * //from ww w . j a va 2s. co m * @throws Exception if provided options cannot be executed * on input instances */ protected void doSMOTE() throws Exception { int minIndex = 0; int min = Integer.MAX_VALUE; if (m_DetectMinorityClass) { // find minority class int[] classCounts = getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts; for (int i = 0; i < classCounts.length; i++) { if (classCounts[i] != 0 && classCounts[i] < min) { min = classCounts[i]; minIndex = i; } } } else { String classVal = getClassValue(); if (classVal.equalsIgnoreCase("first")) { minIndex = 1; } else if (classVal.equalsIgnoreCase("last")) { minIndex = getInputFormat().numClasses(); } else { minIndex = Integer.parseInt(classVal); } if (minIndex > getInputFormat().numClasses()) { throw new Exception("value index must be <= the number of classes"); } minIndex--; // make it an index } int nearestNeighbors; if (min <= getNearestNeighbors()) { nearestNeighbors = min - 1; } else { nearestNeighbors = getNearestNeighbors(); } if (nearestNeighbors < 1) throw new Exception("Cannot use 0 neighbors!"); // compose minority class dataset // also push all dataset instances Instances sample = getInputFormat().stringFreeStructure(); Enumeration instanceEnum = getInputFormat().enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); push((Instance) instance.copy()); if ((int) instance.classValue() == minIndex) { sample.add(instance); } } // compute Value Distance Metric matrices for nominal features Map vdmMap = new HashMap(); Enumeration attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { if (attr.isNominal() || attr.isString()) { double[][] vdm = new 
double[attr.numValues()][attr.numValues()]; vdmMap.put(attr, vdm); int[] featureValueCounts = new int[attr.numValues()]; int[][] featureValueCountsByClass = new int[getInputFormat().classAttribute().numValues()][attr .numValues()]; instanceEnum = getInputFormat().enumerateInstances(); while (instanceEnum.hasMoreElements()) { Instance instance = (Instance) instanceEnum.nextElement(); int value = (int) instance.value(attr); int classValue = (int) instance.classValue(); featureValueCounts[value]++; featureValueCountsByClass[classValue][value]++; } for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) { for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) { double sum = 0; for (int classValueIndex = 0; classValueIndex < getInputFormat() .numClasses(); classValueIndex++) { double c1i = (double) featureValueCountsByClass[classValueIndex][valueIndex1]; double c2i = (double) featureValueCountsByClass[classValueIndex][valueIndex2]; double c1 = (double) featureValueCounts[valueIndex1]; double c2 = (double) featureValueCounts[valueIndex2]; double term1 = c1i / c1; double term2 = c2i / c2; sum += Math.abs(term1 - term2); } vdm[valueIndex1][valueIndex2] = sum; } } } } } // use this random source for all required randomness Random rand = new Random(getRandomSeed()); // find the set of extra indices to use if the percentage is not evenly divisible by 100 List extraIndices = new LinkedList(); double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0); int extraIndicesCount = (int) (percentageRemainder * sample.numInstances()); if (extraIndicesCount >= 1) { for (int i = 0; i < sample.numInstances(); i++) { extraIndices.add(i); } } Collections.shuffle(extraIndices, rand); extraIndices = extraIndices.subList(0, extraIndicesCount); Set extraIndexSet = new HashSet(extraIndices); // the main loop to handle computing nearest neighbors and generating SMOTE // examples from each instance in the original minority class 
data Instance[] nnArray = new Instance[nearestNeighbors]; for (int i = 0; i < sample.numInstances(); i++) { Instance instanceI = sample.instance(i); // find k nearest neighbors for each instance List distanceToInstance = new LinkedList(); for (int j = 0; j < sample.numInstances(); j++) { Instance instanceJ = sample.instance(j); if (i != j) { double distance = 0; attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { double iVal = instanceI.value(attr); double jVal = instanceJ.value(attr); if (attr.isNumeric()) { distance += Math.pow(iVal - jVal, 2); } else { distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal]; } } } distance = Math.pow(distance, .5); distanceToInstance.add(new Object[] { distance, instanceJ }); } } // sort the neighbors according to distance Collections.sort(distanceToInstance, new Comparator() { public int compare(Object o1, Object o2) { double distance1 = (Double) ((Object[]) o1)[0]; double distance2 = (Double) ((Object[]) o2)[0]; return (int) Math.ceil(distance1 - distance2); } }); // populate the actual nearest neighbor instance array Iterator entryIterator = distanceToInstance.iterator(); int j = 0; while (entryIterator.hasNext() && j < nearestNeighbors) { nnArray[j] = (Instance) ((Object[]) entryIterator.next())[1]; j++; } // create synthetic examples int n = (int) Math.floor(getPercentage() / 100); while (n > 0 || extraIndexSet.remove(i)) { double[] values = new double[sample.numAttributes()]; int nn = rand.nextInt(nearestNeighbors); attrEnum = getInputFormat().enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); if (!attr.equals(getInputFormat().classAttribute())) { if (attr.isNumeric()) { double dif = nnArray[nn].value(attr) - instanceI.value(attr); double gap = rand.nextDouble(); values[attr.index()] = (double) 
(instanceI.value(attr) + gap * dif); } else if (attr.isDate()) { double dif = nnArray[nn].value(attr) - instanceI.value(attr); double gap = rand.nextDouble(); values[attr.index()] = (long) (instanceI.value(attr) + gap * dif); } else { int[] valueCounts = new int[attr.numValues()]; int iVal = (int) instanceI.value(attr); valueCounts[iVal]++; for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) { int val = (int) nnArray[nnEx].value(attr); valueCounts[val]++; } int maxIndex = 0; int max = Integer.MIN_VALUE; for (int index = 0; index < attr.numValues(); index++) { if (valueCounts[index] > max) { max = valueCounts[index]; maxIndex = index; } } values[attr.index()] = maxIndex; } } } values[sample.classIndex()] = minIndex; Instance synthetic = new Instance(1.0, values); push(synthetic); n--; } } }
From source file:org.pentaho.di.scoring.WekaScoringData.java
License:Open Source License
/** * Finds a mapping between the attributes that a Weka model has been trained * with and the incoming Kettle row format. Returns an array of indices, where * the element at index 0 of the array is the index of the Kettle field that * corresponds to the first attribute in the Instances structure, the element * at index 1 is the index of the Kettle fields that corresponds to the second * attribute, .../*from w w w . j a v a2 s . co m*/ * * @param header the Instances header * @param inputRowMeta the meta data for the incoming rows * @return the mapping as an array of integer indices */ public static int[] findMappings(Instances header, RowMetaInterface inputRowMeta) { // Instances header = m_model.getHeader(); int[] mappingIndexes = new int[header.numAttributes()]; HashMap<String, Integer> inputFieldLookup = new HashMap<String, Integer>(); for (int i = 0; i < inputRowMeta.size(); i++) { ValueMetaInterface inField = inputRowMeta.getValueMeta(i); inputFieldLookup.put(inField.getName(), Integer.valueOf(i)); } // check each attribute in the header against what is incoming for (int i = 0; i < header.numAttributes(); i++) { Attribute temp = header.attribute(i); String attName = temp.name(); // look for a matching name Integer matchIndex = inputFieldLookup.get(attName); boolean ok = false; int status = NO_MATCH; if (matchIndex != null) { // check for type compatibility ValueMetaInterface tempField = inputRowMeta.getValueMeta(matchIndex.intValue()); if (tempField.isNumeric() || tempField.isBoolean()) { if (temp.isNumeric()) { ok = true; status = 0; } else { status = TYPE_MISMATCH; } } else if (tempField.isString()) { if (temp.isNominal() || temp.isString()) { ok = true; status = 0; // All we can assume is that this input field is ok. 
// Since we wont know what the possible values are // until the data is pumping throug, we will defer // the matching of legal values until then } else { status = TYPE_MISMATCH; } } else { // any other type is a mismatch (might be able to do // something with dates at some stage) status = TYPE_MISMATCH; } } if (ok) { mappingIndexes[i] = matchIndex.intValue(); } else { // mark this attribute as missing or type mismatch mappingIndexes[i] = status; } } return mappingIndexes; }
From source file:org.pentaho.di.scoring.WekaScoringDialog.java
License:Open Source License
/** * Build a string that shows the mappings between Weka attributes and incoming * Kettle fields.//from www. j ava 2s.c om * * @param model a <code>WekaScoringModel</code> value */ private void mappingString(WekaScoringModel model) { try { StepMeta stepMetaTemp = transMeta.findStep(stepname); if (stepMetaTemp != null) { RowMetaInterface rowM = transMeta.getPrevStepFields(stepMetaTemp); Instances header = model.getHeader(); int[] mappings = WekaScoringData.findMappings(header, rowM); StringBuffer result = new StringBuffer(header.numAttributes() * 10); int maxLength = 0; for (int i = 0; i < header.numAttributes(); i++) { if (header.attribute(i).name().length() > maxLength) { maxLength = header.attribute(i).name().length(); } } maxLength += 12; // length of " (nominal)"/" (numeric)" int minLength = 16; // "Model attributes".length() String headerS = BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.ModelAttsHeader"); //$NON-NLS-1$ String sep = "----------------"; //$NON-NLS-1$ if (maxLength < minLength) { maxLength = minLength; } headerS = getFixedLengthString(headerS, ' ', maxLength); sep = getFixedLengthString(sep, '-', maxLength); sep += "\t ----------------\n"; //$NON-NLS-1$ headerS += "\t " //$NON-NLS-1$ + BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.IncomingFields") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ result.append(headerS); result.append(sep); for (int i = 0; i < header.numAttributes(); i++) { Attribute temp = header.attribute(i); String attName = "("; //$NON-NLS-1$ if (temp.isNumeric()) { attName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.Numeric") + ")"; //$NON-NLS-1$ //$NON-NLS-2$ } else if (temp.isNominal()) { attName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.Nominal") + ")"; //$NON-NLS-1$ //$NON-NLS-2$ } else if (temp.isString()) { attName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.String") + ")"; //$NON-NLS-1$ 
//$NON-NLS-2$ } attName += (" " + temp.name()); //$NON-NLS-1$ attName = getFixedLengthString(attName, ' ', maxLength); attName += "\t--> "; //$NON-NLS-1$ result.append(attName); String inFieldNum = ""; //$NON-NLS-1$ if (mappings[i] == WekaScoringData.NO_MATCH) { inFieldNum += "- "; //$NON-NLS-1$ result.append(inFieldNum + BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.MissingNoMatch") + "\n"); //$NON-NLS-2$ //$NON-NLS-2$ } else if (mappings[i] == WekaScoringData.TYPE_MISMATCH) { inFieldNum += (rowM.indexOfValue(temp.name()) + 1) + " "; //$NON-NLS-1$ result.append(inFieldNum + BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.MissingTypeMismatch") + "\n"); //$NON-NLS-2$ //$NON-NLS-2$ } else { ValueMetaInterface tempField = rowM.getValueMeta(mappings[i]); String fieldName = "" + (mappings[i] + 1) + " ("; //$NON-NLS-1$ //$NON-NLS-2$ if (tempField.isBoolean()) { fieldName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.Boolean") + ")"; //$NON-NLS-2$ //$NON-NLS-2$ } else if (tempField.isNumeric()) { fieldName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.Numeric") + ")"; //$NON-NLS-2$ //$NON-NLS-2$ } else if (tempField.isString()) { fieldName += BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Mapping.String") + ")"; //$NON-NLS-2$ //$NON-NLS-2$ } fieldName += " " + tempField.getName(); //$NON-NLS-1$ result.append(fieldName + "\n"); //$NON-NLS-1$ } } // set the text of the text area in the Mappings tab m_wMappingText.setText(result.toString()); } } catch (KettleException e) { log.logError(BaseMessages.getString(WekaScoringMeta.PKG, "WekaScoringDialog.Log.UnableToFindInput")); //$NON-NLS-1$ return; } }