List of usage examples for weka.core Attribute enumerateValues
public final Enumeration<Object> enumerateValues()
From source file:cezeri.utils.FactoryInstance.java
/**
 * Returns the labels of the class attribute of {@code data}, in the order in which the
 * attribute enumerates them.
 *
 * @param data the dataset to inspect; the attribute at {@code data.classIndex()} is used
 * @return the class labels as strings; an empty array when the class attribute exposes no
 *         enumerable values
 */
public static String[] getOriginalClasses(Instances data) {
    Attribute att = data.attribute(data.classIndex());

    // Weka's Javadoc states that enumerateValues() returns null for attributes that are
    // not nominal, string or relation-valued (e.g. a numeric class), so guard against it
    // instead of failing with a NullPointerException.
    Enumeration<?> enu = att.enumerateValues();
    if (enu == null) {
        return new String[0];
    }

    // Size the array from the attribute itself so it always matches what is enumerated.
    String[] ret = new String[att.numValues()];
    int q = 0;
    while (enu.hasMoreElements()) {
        ret[q++] = (String) enu.nextElement();
    }
    return ret;
}
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/** * Get Samoa attribute from a weka attribute. * * @param index the index/*from w ww. j a v a2 s. co m*/ * @param attribute the attribute * @return the attribute */ protected Attribute samoaAttribute(int index, weka.core.Attribute attribute) { Attribute samoaAttribute; if (attribute.isNominal()) { Enumeration enu = attribute.enumerateValues(); List<String> attributeValues = new ArrayList<String>(); while (enu.hasMoreElements()) { attributeValues.add((String) enu.nextElement()); } samoaAttribute = new Attribute(attribute.name(), attributeValues); } else { samoaAttribute = new Attribute(attribute.name()); } return samoaAttribute; }
From source file:de.ugoe.cs.cpdp.dataprocessing.NominalAttributeFilter.java
License:Apache License
@Override public void apply(Instances testdata, Instances traindata) { int indexOfConfidenceAttribute = -1; // Find index of the named confidence attribute to filter for for (int i = 0; i < traindata.numAttributes(); i++) { if (traindata.attribute(i).name().equals(nominalAttributeName)) { indexOfConfidenceAttribute = i; }/* w ww.j ava 2 s . c o m*/ } // if it was not found return if (indexOfConfidenceAttribute == -1) { return; } // Find index of nominal values Attribute confidenceAttribute = traindata.attribute(indexOfConfidenceAttribute); ArrayList<Object> nominalValuesOfConfidenceAttribute = Collections .list(confidenceAttribute.enumerateValues()); ArrayList<Double> indexOfnominalAttributeValues = new ArrayList<Double>(); for (int k = 0; k < nominalValuesOfConfidenceAttribute.size(); k++) { for (String attributeValue : nominalAttributeValues) { if (((String) nominalValuesOfConfidenceAttribute.get(k)).equals(attributeValue)) { indexOfnominalAttributeValues.add((double) k); } } } // Go through all instances and check if nominal attribute equals for (int j = traindata.numInstances() - 1; j >= 0; j--) { Instance wekaInstance = traindata.get(j); // delete all instances where nominal attribute has the value of one of the parameter if (indexOfnominalAttributeValues.contains(wekaInstance.value(indexOfConfidenceAttribute))) { traindata.delete(j); } } }
From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java
License:Open Source License
/** * Produces a set of scores indicating the degree to which each possible discrete classification * value is associated with the given example object. **///from w w w . java 2 s .co m public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) { double[] dist = getDistribution(exampleFeatures, exampleValues); /* * Assuming that the first Attribute in our attributeInfo vector is the class attribute, * decide which case we are in */ Attribute classAtt = (Attribute) attributeInfo.elementAt(0); ScoreSet scores = new ScoreSet(); if (classAtt.isNominal() || classAtt.isString()) { Enumeration enumeratedValues = classAtt.enumerateValues(); int i = 0; while (enumeratedValues.hasMoreElements()) { if (i >= dist.length) { System.err.println( "WekaWrapper: Error - scores found more possible values than " + "probabilities."); new Exception().printStackTrace(); System.exit(1); } double s = dist[i]; String v = (String) enumeratedValues.nextElement(); scores.put(v, s); ++i; } } else if (classAtt.isNumeric()) { System.err.println("WekaWrapper: Error - The 'scores' function should not be called " + "when the class attribute is numeric."); new Exception().printStackTrace(); System.exit(1); } else { System.err.println( "WekaWrapper: Error - ScoreSet: Class Types must be either " + "Nominal, String, or Numeric."); new Exception().printStackTrace(); System.exit(1); } return scores; }
From source file:elh.eus.absa.Features.java
License:Open Source License
/** * Creates a feature set from a previously saved model. This allows to load previously saved feature sets. * /*from w ww. java 2 s . co m*/ * @param model string: path to the serialized model containing header information * @throws IOException */ private void createFeatureSetFromModel(String model) throws IOException { try { WekaWrapper ww = new WekaWrapper(model); Instances header = ww.loadHeader(model); int attNum = header.numAttributes(); for (int i = 0; i < attNum; i++) { Attribute att = header.attribute(i); String name = att.name(); if (att.isNumeric()) { addNumericFeature(name); //System.out.println("numeric feature: "+name); } else if (att.isNominal()) { //System.out.println("nominal feature: "+name+" - "+att.toString()); ArrayList<String> vals = new ArrayList<String>(); Enumeration<Object> e = att.enumerateValues(); while (e.hasMoreElements()) { vals.add(e.nextElement().toString()); } addNominalFeature(name, vals); } } //General polarity lexicon if (header.attribute("polLexGen_posScore") != null) { this.polarLexiconGen = new Lexicon(new File(params.getProperty("polarLexiconGeneral")), "lemma"); System.err.println("Features : createFeatureSet() - General polarity lexicon loaded -> " + params.getProperty("polarLexiconGeneral") + " (" + this.polarLexiconGen.size() + " entries)"); System.out.println("Features : createFeatureSet() - General polarity lexicon loaded -> " + params.getProperty("polarLexiconGeneral") + " (" + this.polarLexiconGen.size() + " entries)"); } //Domain polarity lexicon if (header.attribute("polLexDom_posScore") != null) { //this.polarLexiconDom = loadPolarityLexiconFromFile(params.getProperty("polarLexiconDomain"), "polLexDom_"); this.polarLexiconDom = new Lexicon(new File(params.getProperty("polarLexiconDomain")), "lemma"); System.err.println("Features : createFeatureSet() - Domain polarity lexicon loaded -> " + params.getProperty("polarLexiconDomain") + " (" + this.polarLexiconDom.size() + " entries)"); System.out.println("Features : 
createFeatureSet() - Domain polarity lexicon loaded -> " + params.getProperty("polarLexiconDomain") + " (" + this.polarLexiconDom.size() + " entries)"); } // Load clark cluster category info from files loadClusterFeatures("clark"); // Load brown cluster category info from files loadClusterFeatures("brown"); // Load word2vec cluster category info from files loadClusterFeatures("word2vec"); } catch (Exception e) { System.err.println("Features::createFeatureSetFromFile -> error when loading model header"); e.printStackTrace(); } }
From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java
License:Open Source License
/** * Set the output format if the class is nominal. *///from www .j av a 2 s .co m private void setOutputFormat() { FastVector newAtts; Instances outputFormat; newAtts = new FastVector(); BitSet attrSrc = new BitSet(); for (int j = 0; j < getInputFormat().numAttributes(); j++) { Attribute att = null; Attribute srcAtt = getInputFormat().attribute(j); if (!m_Columns.isInRange(j) || srcAtt.indexOfValue(m_ReplVal) >= 0) { att = (Attribute) srcAtt.copy(); } else if (srcAtt.isNominal()) { Enumeration<String> valsEnum = srcAtt.enumerateValues(); ArrayList<String> valsList = new ArrayList<String>(); while (valsEnum.hasMoreElements()) { valsList.add(valsEnum.nextElement()); } valsList.add(m_ReplVal); att = new Attribute(srcAtt.name(), valsList); } else { // string attributes att = (Attribute) srcAtt.copy(); att.addStringValue(m_ReplVal); } newAtts.addElement(att); attrSrc.set(j); } outputFormat = new Instances(getInputFormat().relationName(), newAtts, 0); outputFormat.setClassIndex(getInputFormat().classIndex()); setOutputFormat(outputFormat); m_StringToCopy = new AttributeLocator(getInputFormat(), Attribute.STRING, MathUtils.findTrue(attrSrc)); }
From source file:gov.va.chir.tagline.TagLineEvaluator.java
License:Open Source License
private List<Set<Object>> getFoldDocIds(final Attribute attrDocId) { // Setup list of docs per fold final List<Set<Object>> folds = new ArrayList<Set<Object>>(); for (int i = 0; i < numFolds; i++) { folds.add(new HashSet<Object>()); }//ww w . j a v a 2s.co m // Get distinct values final List<Object> docIds = new ArrayList<Object>(); final Enumeration<?> enumer = attrDocId.enumerateValues(); while (enumer.hasMoreElements()) { docIds.add((Object) enumer.nextElement()); } if (docIds.size() < numFolds) { throw new IllegalStateException(String.format( "Number of folds must be less than or equal to number of " + "distinct document IDs [num folds = %d | " + "num distinct document IDs = %d]", numFolds, docIds.size())); } // Randomly assign doc IDs to folds final Random random = new Random(randomSeed); int i = 0; int selected = -1; while (!docIds.isEmpty()) { selected = random.nextInt(docIds.size()); folds.get(i).add(docIds.get(selected)); docIds.remove(selected); if (++i >= numFolds) { i = 0; } } return folds; }
From source file:lu.lippmann.cdb.graph.GraphUtil.java
License:Open Source License
/**
 * Sets the type (and, for nominal attributes, the list of values) of every variable of the
 * graph from the dataset attribute that has the variable's key as its name.
 *
 * @param dataSet the dataset providing the attributes
 * @param gr the graph whose variables are updated
 * @throws IllegalStateException if a variable's key does not match any attribute
 */
public static void updateVariables(final Instances dataSet, final GraphWithOperations gr) {
    for (final CVariable variable : gr.getVariables()) {
        final Attribute attribute = dataSet.attribute(variable.getKey());
        if (attribute == null) {
            throw new IllegalStateException("Attribute '" + variable.getKey() + "' not found in dataset!?");
        }

        if (!attribute.isNominal()) {
            variable.setType(attribute.isNumeric() ? CadralType.NUMERIC : CadralType.UNKNOWN);
            continue;
        }

        // Nominal attribute: expose its labels as the variable's values.
        variable.setType(CadralType.ENUMERATION);
        final List<String> labels = new ArrayList<String>();
        for (final Enumeration<?> e = attribute.enumerateValues(); e.hasMoreElements();) {
            labels.add((String) e.nextElement());
        }
        variable.setValues(labels);
    }
}
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) { try {/* w ww . j ava 2 s . co m*/ this.instances = instances; this.theta = theta; this.resample = needsToResample; if (needsToResample) { final Resample rs = new Resample(); if (this.instances.numInstances() > MAX_INSTANCES_TAKEN) { rs.setInputFormat(instances); rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances()); this.instances = Filter.useFilter(instances, rs); } } //System.out.println("Size = " + this.instances.numInstances()); this.mapDomain = new HashMap<Integer, Set<String>>(); if (ignoreClass) { this.instances.setClassIndex(-1); } //Save index of nominal & categorial attributes //Build a map i-DOM -> Attribute index this.idxsC = new ArrayList<Integer>(); this.idxsN = new ArrayList<Integer>(); int nn = 0; for (int i = 0; i < instances.numAttributes(); i++) { if (!instances.attribute(i).isNumeric()) mapDomain.put(i, new HashSet<String>()); } //Create map index & domain this.mapIndex = new HashMap<Integer, Map<String, Integer>>(); int mapIdx = 0; for (int i = 0; i < instances.numAttributes(); i++) { Attribute attribute = instances.attribute(i); if (!attribute.isNumeric()) { idxsC.add(i); //i-th attribute is nominal final Map<String, Integer> mapIndexAttribute = new HashMap<String, Integer>(); mapIndex.put(i, mapIndexAttribute); Enumeration<?> en = attribute.enumerateValues(); while (en.hasMoreElements()) { String catVal = en.nextElement().toString(); boolean created = mapDomain.get(i).add(catVal); if (created) { mapIndexAttribute.put(catVal, mapIdx++); } } nn += mapDomain.get(i).size(); //count total nominal values } else { idxsN.add(i); } } this.n = nn; this.base = new ArrayList<TupleSI>(); this.noBase = new ArrayList<TupleSI>(); this.M = new int[n][n]; this.D = new double[n][n]; this.F = new HashMap<TupleSI, Double>(); this.computeBase(); this.computeMatrixMDF(); } catch (Exception e) { e.printStackTrace(); } }
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * // w w w . ja va 2s. c o m * @param dataSet * @return */ private void computeBase() { //Save base int attributeIndex = -1; int max = 0; for (int i = 0; i < instances.numAttributes(); i++) { final Attribute attribute = instances.attribute(i); //Ignore class attribute if needed if (attribute.index() == instances.classIndex() && ignoreClass) continue; if (!attribute.isNumeric()) { int size = instances.attributeStats(i).nominalCounts.length; if (size > max) { attributeIndex = i; max = size; } } } final Attribute maxAttribute = instances.attribute(attributeIndex); Enumeration<?> en = maxAttribute.enumerateValues(); while (en.hasMoreElements()) { base.add(new TupleSI(en.nextElement().toString(), attributeIndex)); } this.baseIndex = attributeIndex; //Save noBase for (int i = 0; i < instances.numAttributes(); i++) { Attribute attribute = instances.attribute(i); if (attribute.index() == instances.classIndex() && ignoreClass) continue; if (i != attributeIndex && !instances.attribute(i).isNumeric()) { Enumeration<?> enb = attribute.enumerateValues(); while (enb.hasMoreElements()) { noBase.add(new TupleSI(enb.nextElement().toString(), i)); } } } }