Example usage for weka.core Attribute enumerateValues

List of usage examples for weka.core Attribute enumerateValues

Introduction

On this page you can find example usages of weka.core.Attribute.enumerateValues().

Prototype

public final Enumeration<Object> enumerateValues()

Source Link

Document

Returns an enumeration of all the attribute's values if the attribute is nominal, string, or relation-valued, null otherwise.

Usage

From source file:cezeri.utils.FactoryInstance.java

/**
 * Collects the values of the class attribute of {@code data} as strings.
 *
 * @param data the dataset whose class attribute (the attribute at {@code data.classIndex()})
 *             is read
 * @return an array of length {@code data.numClasses()} filled, in enumeration order, with the
 *         values enumerated from the class attribute
 */
public static String[] getOriginalClasses(Instances data) {
    Attribute classAttribute = data.attribute(data.classIndex());
    String[] labels = new String[data.numClasses()];
    Enumeration values = classAttribute.enumerateValues();
    for (int next = 0; values.hasMoreElements(); next++) {
        labels[next] = (String) values.nextElement();
    }
    return labels;
}

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
* Get Samoa attribute from a weka attribute.
*
* @param index the index/*from w  ww. j a v a2  s. co  m*/
* @param attribute the attribute
* @return the attribute
*/
protected Attribute samoaAttribute(int index, weka.core.Attribute attribute) {
    Attribute samoaAttribute;
    if (attribute.isNominal()) {
        Enumeration enu = attribute.enumerateValues();
        List<String> attributeValues = new ArrayList<String>();
        while (enu.hasMoreElements()) {
            attributeValues.add((String) enu.nextElement());
        }
        samoaAttribute = new Attribute(attribute.name(), attributeValues);
    } else {
        samoaAttribute = new Attribute(attribute.name());
    }
    return samoaAttribute;
}

From source file:de.ugoe.cs.cpdp.dataprocessing.NominalAttributeFilter.java

License:Apache License

/**
 * Removes from {@code traindata} every instance whose value of the attribute named
 * {@code nominalAttributeName} is one of the values listed in {@code nominalAttributeValues}.
 *
 * <p>If no attribute with that name exists in {@code traindata}, nothing is changed.
 * {@code testdata} is not touched by this method.
 *
 * @param testdata the test data (unused here)
 * @param traindata the training data that is filtered in place
 */
@Override
public void apply(Instances testdata, Instances traindata) {
    // Locate the attribute by name; if several attributes share the name, the last one wins.
    int attributeIndex = -1;
    int position = 0;
    while (position < traindata.numAttributes()) {
        if (traindata.attribute(position).name().equals(nominalAttributeName)) {
            attributeIndex = position;
        }
        position++;
    }

    // Nothing to filter when the attribute does not exist.
    if (attributeIndex < 0) {
        return;
    }

    // Translate the configured labels into the numeric indices used inside the instances.
    ArrayList<Object> labels = Collections.list(traindata.attribute(attributeIndex).enumerateValues());
    ArrayList<Double> unwantedValues = new ArrayList<Double>();
    int labelIndex = 0;
    for (Object label : labels) {
        for (String unwanted : nominalAttributeValues) {
            if (((String) label).equals(unwanted)) {
                unwantedValues.add((double) labelIndex);
            }
        }
        labelIndex++;
    }

    // Walk backwards so that deleting an instance does not shift the ones still to be visited.
    int row = traindata.numInstances();
    while (--row >= 0) {
        if (unwantedValues.contains(traindata.get(row).value(attributeIndex))) {
            traindata.delete(row);
        }
    }
}

From source file:edu.illinois.cs.cogcomp.lbjava.learn.WekaWrapper.java

License:Open Source License

/**
 * Produces a set of scores indicating the degree to which each possible discrete classification
 * value is associated with the given example object.
 *
 * <p>The class attribute is taken to be the first entry of {@code attributeInfo}. For a nominal
 * or string class attribute, the i-th enumerated value is paired with the i-th entry of the
 * distribution returned by {@code getDistribution}. In every other case an error is printed
 * and {@code System.exit(1)} is called.
 *
 * @param exampleFeatures the feature indices of the example
 * @param exampleValues the feature values of the example
 * @return the scores, one per value of the class attribute
 */
public ScoreSet scores(int[] exampleFeatures, double[] exampleValues) {
    double[] distribution = getDistribution(exampleFeatures, exampleValues);

    // Assumption carried over from the original code: the class attribute is stored first.
    Attribute classAttribute = (Attribute) attributeInfo.elementAt(0);

    ScoreSet result = new ScoreSet();

    boolean discreteClass = classAttribute.isNominal() || classAttribute.isString();
    if (!discreteClass) {
        if (classAttribute.isNumeric()) {
            System.err.println("WekaWrapper: Error - The 'scores' function should not be called "
                    + "when the class attribute is numeric.");
        } else {
            System.err.println(
                    "WekaWrapper: Error - ScoreSet: Class Types must be either " + "Nominal, String, or Numeric.");
        }
        new Exception().printStackTrace();
        System.exit(1);
        return result;
    }

    Enumeration values = classAttribute.enumerateValues();
    for (int position = 0; values.hasMoreElements(); position++) {
        // More class values than probabilities means the distribution cannot be matched up.
        if (position >= distribution.length) {
            System.err.println(
                    "WekaWrapper: Error - scores found more possible values than " + "probabilities.");
            new Exception().printStackTrace();
            System.exit(1);
        }
        double score = distribution[position];
        String label = (String) values.nextElement();
        result.put(label, score);
    }

    return result;
}

From source file:elh.eus.absa.Features.java

License:Open Source License

/**
 * Creates a feature set from a previously saved model. This allows to load previously saved feature sets. 
 * /*from w ww. java 2 s . co  m*/
 * @param model string: path to the serialized model containing header information
 * @throws IOException 
 */
private void createFeatureSetFromModel(String model) throws IOException {
    try {
        WekaWrapper ww = new WekaWrapper(model);
        Instances header = ww.loadHeader(model);

        int attNum = header.numAttributes();
        for (int i = 0; i < attNum; i++) {
            Attribute att = header.attribute(i);
            String name = att.name();
            if (att.isNumeric()) {
                addNumericFeature(name);
                //System.out.println("numeric feature: "+name);
            } else if (att.isNominal()) {
                //System.out.println("nominal feature: "+name+" - "+att.toString());
                ArrayList<String> vals = new ArrayList<String>();
                Enumeration<Object> e = att.enumerateValues();
                while (e.hasMoreElements()) {
                    vals.add(e.nextElement().toString());
                }
                addNominalFeature(name, vals);
            }
        }

        //General polarity lexicon
        if (header.attribute("polLexGen_posScore") != null) {
            this.polarLexiconGen = new Lexicon(new File(params.getProperty("polarLexiconGeneral")), "lemma");
            System.err.println("Features : createFeatureSet() - General polarity lexicon loaded -> "
                    + params.getProperty("polarLexiconGeneral") + " (" + this.polarLexiconGen.size()
                    + " entries)");
            System.out.println("Features : createFeatureSet() - General polarity lexicon loaded -> "
                    + params.getProperty("polarLexiconGeneral") + " (" + this.polarLexiconGen.size()
                    + " entries)");
        }

        //Domain polarity lexicon
        if (header.attribute("polLexDom_posScore") != null) {
            //this.polarLexiconDom = loadPolarityLexiconFromFile(params.getProperty("polarLexiconDomain"), "polLexDom_");
            this.polarLexiconDom = new Lexicon(new File(params.getProperty("polarLexiconDomain")), "lemma");
            System.err.println("Features : createFeatureSet() - Domain polarity lexicon loaded -> "
                    + params.getProperty("polarLexiconDomain") + " (" + this.polarLexiconDom.size()
                    + " entries)");
            System.out.println("Features : createFeatureSet() - Domain polarity lexicon loaded -> "
                    + params.getProperty("polarLexiconDomain") + " (" + this.polarLexiconDom.size()
                    + " entries)");
        }

        // Load clark cluster category info from files
        loadClusterFeatures("clark");

        // Load brown cluster category info from files
        loadClusterFeatures("brown");

        // Load word2vec cluster category info from files
        loadClusterFeatures("word2vec");

    } catch (Exception e) {
        System.err.println("Features::createFeatureSetFromFile -> error when loading model header");
        e.printStackTrace();
    }

}

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Derives the output format from the input format and registers it.
 *
 * <p>Each input attribute is handled in one of three ways:
 * <ul>
 * <li>outside the selected columns, or already containing the replacement value: plain copy;</li>
 * <li>nominal: a new attribute with the same name whose values are the original values
 *     followed by the replacement value;</li>
 * <li>otherwise (treated as a string attribute): a copy to which the replacement value is
 *     added as a string value.</li>
 * </ul>
 * The new header keeps the relation name and class index of the input format. Afterwards
 * {@code m_StringToCopy} is set to a locator for string attributes over the input format.
 */
private void setOutputFormat() {

    FastVector attributes = new FastVector();
    // Every processed index is flagged, so all input attribute indices end up set.
    BitSet processed = new BitSet();

    int total = getInputFormat().numAttributes();
    for (int j = 0; j < total; j++) {

        Attribute source = getInputFormat().attribute(j);
        boolean keepAsIs = !m_Columns.isInRange(j) || source.indexOfValue(m_ReplVal) >= 0;

        Attribute result;
        if (keepAsIs) {
            result = (Attribute) source.copy();
        } else if (source.isNominal()) {
            ArrayList<String> labels = new ArrayList<String>();
            for (Enumeration<String> e = source.enumerateValues(); e.hasMoreElements();) {
                labels.add(e.nextElement());
            }
            labels.add(m_ReplVal);
            result = new Attribute(source.name(), labels);
        } else { // string attributes
            result = (Attribute) source.copy();
            result.addStringValue(m_ReplVal);
        }

        attributes.addElement(result);
        processed.set(j);
    }

    Instances header = new Instances(getInputFormat().relationName(), attributes, 0);
    header.setClassIndex(getInputFormat().classIndex());
    setOutputFormat(header);

    m_StringToCopy = new AttributeLocator(getInputFormat(), Attribute.STRING, MathUtils.findTrue(processed));
}

From source file:gov.va.chir.tagline.TagLineEvaluator.java

License:Open Source License

/**
 * Randomly distributes the values of the document-ID attribute over {@code numFolds} folds.
 *
 * <p>IDs are drawn one at a time with a {@link Random} seeded by {@code randomSeed} and handed
 * to the folds in round-robin order, so fold sizes differ by at most one.
 *
 * @param attrDocId the attribute whose enumerated values are the document IDs
 * @return one set of document IDs per fold
 * @throws IllegalStateException if there are fewer document IDs than folds
 */
private List<Set<Object>> getFoldDocIds(final Attribute attrDocId) {
    // One empty set per fold
    final List<Set<Object>> folds = new ArrayList<Set<Object>>();
    int created = 0;
    while (created < numFolds) {
        folds.add(new HashSet<Object>());
        created++;
    }

    // Values of the attribute that still have to be assigned
    final List<Object> remaining = new ArrayList<Object>();
    for (final Enumeration<?> values = attrDocId.enumerateValues(); values.hasMoreElements();) {
        remaining.add(values.nextElement());
    }

    if (remaining.size() < numFolds) {
        throw new IllegalStateException(String.format(
                "Number of folds must be less than or equal to number of "
                        + "distinct document IDs [num folds = %d | " + "num distinct document IDs = %d]",
                numFolds, remaining.size()));
    }

    // Draw a random remaining ID for each fold in turn until none are left
    final Random random = new Random(randomSeed);
    int fold = 0;
    while (!remaining.isEmpty()) {
        final int pick = random.nextInt(remaining.size());
        folds.get(fold).add(remaining.remove(pick));
        fold = (fold + 1 >= numFolds) ? 0 : fold + 1;
    }

    return folds;
}

From source file:lu.lippmann.cdb.graph.GraphUtil.java

License:Open Source License

/**
 * Sets the type (and, for nominal attributes, the allowed values) of every variable of the
 * graph from the attribute of the same name in the dataset.
 *
 * @param dataSet the dataset in which each variable key is looked up as an attribute name
 * @param gr the graph whose variables are updated in place
 * @throws IllegalStateException if a variable key has no matching attribute in the dataset
 */
public static void updateVariables(final Instances dataSet, final GraphWithOperations gr) {
    for (final CVariable variable : gr.getVariables()) {
        final Attribute attribute = dataSet.attribute(variable.getKey());
        if (attribute == null) {
            throw new IllegalStateException("Attribute '" + variable.getKey() + "' not found in dataset!?");
        }

        if (!attribute.isNominal()) {
            if (attribute.isNumeric()) {
                variable.setType(CadralType.NUMERIC);
            } else {
                variable.setType(CadralType.UNKNOWN);
            }
            continue;
        }

        // Nominal attribute: expose its values as the enumeration of the variable.
        variable.setType(CadralType.ENUMERATION);
        final List<String> labels = new ArrayList<String>();
        for (final Enumeration<?> e = attribute.enumerateValues(); e.hasMoreElements();) {
            labels.add((String) e.nextElement());
        }
        variable.setValues(labels);
    }
}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Prepares all internal structures for the given dataset and runs the base and matrix
 * computations ({@code computeBase()} and {@code computeMatrixMDF()}).
 *
 * <p>Any exception raised during construction is caught and only its stack trace is printed,
 * so a failed construction leaves the object partially initialised instead of throwing.
 *
 * @param instances the dataset to process
 * @param ignoreClass if {@code true}, the class index of the stored dataset is reset to -1
 * @param needsToResample if {@code true} and the dataset has more than
 *        {@code MAX_INSTANCES_TAKEN} instances, the stored dataset is replaced by a sample
 *        produced with the {@code Resample} filter
 * @param theta value stored in {@code this.theta}; its meaning is not visible in this constructor
 */
public Shih2010(Instances instances, boolean ignoreClass, boolean needsToResample, double theta) {
    try {
        this.instances = instances;
        this.theta = theta;
        this.resample = needsToResample;
        if (needsToResample) {
            final Resample rs = new Resample();
            if (this.instances.numInstances() > MAX_INSTANCES_TAKEN) {
                rs.setInputFormat(instances);
                // Percentage chosen so that MAX_INSTANCES_TAKEN relative to the current size is requested
                rs.setSampleSizePercent(MAX_INSTANCES_TAKEN * 100.0 / this.instances.numInstances());
                this.instances = Filter.useFilter(instances, rs);
            }
        }

        //System.out.println("Size = " + this.instances.numInstances());

        // Attribute index -> set of distinct values of that (non-numeric) attribute
        this.mapDomain = new HashMap<Integer, Set<String>>();

        // NOTE(review): the ignoreClass argument is only used here; this constructor does not
        // assign it to a field.
        if (ignoreClass) {
            this.instances.setClassIndex(-1);
        }

        // idxsC collects the indices of non-numeric attributes, idxsN those of numeric ones
        this.idxsC = new ArrayList<Integer>();
        this.idxsN = new ArrayList<Integer>();
        // Running total of distinct values over all non-numeric attributes
        int nn = 0;

        // NOTE(review): the loops below read the 'instances' parameter, not 'this.instances'
        // (which may have been resampled) - presumably both share the same attributes; confirm.
        for (int i = 0; i < instances.numAttributes(); i++) {
            if (!instances.attribute(i).isNumeric())
                mapDomain.put(i, new HashSet<String>());
        }

        // Attribute index -> (value -> index); mapIdx is not reset per attribute, so the
        // assigned indices are unique across all non-numeric attributes
        this.mapIndex = new HashMap<Integer, Map<String, Integer>>();
        int mapIdx = 0;
        for (int i = 0; i < instances.numAttributes(); i++) {
            Attribute attribute = instances.attribute(i);
            if (!attribute.isNumeric()) {
                idxsC.add(i); // i-th attribute is non-numeric
                final Map<String, Integer> mapIndexAttribute = new HashMap<String, Integer>();
                mapIndex.put(i, mapIndexAttribute);
                Enumeration<?> en = attribute.enumerateValues();
                while (en.hasMoreElements()) {
                    String catVal = en.nextElement().toString();
                    // Only a value seen for the first time on this attribute gets a new index
                    boolean created = mapDomain.get(i).add(catVal);
                    if (created) {
                        mapIndexAttribute.put(catVal, mapIdx++);
                    }
                }
                nn += mapDomain.get(i).size(); // count total distinct non-numeric values
            } else {
                idxsN.add(i);
            }
        }

        // n x n structures, where n is the total number of distinct non-numeric values
        this.n = nn;
        this.base = new ArrayList<TupleSI>();
        this.noBase = new ArrayList<TupleSI>();
        this.M = new int[n][n];
        this.D = new double[n][n];
        this.F = new HashMap<TupleSI, Double>();
        this.computeBase();
        this.computeMatrixMDF();
    } catch (Exception e) {
        // Failure is only reported; the constructor itself never throws
        e.printStackTrace();
    }

}

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Chooses the base attribute and fills {@code base} and {@code noBase}.
 *
 * <p>The base attribute is the non-numeric attribute with the most entries in
 * {@code instances.attributeStats(i).nominalCounts}; on ties the first one wins. Every value of
 * the base attribute is added to {@code base}, and every value of each other non-numeric
 * attribute is added to {@code noBase}. The class attribute is skipped in both passes when
 * {@code ignoreClass} is set. The index of the chosen attribute is stored in
 * {@code baseIndex}.
 *
 * @throws IllegalStateException if no non-numeric attribute with at least one value is
 *         available to serve as base
 */
private void computeBase() {
    //Save base
    int attributeIndex = -1;
    int max = 0;
    for (int i = 0; i < instances.numAttributes(); i++) {
        final Attribute attribute = instances.attribute(i);

        //Ignore class attribute if needed
        if (attribute.index() == instances.classIndex() && ignoreClass)
            continue;

        if (!attribute.isNumeric()) {
            int size = instances.attributeStats(i).nominalCounts.length;
            if (size > max) {
                attributeIndex = i;
                max = size;
            }
        }
    }

    // Without this guard the lookup below would fail with an opaque index error for -1.
    if (attributeIndex < 0) {
        throw new IllegalStateException(
                "No non-numeric attribute with at least one value found to use as base");
    }

    final Attribute maxAttribute = instances.attribute(attributeIndex);
    Enumeration<?> en = maxAttribute.enumerateValues();
    while (en.hasMoreElements()) {
        base.add(new TupleSI(en.nextElement().toString(), attributeIndex));
    }
    this.baseIndex = attributeIndex;

    //Save noBase
    for (int i = 0; i < instances.numAttributes(); i++) {
        final Attribute attribute = instances.attribute(i);
        if (attribute.index() == instances.classIndex() && ignoreClass)
            continue;
        if (i != attributeIndex && !attribute.isNumeric()) {
            Enumeration<?> enb = attribute.enumerateValues();
            while (enb.hasMoreElements()) {
                noBase.add(new TupleSI(enb.nextElement().toString(), i));
            }
        }
    }
}