Example usage for weka.core Instances numAttributes

List of usage examples for weka.core Instances numAttributes

Introduction

On this page you can find example usage for weka.core.Instances.numAttributes().

Prototype


public int numAttributes()

Document

Returns the number of attributes.
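
A minimal, self-contained sketch of a typical call (the file name is a placeholder; any ARFF data set works): load the data, report the attribute count, and use it to mark the last attribute as the class.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumAttributesDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder path
        System.out.println("Number of attributes: " + data.numAttributes());
        // Common idiom: treat the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(i + ": " + data.attribute(i).name());
        }
    }
}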

Usage

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Returns the contents of an item set as a string.
 *
 * @param instances contains the relevant header information
 * @return string describing the item set
 */
public String toString(Instances instances) {

    StringBuffer text = new StringBuffer();

    for (int i = 0; i < instances.numAttributes(); i++)
        if (m_items[i] != -1) {
            text.append(instances.attribute(i).name() + '=');
            text.append(instances.attribute(i).value(m_items[i]) + ' ');
        }
    text.append(m_counter);
    return text.toString();
}

From source file:CEP.GenerateStream.java

public void MakeStream() {
    File file = new File("C:\\Users\\Weary\\Documents\\w4ndata\\w4ndata.arff");
    String pc = System.getProperty("user.dir").toString();
    if (pc.contains("gs023850")) {
        file = new File("C:\\Users\\gs023850\\Documents\\w4ndata\\w4ndata.arff");
    }
    try {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances structure = loader.getStructure();
        int j = structure.numAttributes(); // number of attributes declared in the ARFF header

        HeaderManager.SetStructure(new Instances(structure));
        Instance current;
        long previousTimeStamp = 0;
        String timeStamp = "0";
        long wait = 0;

        while ((current = loader.getNextInstance(structure)) != null) {
            timeStamp = current.stringValue(0);
            cepRT.sendEvent(current);
            System.out.println("Sending event");
            previousTimeStamp = WaitTime(timeStamp, previousTimeStamp, wait);
        }
    } catch (Exception e) {
        // Throwable does not override equals(), so check the exception type with instanceof.
        if (e instanceof FileNotFoundException) {
            System.out.println("File not found - could not generate stream");
            return;
        } else if (e instanceof IOException) {
            System.out.println("Unable to read file");
        } else if (e instanceof NumberFormatException) {
            System.out.println("Unable to convert time to number - bad time");
        } else {
            System.out.println(e.toString());
        }
    }
}

From source file:CEP.HeaderManager.java

static void SetStructure(Instances structure) {
    ArffStructure = structure;
    structure.setClassIndex(structure.numAttributes() - 1);

    lock = false;
}

From source file:cerebro.Id3.java

License:Open Source License

/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new Id3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new Id3();
            m_Successors[j].makeTree(splitData[j]);
        }
    }
}
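
The computeInfoGain helper referenced above is not part of this listing. As a rough sketch of what it typically does (following the standard Weka Id3 example classifier; the computeEntropy helper is assumed), it subtracts the weighted entropy of each subset produced by splitData from the entropy of the parent node:

private double computeInfoGain(Instances data, Attribute att) throws Exception {
    double infoGain = computeEntropy(data);
    Instances[] splitData = splitData(data, att);
    for (int j = 0; j < att.numValues(); j++) {
        if (splitData[j].numInstances() > 0) {
            // Weight each subset's entropy by its share of the instances.
            infoGain -= ((double) splitData[j].numInstances() / (double) data.numInstances())
                    * computeEntropy(splitData[j]);
        }
    }
    return infoGain;
}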

From source file:cezeri.feature.selection.FeatureSelectionInfluence.java

public static Influence[] getMostDiscriminativeFeature(String filePath, Classifier model) {
    Influence[] ret = null;
    try {
        Instances data = DataSource.read(filePath);
        ret = new Influence[data.numAttributes() - 1];
        data.setClassIndex(data.numAttributes() - 1);
        // other options
        int seed = 1;
        int folds = 10;
        // randomize data
        Instances randData = new Instances(data);
        Random rand = new Random(seed);
        randData.randomize(rand);
        Evaluation evalBase = getEvaluation(randData, model, folds);
        double accBase = evalBase.correct() / evalBase.numInstances() * 100;
        double nf = randData.numAttributes();

        for (int j = 0; j < nf - 1; j++) {
            ret[j] = new Influence();
            String str = randData.attribute(j).name();
            Attribute att = randData.attribute(j);
            randData.deleteAttributeAt(j);
            Evaluation evalTemp = getEvaluation(randData, model, folds);
            double accTemp = evalTemp.correct() / evalTemp.numInstances() * 100;
            double tempInfluence = accBase - accTemp;
            ret[j].attributeName = str;
            ret[j].infVal = tempInfluence;
            randData.insertAttributeAt(att, j);
        }
        sortInfluenceArray(ret);
    } catch (Exception ex) {
        Logger.getLogger(FeatureSelectionInfluence.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ret;
}
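
A hypothetical usage sketch (the file path and the J48 classifier are placeholders; any weka.classifiers.Classifier should fit the parameter), printing the accuracy drop attributed to removing each attribute:

Influence[] influences = FeatureSelectionInfluence.getMostDiscriminativeFeature(
        "mydata.arff", new weka.classifiers.trees.J48());
for (Influence inf : influences) {
    System.out.println(inf.attributeName + " -> " + inf.infVal);
}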

From source file:cezeri.feature.selection.FeatureSelectionRanker.java

/**
 * Use this method only for classification problems; Fisher is not suitable
 * for regression. For regression problems you can use the CRCF method.
 *
 * @param data
 * @param type
 * @return
 */
public static TFeatureRank[] fisherDistance(Instances data, int type) {
    if (type == TMachineLearning.REGRESSION) {
        return null;
    }
    TFeatureRank[] ret = new TFeatureRank[data.numAttributes() - 1];
    String[] attributeNames = FactoryInstance.getAttributeList(data);
    //        FactoryInstance.getMatrix(data).plot();
    Instances[] ins = FactoryInstance.getSpecificInstancesBasedOnClassValue(data,
            FactoryInstance.getDefaultClasses(data));
    if (ins.length < 2) {
        return null;
    }
    double[][] cl_1 = CMatrix.getInstance(FactoryInstance.getData(ins[0])).transpose().get2DArrayDouble();
    double[][] cl_2 = CMatrix.getInstance(FactoryInstance.getData(ins[1])).transpose().get2DArrayDouble();
    //        FactoryMatrix.transpose(FactoryInstance.getData(ins[1]));
    double[] fisher = new double[cl_1.length];
    for (int i = 0; i < cl_1.length - 1; i++) {
        double mean_1 = FactoryUtils.getMean(cl_1[i]);
        double std_1 = FactoryStatistic.getStandardDeviation(cl_1[i]);
        double mean_2 = FactoryUtils.getMean(cl_2[i]);
        double std_2 = FactoryStatistic.getStandardDeviation(cl_2[i]);
        if (Math.pow(std_1, 2) + Math.pow(std_2, 2) == 0.0) {
            fisher[i] = 0.0;
        } else {
            //                double f = Math.abs(mean_1 - mean_2) / (Math.pow(std_1, 2) + Math.pow(std_2, 2));
            double f = Math.pow((mean_1 - mean_2), 2) / (Math.pow(std_1, 2) + Math.pow(std_2, 2));
            fisher[i] = FactoryUtils.formatDouble(f);
        }
        TFeatureRank obj = new TFeatureRank();
        obj.featureName = attributeNames[i];
        obj.index = "" + i;
        obj.value = fisher[i];
        ret[i] = obj;
        //println(i + ".fisher distance:" + fisher[i]);
    }
    ArrayList<TFeatureRank> lst = toArrayList(ret);
    Collections.sort(lst, new CustomComparatorForFeatureRank());
    ret = toArray(lst);
    //        int[] fisherIndex = FactoryUtils.sortArrayAndReturnIndex(fisher, "desc");
    return ret;
}
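
A usage sketch under stated assumptions: the data set must be a two-class classification problem, and the TMachineLearning.CLASSIFICATION constant used here is an assumption (only REGRESSION appears in this listing; any non-regression constant the library defines would do):

Instances data = DataSource.read("binary_problem.arff"); // placeholder path
data.setClassIndex(data.numAttributes() - 1);
TFeatureRank[] ranked = FeatureSelectionRanker.fisherDistance(data, TMachineLearning.CLASSIFICATION);
for (TFeatureRank r : ranked) {
    System.out.println(r.featureName + " (index " + r.index + "): " + r.value);
}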

From source file:cezeri.feature.selection.FeatureSelectionRanker.java

/**
 * If a full exhaustive search is not feasible due to computational cost,
 * shrink the search space by reducing the number of features you want to
 * explore.
 *
 * @param nSubset desired subset size, e.g. if you have 15 features you may
 * want a 9-feature subset result
 * @param data train or test data
 * @param model the classifier to use
 * @param nFolds number of cross-validation folds used during learning
 * @param show_text print the output
 * @param show_plot plot the output
 * @return
 */
public static TFeatureRank[] wrapperExhaustiveSearchLimited(int nSubset, Instances data, Classifier model,
        int nFolds, boolean show_text, boolean show_plot) {
    if (nSubset > data.numAttributes() - 1) {
        System.out.println("subset size must be smaller than the number of attributes");
        return null;
    }

    String[] attributeNames = FactoryInstance.getAttributeListExceptClassAttribute(data);
    String[] lstComb = FactoryCombination.getCombination(attributeNames, nSubset);
    FactoryCombination.toString(lstComb);
    TFeatureRank[] ret = computeCombinationPairs(lstComb, data, model, nFolds, show_text, show_plot);
    return ret;
}
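
A hedged usage sketch of the limited search (the loaded data, classifier choice, and fold count are placeholders): rank every 5-feature combination using 10-fold cross-validation.

// Assumes 'data' is a loaded Instances object with its class index set.
TFeatureRank[] top5Subsets = FeatureSelectionRanker.wrapperExhaustiveSearchLimited(
        5, data, new weka.classifiers.functions.SMO(), 10, true, false);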

From source file:cezeri.feature.selection.FeatureSelectionRanker.java

/**
 * An exhaustive global search over the feature space is only feasible when
 * the number of features is less than 15.
 *
 * @param data :dataset
 * @param model :classifier
 * @param nFolds :number of cross validation folds
 * @param show_text :print the output
 * @param show_plot :plot the output
 * @return
 */
public static TFeatureRank[] wrapperExhaustiveSearch(Instances data, Classifier model, int nFolds,
        boolean show_text, boolean show_plot) {
    if (data.numAttributes() > 15) {
        System.out.println(
                "exhaustive search over more than 15 attributes is not feasible; the computational cost is too high");
        return null;
    }
    String[] attributeNames = FactoryInstance.getAttributeListExceptClassAttribute(data);
    String[] lstComb = FactoryCombination.getAllCombinations(attributeNames);
    TFeatureRank[] ret = computeCombinationPairs(lstComb, data, model, nFolds, show_text, show_plot);
    return ret;
}
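
The corresponding full search, feasible only for small attribute counts per the guard above (again, the inputs are placeholders):

// Assumes 'data' has 15 or fewer attributes and its class index set.
TFeatureRank[] allSubsets = FeatureSelectionRanker.wrapperExhaustiveSearch(
        data, new weka.classifiers.trees.J48(), 10, true, false);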

From source file:cezeri.feature.selection.FeatureSelectionRanker.java

private static TFeatureRank[] correlation(Instances data, int type) {
    TFeatureRank[] ret = new TFeatureRank[data.numAttributes() - 1];
    String[] attributeNames = FactoryInstance.getAttributeList(data);
    double[] out = data.attributeToDoubleArray(data.classIndex());
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        TFeatureRank obj = new TFeatureRank();
        obj.featureName = attributeNames[i];
        obj.index = i + "";
        if (type == TCorelation.ARE) {
            obj.value = Math.abs(FactoryStatistic.ARE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.CRCF) {
            obj.value = Math.abs(FactoryStatistic.CRCF(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.IOA) {
            obj.value = Math.abs(FactoryStatistic.IOA(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.KENDALL) {
            obj.value = Math.abs(FactoryStatistic.KENDALL(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MAE) {
            obj.value = Math.abs(FactoryStatistic.MAE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MPE) {
            obj.value = Math.abs(FactoryStatistic.MPE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.MSE) {
            obj.value = Math.abs(FactoryStatistic.MSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.NSEC) {
            obj.value = Math.abs(FactoryStatistic.NSEC(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.PEARSON) {
            obj.value = Math.abs(FactoryStatistic.PEARSON(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.R) {
            obj.value = Math.abs(FactoryStatistic.R(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.R2) {
            obj.value = Math.abs(FactoryStatistic.R2(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RAE) {
            obj.value = Math.abs(FactoryStatistic.RAE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RELATIVE_NSEC) {
            obj.value = Math.abs(FactoryStatistic.RELATIVE_NSEC(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RMSE) {
            obj.value = Math.abs(FactoryStatistic.RMSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.RRSE) {
            obj.value = Math.abs(FactoryStatistic.RRSE(data.attributeToDoubleArray(i), out));
        }
        if (type == TCorelation.SPEARMAN) {
            obj.value = Math.abs(FactoryStatistic.SPEARMAN(data.attributeToDoubleArray(i), out));
        }
        //            if (type==FactoryCorrelation.KENDALL) {
        //                obj.value=Math.abs(FactoryCorrelation.rankKendallTauBeta(data.attributeToDoubleArray(i), out));
        //            }
        //            if (type==FactoryCorrelation.PEARSON) {
        //                obj.value=Math.abs(FactoryCorrelation.pearson(data.attributeToDoubleArray(i), out));
        //            }
        //            if (type==FactoryCorrelation.SPEARMAN) {
        //                obj.value=Math.abs(FactoryCorrelation.spearman(data.attributeToDoubleArray(i), out));
        //            }            
        ret[i] = obj;
    }
    ArrayList<TFeatureRank> lst = toArrayList(ret);
    Collections.sort(lst, new CustomComparatorForFeatureRank());
    ret = toArray(lst);
    return ret;
}

From source file:cezeri.utils.FactoryInstance.java

public static Instances generateInstances(String relationName, int nCols) {
    CMatrix cm = CMatrix.getInstance().zeros(1, nCols);
    FastVector att = new FastVector();
    for (int i = 0; i < cm.getColumnNumber(); i++) {
        att.addElement(new Attribute("f" + (i + 1)));
    }
    Instances ret = new Instances(relationName, att, cm.getRowNumber());
    for (int i = 0; i < cm.getRowNumber(); i++) {
        Instance ins = new Instance(cm.getColumnNumber());
        for (int j = 0; j < cm.getColumnNumber(); j++) {
            ins.setValue(j, cm.get2DArrayDouble()[i][j]);
        }
        ret.add(ins);
    }
    ret.setClassIndex(ret.numAttributes() - 1);
    return ret;
}
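
A brief usage sketch: build a single-row, 4-column data set and confirm that the last of the four generated attributes was chosen as the class (the relation name is arbitrary):

Instances generated = FactoryInstance.generateInstances("demo", 4);
System.out.println(generated.numAttributes()); // 4
System.out.println(generated.classIndex());    // 3, i.e. numAttributes() - 1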