Example usage for weka.attributeSelection AttributeSelection rankedAttributes

List of usage examples for weka.attributeSelection AttributeSelection rankedAttributes

Introduction

In this page you can find the example usage for weka.attributeSelection AttributeSelection rankedAttributes.

Prototype

public double[][] rankedAttributes() throws Exception 

Source Link

Document

Get the final ranking of the attributes.

Usage

From source file: adams.flow.transformer.WekaAttributeSelection.java

License: Open Source License

/**
 * Executes the flow item./* w  w w.  ja  va  2  s .c  o  m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instances reduced;
    Instances transformed;
    AttributeSelection eval;
    boolean crossValidate;
    int fold;
    Instances train;
    WekaAttributeSelectionContainer cont;
    SpreadSheet stats;
    int i;
    Row row;
    int[] selected;
    double[][] ranked;
    Range range;
    String rangeStr;
    boolean useReduced;

    result = null;

    try {
        if (m_InputToken.getPayload() instanceof Instances)
            data = (Instances) m_InputToken.getPayload();
        else
            data = (Instances) ((WekaTrainTestSetContainer) m_InputToken.getPayload())
                    .getValue(WekaTrainTestSetContainer.VALUE_TRAIN);

        if (result == null) {
            crossValidate = (m_Folds >= 2);

            // setup evaluation
            eval = new AttributeSelection();
            eval.setEvaluator(m_Evaluator);
            eval.setSearch(m_Search);
            eval.setFolds(m_Folds);
            eval.setSeed((int) m_Seed);
            eval.setXval(crossValidate);

            // select attributes
            if (crossValidate) {
                Random random = new Random(m_Seed);
                data = new Instances(data);
                data.randomize(random);
                if ((data.classIndex() > -1) && data.classAttribute().isNominal()) {
                    if (isLoggingEnabled())
                        getLogger().info("Stratifying instances...");
                    data.stratify(m_Folds);
                }
                for (fold = 0; fold < m_Folds; fold++) {
                    if (isLoggingEnabled())
                        getLogger().info("Creating splits for fold " + (fold + 1) + "...");
                    train = data.trainCV(m_Folds, fold, random);
                    if (isLoggingEnabled())
                        getLogger().info("Selecting attributes using all but fold " + (fold + 1) + "...");
                    eval.selectAttributesCVSplit(train);
                }
            } else {
                eval.SelectAttributes(data);
            }

            // generate reduced/transformed dataset
            reduced = null;
            transformed = null;
            if (!crossValidate) {
                reduced = eval.reduceDimensionality(data);
                if (m_Evaluator instanceof AttributeTransformer)
                    transformed = ((AttributeTransformer) m_Evaluator).transformedData(data);
            }

            // generated stats
            stats = null;
            if (!crossValidate) {
                stats = new DefaultSpreadSheet();
                row = stats.getHeaderRow();

                useReduced = false;
                if (m_Search instanceof RankedOutputSearch) {
                    i = reduced.numAttributes();
                    if (reduced.classIndex() > -1)
                        i--;
                    ranked = eval.rankedAttributes();
                    useReduced = (ranked.length == i);
                }

                if (useReduced) {
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell("" + i).setContent(reduced.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                } else {
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell("" + i).setContent(data.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                }

                if (m_Search instanceof RankedOutputSearch) {
                    ranked = eval.rankedAttributes();
                    for (i = 0; i < ranked.length; i++)
                        row.getCell((int) ranked[i][0]).setContent(ranked[i][1]);
                } else {
                    selected = eval.selectedAttributes();
                    for (i = 0; i < selected.length; i++)
                        row.getCell(selected[i]).setContent(1.0);
                }
            }

            // selected attributes
            rangeStr = null;
            if (!crossValidate) {
                range = new Range();
                range.setIndices(eval.selectedAttributes());
                rangeStr = range.getRange();
            }

            // setup container
            if (crossValidate)
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, m_Seed, m_Folds);
            else
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, stats, rangeStr);
            m_OutputToken = new Token(cont);
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to process data:", e);
    }

    return result;
}

From source file: it.poliba.sisinflab.simlib.featureSelection.methods.CHI.java

/**
 * Ranks all attributes of the given dataset by their Chi-squared statistic
 * with respect to the class and writes the ranked/selected attributes plus
 * the full results string to data/CHIResults.txt.
 *
 * @param dataset   path to the dataset file; must not be empty
 */
public void execute(String dataset) {
    try {
        if (dataset.length() == 0)
            throw new IllegalArgumentException("dataset path must not be empty");
        // Load input dataset.
        DataSource source = new DataSource(dataset);
        System.out.println("Reading instances...");
        Instances data = source.getDataSet();

        // Scores each attribute by its Chi-squared statistic w.r.t. the class.
        ChiSquaredAttributeEval chiEvaluator = new ChiSquaredAttributeEval();

        // Ranker orders attributes by score; -1 means keep all of them.
        Ranker ranker = new Ranker();
        ranker.setNumToSelect(-1);

        AttributeSelection selector = new AttributeSelection();
        System.out.println("Selecting attributes...");
        selector.setSearch(ranker);
        selector.setEvaluator(chiEvaluator);
        selector.SelectAttributes(data);

        // NOTE(review): redirects System.out globally and never restores it;
        // all subsequent console output of the JVM goes to this file.
        PrintStream o = new PrintStream(new File("data/" + "CHIResults" + ".txt"));
        System.setOut(o);
        // rankedAttributes() returns double[][]: deepToString prints the
        // contents (plain toString would print array identity hashes).
        System.out.println(Arrays.deepToString(selector.rankedAttributes()));
        System.out.println(Arrays.toString(selector.selectedAttributes()));
        System.out.println(selector.toResultsString());

        System.out.println();

    } catch (IllegalArgumentException e) {
        System.err.println("Error: " + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: it.poliba.sisinflab.simlib.featureSelection.methods.PCA.java

/**
 * Performs a principal components analysis on the given dataset, ranks the
 * components, and writes the ranked/selected attributes plus the full
 * results string to data/PCAResults.txt.
 *
 * @param dataset   path to the dataset file; must not be empty
 */
public void execute(String dataset) {
    try {
        if (dataset.length() == 0)
            throw new IllegalArgumentException("dataset path must not be empty");
        // Load input dataset.
        DataSource source = new DataSource(dataset);
        Instances data = source.getDataSet();

        // Performs a principal components analysis.
        PrincipalComponents pcaEvaluator = new PrincipalComponents();

        // Retain components covering 100% of the variance.
        pcaEvaluator.setVarianceCovered(1.0);
        // -1: no limit on attributes included in transformed attribute names.
        pcaEvaluator.setMaximumAttributeNames(-1);

        // Center the data before extracting components (was a hard-coded
        // if/else on a constant flag).
        boolean scale = true;
        pcaEvaluator.setCenterData(scale);

        // Ranker orders components by score; -1 means keep all of them.
        Ranker ranker = new Ranker();
        ranker.setNumToSelect(-1);

        AttributeSelection selector = new AttributeSelection();
        selector.setSearch(ranker);
        selector.setEvaluator(pcaEvaluator);
        selector.SelectAttributes(data);

        // Transform data into eigenvector basis.
        Instances transformedData = selector.reduceDimensionality(data);
        // NOTE(review): redirects System.out globally and never restores it;
        // all subsequent console output of the JVM goes to this file.
        PrintStream o = new PrintStream(new File("data/" + "PCAResults" + ".txt"));
        System.setOut(o);
        // rankedAttributes() returns double[][]: deepToString prints the
        // contents (plain toString would print array identity hashes).
        System.out.println(Arrays.deepToString(selector.rankedAttributes()));
        System.out.println(Arrays.toString(selector.selectedAttributes()));
        System.out.println(selector.toResultsString());

        System.out.println();

    } catch (IllegalArgumentException e) {
        System.err.println("Error: " + e.getMessage());
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: mlflex.WekaInMemoryLearner.java

License: Open Source License

/**
 * Selects or ranks features via the WEKA attribute-selection API.
 * When the configured search method is a Ranker, all features are returned
 * in ranked order (best first); otherwise only the selected features are
 * returned, in attribute order.
 *
 * @param algorithmParameters          parameters; index 2 is the search class name
 * @param trainData                    training data to select/rank on
 * @param dependentVariableInstances   class values for the training instances
 * @return names of the selected or ranked data points
 * @throws Exception if attribute selection fails
 */
@Override
protected ArrayList<String> SelectOrRankFeatures(ArrayList<String> algorithmParameters,
        DataInstanceCollection trainData, DataInstanceCollection dependentVariableInstances) throws Exception {
    ArrayList<String> dataPointNames = Lists.SortStringList(trainData.GetDataPointNames());

    FastVector attVector = GetAttributeVector(dependentVariableInstances, dataPointNames, trainData);
    Instances instances = GetInstances(dependentVariableInstances, attVector, trainData);

    AttributeSelection attsel = new AttributeSelection();
    ASEvaluation eval = GetAttributeEvaluator(algorithmParameters);
    ASSearch search = GetSearchMethod(algorithmParameters);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);

    // The third algorithm parameter names the search class; Ranker implies
    // ranking mode rather than subset selection.
    boolean isRanker = algorithmParameters.get(2).equals(Ranker.class.getName());
    if (isRanker)
        attsel.setRanking(true);

    attsel.SelectAttributes(instances);

    ArrayList<String> features = new ArrayList<String>();

    if (isRanker) {
        // rankedAttributes() rows are {attributeIndex, score}, best first.
        for (double[] rank : attsel.rankedAttributes())
            features.add(instances.attribute((int) rank[0]).name());
    } else {
        for (int i : attsel.selectedAttributes())
            features.add(instances.attribute(i).name());
    }

    // (removed futile "instances = null" — the local goes out of scope here)
    return features;
}

From source file: net.semanticmetadata.lire.classifiers.HashingSearchBasedClassifierMod.java

License: Open Source License

/**
 * Ranks all attributes of the ARFF file at {@code wekaFileLocation} by
 * information gain and accumulates each attribute's score into the entry of
 * the feature (group) it belongs to. Attribute names are expected to look
 * like "&lt;featureIndex&gt;_..."; the numeric prefix maps the attribute
 * back to an entry of {@code featureOrder}. The ARFF file is deleted when
 * done.
 *
 * @param wekaFileLocation              path of the temporary ARFF file to score
 * @param featureInformationGain        unused; kept for signature compatibility
 * @param featureSpace                  unused; kept for signature compatibility
 * @param featureSpaceHashMap           unused; kept for signature compatibility
 * @param featureOrder                  feature names, indexed by the attribute-name prefix
 * @param featureInformationGainHashMap accumulator, updated in place and returned;
 *                                      must already contain an entry per feature
 * @return the updated {@code featureInformationGainHashMap}
 */
private static HashMap<String, Double> calculateInformationGain(String wekaFileLocation,
        double[] featureInformationGain, int featureSpace[], HashMap<String, Integer> featureSpaceHashMap,
        ArrayList<String> featureOrder, HashMap<String, Double> featureInformationGainHashMap) {

    Instances data = null;
    try {
        data = new Instances(new BufferedReader(new FileReader(wekaFileLocation)));
    } catch (IOException e) {
        // best-effort: selection below will fail and be logged the same way
        e.printStackTrace();
    }
    AttributeSelection attsel = new AttributeSelection(); // package weka.attributeSelection!
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    // -Double.MAX_VALUE threshold plus numToSelect(-1): rank ALL attributes.
    search.setThreshold(-Double.MAX_VALUE);
    search.setNumToSelect(-1);
    search.setGenerateRanking(true);
    attsel.setEvaluator(eval);
    attsel.setSearch(search);
    try {
        attsel.SelectAttributes(data);
    } catch (Exception e) {
        e.printStackTrace();
    }

    // obtain the ranked attributes ({attributeIndex, score} per row)
    double[][] rankedAttributesArray = new double[0][0];
    try {
        rankedAttributesArray = attsel.rankedAttributes();
    } catch (Exception e) {
        e.printStackTrace();
    }

    for (int i = 0; i < rankedAttributesArray.length; i++) {
        String attName = data.attribute((int) rankedAttributesArray[i][0]).name();
        // numeric prefix before '_' identifies the owning feature group
        int currentFeature = Integer.parseInt(attName.substring(0, attName.indexOf("_")));
        featureInformationGainHashMap.put(featureOrder.get(currentFeature),
                featureInformationGainHashMap.get(featureOrder.get(currentFeature))
                        + rankedAttributesArray[i][1]);
    }

    // (removed a loop that re-put every map value unchanged — it was a no-op,
    // the per-feature-space normalization it once performed is commented out)

    System.out.println("Scoring finished, starting with classification! Scores: ");
    for (int i = 0; i < featureOrder.size(); i++) {
        System.out.println(featureOrder.get(i) + " " + featureInformationGainHashMap.get(featureOrder.get(i)));
    }

    // the ARFF file is a temporary artifact; remove it once scored
    File deleteFile = new File(wekaFileLocation);
    deleteFile.delete();
    return featureInformationGainHashMap;
}

From source file: trabfs.machineLeaningFrameWork.core.Problema.java

/**
 * Scores every attribute (except the class) with three filter evaluators
 * (information gain, Chi-squared, ReliefF), min-max normalizes each
 * filter's scores to [0, 1], stores them in the {@code R} matrix, and
 * returns the per-attribute average across the filters.
 *
 * @return per-attribute quality in [0, 1], or null if selection fails
 */
public double[] getAttributeQuality() {
    try {
        ASEvaluation[] filters = { new InfoGainAttributeEval(), new ChiSquaredAttributeEval(),
                new ReliefFAttributeEval() };
        R = new double[data.numAttributes() - 1][filters.length];
        Ranker ranker = new Ranker();
        AttributeSelection selector = new AttributeSelection();
        selector.setSearch(ranker);

        for (int j = 0; j < filters.length; j++) {
            selector.setEvaluator(filters[j]);
            selector.SelectAttributes(data);
            double[][] full = selector.rankedAttributes();

            // Sort by attribute index (column 0) so full[i] lines up with
            // attribute i; rankedAttributes() returns rows ordered by score.
            Arrays.sort(full, new Comparator<double[]>() {
                @Override
                public int compare(double[] a1, double[] a2) {
                    return Double.compare(a1[0], a2[0]);
                }
            });

            double max = Double.NEGATIVE_INFINITY, min = Double.POSITIVE_INFINITY;
            for (int i = 0; i < full.length; i++) {
                if (full[i][1] < min)
                    min = full[i][1];
                if (full[i][1] > max)
                    max = full[i][1];
            }

            // min-max normalize into R; guard the degenerate all-equal case,
            // which previously produced NaN from a 0/0 division
            double span = max - min;
            for (int i = 0; i < full.length; i++) {
                R[i][j] = (span > 0) ? (full[i][1] - min) / span : 0.0;
            }
        }

        // average across the filters (was a hard-coded 3.0f divisor)
        double[] Rfinal = new double[data.numAttributes() - 1];
        for (int i = 0; i < Rfinal.length; i++) {
            Rfinal[i] = somaWK(i) / filters.length;
        }

        return Rfinal;
    } catch (Exception ex) {
        Logger.getLogger(Problema.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}