Example usage for the weka.attributeSelection.AttributeSelection constructor AttributeSelection()

List of usage examples for the weka.attributeSelection.AttributeSelection constructor AttributeSelection()

Introduction

On this page you can find example usages of the weka.attributeSelection.AttributeSelection constructor AttributeSelection().

Prototype

public AttributeSelection() 

Source Link

Document

constructor.

Usage

From source file:FeatureSelectionClass.java

/**
 * Ranks the attributes of the data set read from {@code path} by gain ratio.
 *
 * <p>The ranker keeps at most 500 attributes, or one fewer than the number of
 * attributes in the data when that is smaller.
 *
 * @param path location of the data file, handed to {@code PreparingSteps.getReadFileData}
 * @return the selector after {@code SelectAttributes} has been run on the loaded data
 * @throws Exception if reading the data or running the attribute selection fails
 */
public AttributeSelection withGainRatio(String path) throws Exception {
    // Read the file once and reuse the instances for both the attribute count and the selection.
    Instances data = new PreparingSteps().getReadFileData(path);
    int attributeCount = data.numAttributes();

    // The method name promises gain ratio, so the gain-ratio evaluator is the one to use.
    GainRatioAttributeEval evaluator = new GainRatioAttributeEval();

    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, attributeCount - 1));

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:FeatureSelectionClass.java

/**
 * Ranks the attributes of the data set read from {@code path} by information gain.
 *
 * <p>The ranker keeps at most 500 attributes, or one fewer than the number of
 * attributes in the data when that is smaller.
 *
 * @param path location of the data file, handed to {@code PreparingSteps.getReadFileData}
 * @return the selector after {@code SelectAttributes} has been run on the loaded data
 * @throws Exception if reading the data or running the attribute selection fails
 */
public AttributeSelection withInfoGain(String path) throws Exception {
    // Read the file once and reuse the instances for both the attribute count and the selection.
    Instances data = new PreparingSteps().getReadFileData(path);
    int attributeCount = data.numAttributes();

    // The method name promises information gain, so the info-gain evaluator is the one to use.
    InfoGainAttributeEval evaluator = new InfoGainAttributeEval();

    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, attributeCount - 1));

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:FeatureSelectionClass.java

/**
 * Ranks the attributes of the data set read from {@code path} with the chi-squared evaluator.
 *
 * <p>The ranker keeps at most 500 attributes, or one fewer than the number of
 * attributes in the data when that is smaller.
 *
 * @param path location of the data file, handed to {@code PreparingSteps.getReadFileData}
 * @return the selector after {@code SelectAttributes} has been run on the loaded data
 * @throws Exception if reading the data or running the attribute selection fails
 */
public AttributeSelection withChiSquare(String path) throws Exception {
    // Read the file once and reuse the instances for both the attribute count and the selection.
    Instances data = new PreparingSteps().getReadFileData(path);
    int attributeCount = data.numAttributes();

    ChiSquaredAttributeEval evaluator = new ChiSquaredAttributeEval();

    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, attributeCount - 1));

    AttributeSelection selector = new AttributeSelection();
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:RunExhaustiveSearch.java

License:Open Source License

/**
 * Runs CFS subset evaluation with an exhaustive search over {@code data} and
 * prints the selected attribute indices to standard output.
 *
 * @param data the instances to select attributes from
 * @param n not used by this method
 * @throws Exception if building the evaluator or selecting attributes fails
 */
protected static void runAttributeSelection(Instances data, int n) throws Exception {
    // CFS cost function, built on the data before it is handed to the selector.
    CfsSubsetEval cfsEvaluator = new CfsSubsetEval();
    cfsEvaluator.buildEvaluator(data);

    AttributeSelection selection = new AttributeSelection();
    selection.setEvaluator(cfsEvaluator);
    // Exhaustive search (ES) algorithm.
    selection.setSearch(new ExhaustiveSearch());
    selection.SelectAttributes(data);

    String selectedIndices = Utils.arrayToString(selection.selectedAttributes());
    System.out.println("Selected features:\n" + selectedIndices);
}

From source file:task2.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods./* ww  w .j a v  a  2  s.  c  om*/
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    try (PrintWriter out = response.getWriter()) {
        /* TODO output your page here. You may use following sample code. */
        out.println("<!DOCTYPE html>");
        out.println("<html>");
        out.println("<head>");
        out.println("<title>Servlet selection</title>");
        out.println("</head>");
        out.println("<body>");
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File("C:/Users//Raguvinoth/Desktop/5339.csv"));
        Instances data = loader.getDataSet();

        //Save ARFF
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("\"C:/Users/Raguvinoth/Desktop/5339_converted.arff"));
        saver.writeBatch();

        BufferedReader reader = new BufferedReader(
                new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_nominal.arff"));
        Instances data1 = new Instances(reader);

        if (data1.classIndex() == -1)
            data1.setClassIndex(data1.numAttributes() - 14);
        // 1. meta-classifier
        // useClassifier(data);

        // 2. AttributeSelector
        try {
            AttributeSelection attsel = new AttributeSelection();
            GreedyStepwise search = new GreedyStepwise();
            CfsSubsetEval eval = new CfsSubsetEval();
            attsel.setEvaluator(eval);
            attsel.setSearch(search);
            attsel.SelectAttributes(data);
            int[] indices = attsel.selectedAttributes();

            System.out.println("selected attribute indices:\n" + Utils.arrayToString(indices));
            System.out.println("\n 4. Linear-Regression on above selected attributes");
            long time1 = System.currentTimeMillis();
            long sec1 = time1 / 1000;
            BufferedReader reader1 = new BufferedReader(
                    new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_linear2.arff"));
            Instances data2 = new Instances(reader1);
            data2.setClassIndex(0);
            LinearRegression lr = new LinearRegression();
            lr.buildClassifier(data2);

            System.out.println(lr.toString());
            long time2 = System.currentTimeMillis();
            long sec2 = time2 / 1000;
            long timeTaken = sec2 - sec1;
            System.out.println("Total time taken for building the model: " + timeTaken + " seconds");

            for (int i = 0; i < 5; i++) {
                out.println("<p>" + "selected attribute indices:\n" + Utils.arrayToString(indices[i]) + "</p>");
            }
            out.println("<p>" + "\n 4. Linear-Regression on above selected attributes" + "</p>");
            out.println("<p>" + lr.toString() + "</p>");
            out.println("<p>" + "Total time taken for building the model: " + timeTaken + " seconds" + "</p>");
            out.println("</body>");
            out.println("</html>");
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}

From source file:RunBestFirstSearch.java

License:Open Source License

/**
 * Runs CFS subset evaluation with a best-first search over {@code data} and
 * prints the selected attribute indices to standard output.
 *
 * @param data the instances to select attributes from
 * @param n lookup cache size passed to the best-first search
 * @throws Exception if configuring the search, building the evaluator or
 *         selecting attributes fails
 */
protected static void runAttributeSelection(Instances data, int n) throws Exception {
    AttributeSelection attsel = new AttributeSelection();
    CfsSubsetEval cost_function = new CfsSubsetEval(); // CFS cost function.
    BestFirst algorithm = new BestFirst(); // BFS algorithm.

    cost_function.buildEvaluator(data);

    // BFS with forward direction and terminating search after five
    // non-improving nodes.
    //
    // Each flag and its value must be a separate array element; a fused token such as
    // "-D 1" is not matched by the option parser.
    String[] parameters = { "-D", "1", "-N", "5" };

    algorithm.setOptions(parameters);

    // Applied after setOptions so the cache size cannot be overwritten by it.
    // NOTE(review): setOptions appears to reset unspecified options to defaults - confirm.
    algorithm.setLookupCacheSize(n);

    cost_function.setLocallyPredictive(false);

    attsel.setEvaluator(cost_function);
    attsel.setSearch(algorithm);

    attsel.SelectAttributes(data);

    int[] indices = attsel.selectedAttributes();

    System.out.println("Selected features:\n" + Utils.arrayToString(indices));
}

From source file:PCADetector.java

License:Apache License

/**
 * Adds one new observation and runs a PCA-based check on the current data.
 *
 * <p>On the first call (while {@code m_nDims} is 0) the number of dimensions is
 * taken from {@code nAttrs} and one list per attribute is created. The prepared
 * data is standardized, a principal-components evaluation is run over it, a
 * threshold is computed from the eigenvalues, and the new observation is checked
 * against it; when it is flagged, {@code diagnosis} is invoked.
 *
 * @param newData the new observation
 * @param slidewdSz passed through to {@code prepareData}
 * @param cAlpha stored in {@code c_alpha}
 * @param nAttrs number of dimensions; only read on the first call
 * @return {@code false} when {@code prepareData} returns false; {@code true}
 *         otherwise, including when the analysis was aborted by an exception
 *         (the exception is reported on standard error)
 */
public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) {
    try {
        if (m_nDims == 0) {
            m_nDims = nAttrs;
            for (int i = 0; i < this.m_nDims; i++) {
                m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute
            }
        }
        verifyData(newData);
        this.c_alpha = cAlpha;
        if (!prepareData(newData, slidewdSz)) {
            return false;
        }
        Instances oriDataInsts = getInstances();
        if (oriDataInsts != null) {
            // standardization + PCA covariance matrix
            m_scaledInstances = new Instances(oriDataInsts);
            Standardize filter = new Standardize();

            filter.setInputFormat(m_scaledInstances);
            m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization

            PrincipalComponents pca = new PrincipalComponents();
            pca.setVarianceCovered(1.0); // means 100%
            pca.setMaximumAttributeNames(-1);
            pca.setCenterData(true);
            Ranker ranker = new Ranker();
            AttributeSelection selector = new AttributeSelection();
            selector.setSearch(ranker);
            selector.setEvaluator(pca);
            selector.SelectAttributes(m_scaledInstances);

            // get sorted eigens
            double[] eigenValues = pca.getEigenValues();
            // eigenVectors[i][j]  i: rows; j: cols
            double[][] eigenVectors = pca.getUnsortedEigenVectors();
            Sort(eigenValues, eigenVectors);
            setEigens(eigenValues);

            // get residual start dimension: the first position at which the running
            // eigenvalue sum exceeds 95% of the total; stays -1 if that never happens
            int residualStartDimension = -1;
            double sum = 0;
            double major = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                sum += eigenValues[ss];
            }
            for (int ss = 0; ss < eigenValues.length; ss++) {
                major += eigenValues[ss];
                if (major / sum > 0.95) {
                    residualStartDimension = ss + 1;
                    break;
                }
            }
            m_threshold = computeThreshold(eigenValues, residualStartDimension);

            // check new data abnormal or not
            boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData);
            computeProjPCs(eigenVectors, residualStartDimension, newData); // only for demo

            if (bAbnormal) { // anomaly, now to diagnosis
                // check original space using all the lists
                diagnosis(eigenVectors, residualStartDimension, newData);
            }

        }

    } catch (Exception exc) {
        // Keep the best-effort contract (still returns true) but do not hide the failure.
        // NOTE(review): consider returning false here if callers can tell the cases apart.
        System.err.println("runPCA: analysis aborted: " + exc);
    }
    return true;
}

From source file:adams.flow.transformer.WekaAttributeSelection.java

License:Open Source License

/**
 * Executes the flow item.
 * <p>
 * Takes the instances from the input token (either directly or as the training
 * set of a train/test container), runs attribute selection on them with the
 * configured evaluator and search, and stores a result container in the
 * output token. With two or more folds the selection is cross-validated;
 * otherwise it is run once and a reduced dataset, an optional transformed
 * dataset, a statistics spreadsheet and a range string are generated as well.
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instances reduced;
    Instances transformed;
    AttributeSelection eval;
    boolean crossValidate;
    int fold;
    Instances train;
    WekaAttributeSelectionContainer cont;
    SpreadSheet stats;
    int i;
    Row row;
    int[] selected;
    double[][] ranked;
    Range range;
    String rangeStr;
    boolean useReduced;

    result = null;

    try {
        // payload is either the instances themselves or a container holding a training set
        if (m_InputToken.getPayload() instanceof Instances)
            data = (Instances) m_InputToken.getPayload();
        else
            data = (Instances) ((WekaTrainTestSetContainer) m_InputToken.getPayload())
                    .getValue(WekaTrainTestSetContainer.VALUE_TRAIN);

        // nothing above assigns result, so it is still null at this point
        if (result == null) {
            // cross-validation is only performed with at least two folds
            crossValidate = (m_Folds >= 2);

            // setup evaluation
            eval = new AttributeSelection();
            eval.setEvaluator(m_Evaluator);
            eval.setSearch(m_Search);
            eval.setFolds(m_Folds);
            eval.setSeed((int) m_Seed);
            eval.setXval(crossValidate);

            // select attributes
            if (crossValidate) {
                Random random = new Random(m_Seed);
                // randomize a new Instances object created from the input
                data = new Instances(data);
                data.randomize(random);
                // stratification is only applied when a nominal class attribute is set
                if ((data.classIndex() > -1) && data.classAttribute().isNominal()) {
                    if (isLoggingEnabled())
                        getLogger().info("Stratifying instances...");
                    data.stratify(m_Folds);
                }
                // feed the training split of each fold to the selection
                for (fold = 0; fold < m_Folds; fold++) {
                    if (isLoggingEnabled())
                        getLogger().info("Creating splits for fold " + (fold + 1) + "...");
                    train = data.trainCV(m_Folds, fold, random);
                    if (isLoggingEnabled())
                        getLogger().info("Selecting attributes using all but fold " + (fold + 1) + "...");
                    eval.selectAttributesCVSplit(train);
                }
            } else {
                eval.SelectAttributes(data);
            }

            // generate reduced/transformed dataset
            // (both stay null when cross-validating)
            reduced = null;
            transformed = null;
            if (!crossValidate) {
                reduced = eval.reduceDimensionality(data);
                if (m_Evaluator instanceof AttributeTransformer)
                    transformed = ((AttributeTransformer) m_Evaluator).transformedData(data);
            }

            // generated stats
            // (one header row with attribute names and one data row with a value per attribute)
            stats = null;
            if (!crossValidate) {
                stats = new DefaultSpreadSheet();
                row = stats.getHeaderRow();

                // use the reduced dataset's attributes as columns only if the search
                // produces a ranking whose length equals the number of non-class
                // attributes in the reduced data
                useReduced = false;
                if (m_Search instanceof RankedOutputSearch) {
                    i = reduced.numAttributes();
                    if (reduced.classIndex() > -1)
                        i--;
                    ranked = eval.rankedAttributes();
                    useReduced = (ranked.length == i);
                }

                // header cells carry the attribute names, the data row starts at 0.0
                if (useReduced) {
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell("" + i).setContent(reduced.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                } else {
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell("" + i).setContent(data.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                }

                // ranked searches store ranked[i][1] in the cell addressed by ranked[i][0];
                // all other searches mark the selected attributes with 1.0
                // NOTE(review): when useReduced is true the cells follow the reduced dataset's
                // attribute order - confirm ranked[i][0] addresses the intended column
                if (m_Search instanceof RankedOutputSearch) {
                    ranked = eval.rankedAttributes();
                    for (i = 0; i < ranked.length; i++)
                        row.getCell((int) ranked[i][0]).setContent(ranked[i][1]);
                } else {
                    selected = eval.selectedAttributes();
                    for (i = 0; i < selected.length; i++)
                        row.getCell(selected[i]).setContent(1.0);
                }
            }

            // selected attributes
            // (as range string; stays null when cross-validating)
            rangeStr = null;
            if (!crossValidate) {
                range = new Range();
                range.setIndices(eval.selectedAttributes());
                rangeStr = range.getRange();
            }

            // setup container
            // (cross-validation stores seed and folds, a single run stores stats and range)
            if (crossValidate)
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, m_Seed, m_Folds);
            else
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, stats, rangeStr);
            m_OutputToken = new Token(cont);
        }
    } catch (Exception e) {
        // any failure clears the output token and is turned into the returned error message
        m_OutputToken = null;
        result = handleException("Failed to process data:", e);
    }

    return result;
}

From source file:ca.uottawa.balie.WekaAttributeSelection.java

License:Open Source License

/**
 * Select the top attributes/*from  w  ww.j a va2s.  c  om*/
 */
public void Select(boolean pi_Debug) {
    Instances insts = m_DummyLearner.GetTrainInstances();

    try {
        ASEvaluation eval = null;
        ASSearch search = null;

        if (m_Evaluator == WEKA_CHI_SQUARE) {
            eval = new ChiSquaredAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_INFO_GAIN) {
            eval = new InfoGainAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_WRAPPER) {
            eval = new ClassifierSubsetEval();
            ((ClassifierSubsetEval) eval).setClassifier(new NaiveBayes());
            search = new Ranker(); // TODO: use something else than ranker
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SYM_UNCERT) {
            eval = new SymmetricalUncertAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_SVM) {
            eval = new SVMAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_RELIEF) {
            eval = new ReliefFAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        } else if (m_Evaluator == WEKA_ONER) {
            eval = new OneRAttributeEval();
            search = new Ranker();
            ((Ranker) search).setNumToSelect(m_NumAttributes);
        }

        m_AttributeSelection = new AttributeSelection();
        m_AttributeSelection.setEvaluator(eval);
        m_AttributeSelection.setSearch(search);

        m_AttributeSelection.SelectAttributes(insts);
        if (pi_Debug)
            System.out.println(m_AttributeSelection.toResultsString());

    } catch (Exception e) {
        System.err.println(e.getMessage());
    }

}

From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java

License:Apache License

/**
 * Determines a set of top metrics and removes every other non-class attribute
 * from the test data and from all training data sets.
 *
 * <p>Steps: (1) for every training set a J48 tree is built and it is counted, per
 * non-class attribute, whether the attribute name occurs in the tree's textual
 * form; (2) the attributes are ordered by these counts via {@code SortUtils.quicksort}
 * (presumably descending - confirm against SortUtils); (3) a CFS subset is
 * selected for every training set; (4) the prefix of the ordering with the best
 * summed coverage of the CFS subsets is determined; (5) among the combinations of
 * (at most 30 of) these attributes that {@code isUncorrelated} accepts, the one
 * with the best coverage is kept.
 *
 * @param testdata test data; attributes are deleted in place
 * @param traindataSet training data sets; attributes are deleted in place
 * @throws Exception if building a classifier or selecting attributes fails
 * @throws IllegalStateException if no combination of attributes is accepted
 */
private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
        throws Exception {
    Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
    IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
    for (Instances traindata : traindataSet) {
        J48 decisionTree = new J48();
        decisionTree.buildClassifier(traindata);
        // Render the tree once per training set instead of once per attribute.
        String treeDescription = decisionTree.toString();
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                if (treeDescription.contains(traindata.attribute(j).name())) {
                    counts[k] = counts[k] + 1;
                }
                k++;
            }
        }
    }
    int[] topkIndex = new int[counts.length];
    IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
    SortUtils.quicksort(counts, topkIndex, true);

    // get CFSs for each training set
    List<Set<Integer>> cfsSets = new LinkedList<>();
    for (Instances traindata : traindataSet) {
        boolean selectionSuccessful = false;
        boolean secondAttempt = false;
        Instances traindataCopy = null;
        do {
            try {
                // After an upscaling retry the repaired copy is used instead of the original.
                Instances selectionInput = secondAttempt ? traindataCopy : traindata;
                AttributeSelection attsel = new AttributeSelection();
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                attsel.setEvaluator(eval);
                attsel.setSearch(search);
                attsel.SelectAttributes(selectionInput);
                Set<Integer> cfsSet = new HashSet<>();
                for (int attr : attsel.selectedAttributes()) {
                    cfsSet.add(attr);
                }
                cfsSets.add(cfsSet);
                selectionSuccessful = true;
            } catch (IllegalArgumentException e) {
                String message = e.getMessage();
                if (message == null) {
                    // nothing to match against; cannot treat problem, rethrow exception
                    throw e;
                }
                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
                Pattern p = Pattern.compile(regex);
                Matcher m = p.matcher(message);
                if (!m.find()) {
                    // cannot treat problem, rethrow exception
                    throw e;
                }
                String attributeName = m.group(1);
                int attrIndex = traindata.attribute(attributeName).index();
                // Keep upscaling the already repaired copy on repeated failures.
                Instances upscaleSource = secondAttempt ? traindataCopy : traindata;
                traindataCopy = WekaUtils.upscaleAttribute(upscaleSource, attrIndex);
                Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training");
                secondAttempt = true;
                continue;
            }
        } while (!selectionSuccessful); // dummy loop for internal continue
    }

    double[] coverages = new double[topkIndex.length];
    for (Set<Integer> cfsSet : cfsSets) {
        Set<Integer> topkSet = new HashSet<>();
        for (int k = 0; k < topkIndex.length; k++) {
            topkSet.add(topkIndex[k]);
            coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
        }
    }
    // Double.MIN_VALUE is the smallest POSITIVE double, so it is not a valid "lowest"
    // start value; negative infinity lets a coverage of 0.0 win as well.
    double bestCoverageValue = Double.NEGATIVE_INFINITY;
    int bestCoverageIndex = 0;
    for (int i = 0; i < coverages.length; i++) {
        if (coverages[i] > bestCoverageValue) {
            bestCoverageValue = coverages[i];
            bestCoverageIndex = i;
        }
    }
    // build correlation matrix
    // NOTE(review): bestCoverageIndex is an index but is used as a size here, and the
    // matrix is overwritten for every training set - confirm both are intended.
    SpearmansCorrelation corr = new SpearmansCorrelation();
    double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
    for (Instances traindata : traindataSet) {
        double[][] vectors = new double[bestCoverageIndex][traindata.size()];
        for (int i = 0; i < traindata.size(); i++) {
            for (int j = 0; j < bestCoverageIndex; j++) {
                vectors[j][i] = traindata.get(i).value(topkIndex[j]);
            }
        }
        for (int j = 0; j < bestCoverageIndex; j++) {
            for (int k = j + 1; k < bestCoverageIndex; k++) {
                correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
            }
        }
    }
    Set<Integer> topkSetIndexSet = new TreeSet<>();
    // j<30 ensures that the computational time does not explode since the powerset is 2^n in
    // complexity
    for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
        topkSetIndexSet.add(j);
    }
    Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
    double bestOptCoverage = Double.NEGATIVE_INFINITY;
    Set<Integer> opttopkSetIndexSet = null;
    for (Set<Integer> combination : allCombinations) {
        if (isUncorrelated(correlationMatrix, combination)) {
            double currentCoverage = 0.0;
            Set<Integer> topkCombination = new TreeSet<>();
            for (Integer index : combination) {
                topkCombination.add(topkIndex[index]);
            }
            for (Set<Integer> cfsSet : cfsSets) {
                currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
            }
            if (currentCoverage > bestOptCoverage) {
                bestOptCoverage = currentCoverage;
                opttopkSetIndexSet = combination;
            }
        }
    }
    if (opttopkSetIndexSet == null) {
        // Fail with a clear message instead of a NullPointerException below.
        throw new IllegalStateException("no uncorrelated combination of top-k attributes found");
    }
    Set<Integer> opttopkIndex = new TreeSet<>();
    for (Integer index : opttopkSetIndexSet) {
        opttopkIndex.add(topkIndex[index]);
    }
    Console.traceln(Level.FINE, "selected the following metrics:");
    for (Integer index : opttopkIndex) {
        Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
    }
    // finally remove attributes
    // (iterating from the highest index down keeps the remaining indices valid)
    for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
        if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
            testdata.deleteAttributeAt(j);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(j);
            }
        }
    }
}