Example usage for weka.attributeSelection AttributeSelection SelectAttributes

List of usage examples for weka.attributeSelection AttributeSelection SelectAttributes

Introduction

In this page you can find the example usage for weka.attributeSelection AttributeSelection SelectAttributes.

Prototype

public void SelectAttributes(Instances data) throws Exception 

Source Link

Document

Perform attribute selection on the supplied training instances.

Usage

From source file:FeatureSelectionClass.java

public AttributeSelection withGainRatio(String path) throws Exception {
    int N;//w  w w .ja  va2  s . c o  m
    PreparingSteps pr = new PreparingSteps();
    N = pr.getReadFileData(path).numAttributes();
    Instances data = pr.getReadFileData(path);

    AttributeSelection selector = new AttributeSelection();
    InfoGainAttributeEval evaluator = new InfoGainAttributeEval();
    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, N - 1));
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;

}

From source file:FeatureSelectionClass.java

public AttributeSelection withInfoGain(String path) throws Exception {
    int N;/* w  w w .j a  va2  s  . c  o m*/
    PreparingSteps pr = new PreparingSteps();
    N = pr.getReadFileData(path).numAttributes();
    Instances data = pr.getReadFileData(path);

    AttributeSelection selector = new AttributeSelection();
    GainRatioAttributeEval evaluator = new GainRatioAttributeEval();
    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, N - 1));
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;
}

From source file:FeatureSelectionClass.java

public AttributeSelection withChiSquare(String path) throws Exception {
    int N;/*from w w  w  . j av  a 2s  . co m*/
    PreparingSteps pr = new PreparingSteps();
    N = pr.getReadFileData(path).numAttributes();
    Instances data = pr.getReadFileData(path);

    AttributeSelection selector = new AttributeSelection();
    ChiSquaredAttributeEval evaluator = new ChiSquaredAttributeEval();
    Ranker ranker = new Ranker();
    ranker.setNumToSelect(Math.min(500, N - 1));
    selector.setEvaluator(evaluator);
    selector.setSearch(ranker);
    selector.SelectAttributes(data);
    return selector;

}

From source file:RunExhaustiveSearch.java

License:Open Source License

protected static void runAttributeSelection(Instances data, int n) throws Exception {
    AttributeSelection attsel = new AttributeSelection();
    CfsSubsetEval cost_function = new CfsSubsetEval(); // CFS cost function.
    ExhaustiveSearch algorithm = new ExhaustiveSearch(); //  ES algorithm.

    cost_function.buildEvaluator(data);//ww  w  . j  a  v a  2s  .c  o  m

    attsel.setEvaluator(cost_function);
    attsel.setSearch(algorithm);

    attsel.SelectAttributes(data);

    int[] indices = attsel.selectedAttributes();

    System.out.println("Selected features:\n" + Utils.arrayToString(indices));
}

From source file:task2.java

/**
 * Processes requests for both HTTP <code>GET</code> and <code>POST</code>
 * methods./*w w  w . j  av  a 2  s.  c o m*/
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    try (PrintWriter out = response.getWriter()) {
        /* TODO output your page here. You may use following sample code. */
        out.println("<!DOCTYPE html>");
        out.println("<html>");
        out.println("<head>");
        out.println("<title>Servlet selection</title>");
        out.println("</head>");
        out.println("<body>");
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File("C:/Users//Raguvinoth/Desktop/5339.csv"));
        Instances data = loader.getDataSet();

        //Save ARFF
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File("\"C:/Users/Raguvinoth/Desktop/5339_converted.arff"));
        saver.writeBatch();

        BufferedReader reader = new BufferedReader(
                new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_nominal.arff"));
        Instances data1 = new Instances(reader);

        if (data1.classIndex() == -1)
            data1.setClassIndex(data1.numAttributes() - 14);
        // 1. meta-classifier
        // useClassifier(data);

        // 2. AttributeSelector
        try {
            AttributeSelection attsel = new AttributeSelection();
            GreedyStepwise search = new GreedyStepwise();
            CfsSubsetEval eval = new CfsSubsetEval();
            attsel.setEvaluator(eval);
            attsel.setSearch(search);
            attsel.SelectAttributes(data);
            int[] indices = attsel.selectedAttributes();

            System.out.println("selected attribute indices:\n" + Utils.arrayToString(indices));
            System.out.println("\n 4. Linear-Regression on above selected attributes");
            long time1 = System.currentTimeMillis();
            long sec1 = time1 / 1000;
            BufferedReader reader1 = new BufferedReader(
                    new FileReader("C://Users//Raguvinoth//Desktop//weka1//5339_linear2.arff"));
            Instances data2 = new Instances(reader1);
            data2.setClassIndex(0);
            LinearRegression lr = new LinearRegression();
            lr.buildClassifier(data2);

            System.out.println(lr.toString());
            long time2 = System.currentTimeMillis();
            long sec2 = time2 / 1000;
            long timeTaken = sec2 - sec1;
            System.out.println("Total time taken for building the model: " + timeTaken + " seconds");

            for (int i = 0; i < 5; i++) {
                out.println("<p>" + "selected attribute indices:\n" + Utils.arrayToString(indices[i]) + "</p>");
            }
            out.println("<p>" + "\n 4. Linear-Regression on above selected attributes" + "</p>");
            out.println("<p>" + lr.toString() + "</p>");
            out.println("<p>" + "Total time taken for building the model: " + timeTaken + " seconds" + "</p>");
            out.println("</body>");
            out.println("</html>");
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
}

From source file:RunBestFirstSearch.java

License:Open Source License

protected static void runAttributeSelection(Instances data, int n) throws Exception {
    AttributeSelection attsel = new AttributeSelection();
    CfsSubsetEval cost_function = new CfsSubsetEval(); // CFS cost function.
    BestFirst algorithm = new BestFirst(); // BFS algorithm.

    cost_function.buildEvaluator(data);/*  w  w  w .j a v a  2 s. co  m*/

    algorithm.setLookupCacheSize(n);

    // BFS with forward direction and terminating search after five
    // non-improving nodes.
    //
    String[] parameters = { "-D 1", "-N 5" };

    algorithm.setOptions(parameters);

    cost_function.setLocallyPredictive(false);

    attsel.setEvaluator(cost_function);
    attsel.setSearch(algorithm);

    attsel.SelectAttributes(data);

    int[] indices = attsel.selectedAttributes();

    System.out.println("Selected features:\n" + Utils.arrayToString(indices));
}

From source file:PCADetector.java

License:Apache License

public boolean runPCA(ArrayList<Double> newData, int slidewdSz, double cAlpha, int nAttrs) {
    try {/*from w  w w.  j  a  v a2 s. c o m*/
        if (m_nDims == 0) {
            m_nDims = nAttrs;
            for (int i = 0; i < this.m_nDims; i++) {
                m_oriDataMatrix.add(new ArrayList<Double>()); // one list for each attribute
            }
        }
        verifyData(newData);
        this.c_alpha = cAlpha;
        if (false == prepareData(newData, slidewdSz))
            return false;
        Instances oriDataInsts = getInstances();
        if (oriDataInsts != null) {
            // standardization + PCA covariance matrix
            m_scaledInstances = new Instances(oriDataInsts);
            Standardize filter = new Standardize();

            filter.setInputFormat(m_scaledInstances);
            m_scaledInstances = Standardize.useFilter(m_scaledInstances, filter); // standardization

            PrincipalComponents PCA = new PrincipalComponents();
            PCA.setVarianceCovered(1.0); // means 100%
            PCA.setMaximumAttributeNames(-1);
            PCA.setCenterData(true);
            Ranker ranker = new Ranker();
            AttributeSelection selector = new AttributeSelection();
            selector.setSearch(ranker);
            selector.setEvaluator(PCA);
            selector.SelectAttributes(m_scaledInstances);
            //                Instances transformedData = selector.reduceDimensionality(m_scaledInstances);

            // get sorted eigens
            double[] eigenValues = PCA.getEigenValues();
            // eigenVectors[i][j]  i: rows; j: cols
            double[][] eigenVectors = PCA.getUnsortedEigenVectors();
            Sort(eigenValues, eigenVectors);
            setEigens(eigenValues);

            // get residual start dimension
            int residualStartDimension = -1;
            double sum = 0;
            double major = 0;
            for (int ss = 0; ss < eigenValues.length; ss++) {
                sum += eigenValues[ss];
            }
            for (int ss = 0; ss < eigenValues.length; ss++) {
                major += eigenValues[ss];
                if ((residualStartDimension < 0) && (major / sum > 0.95)) {
                    residualStartDimension = ss + 1;
                    break;
                }
            }
            //            System.out.println("residualStartDim: "+residualStartDimension);
            m_threshold = computeThreshold(eigenValues, residualStartDimension);

            // check new data abnormal or not
            boolean bAbnormal = checkSPE(eigenVectors, residualStartDimension, newData);
            computeProjPCs(eigenVectors, residualStartDimension, newData); // only for demo

            if (bAbnormal) { // anomaly, now to diagnosis
                // check original space using all the lists
                diagnosis(eigenVectors, residualStartDimension, newData);
            }

        }

    } catch (Exception exc) {
    }
    return true;
}

From source file:adams.flow.transformer.WekaAttributeSelection.java

License:Open Source License

/**
 * Executes the flow item./*from   w ww  .  j  a v  a 2 s . c o  m*/
 *
 * @return      null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instances reduced;
    Instances transformed;
    AttributeSelection eval;
    boolean crossValidate;
    int fold;
    Instances train;
    WekaAttributeSelectionContainer cont;
    SpreadSheet stats;
    int i;
    Row row;
    int[] selected;
    double[][] ranked;
    Range range;
    String rangeStr;
    boolean useReduced;

    result = null;

    try {
        if (m_InputToken.getPayload() instanceof Instances)
            data = (Instances) m_InputToken.getPayload();
        else
            data = (Instances) ((WekaTrainTestSetContainer) m_InputToken.getPayload())
                    .getValue(WekaTrainTestSetContainer.VALUE_TRAIN);

        if (result == null) {
            crossValidate = (m_Folds >= 2);

            // setup evaluation
            eval = new AttributeSelection();
            eval.setEvaluator(m_Evaluator);
            eval.setSearch(m_Search);
            eval.setFolds(m_Folds);
            eval.setSeed((int) m_Seed);
            eval.setXval(crossValidate);

            // select attributes
            if (crossValidate) {
                Random random = new Random(m_Seed);
                data = new Instances(data);
                data.randomize(random);
                if ((data.classIndex() > -1) && data.classAttribute().isNominal()) {
                    if (isLoggingEnabled())
                        getLogger().info("Stratifying instances...");
                    data.stratify(m_Folds);
                }
                for (fold = 0; fold < m_Folds; fold++) {
                    if (isLoggingEnabled())
                        getLogger().info("Creating splits for fold " + (fold + 1) + "...");
                    train = data.trainCV(m_Folds, fold, random);
                    if (isLoggingEnabled())
                        getLogger().info("Selecting attributes using all but fold " + (fold + 1) + "...");
                    eval.selectAttributesCVSplit(train);
                }
            } else {
                eval.SelectAttributes(data);
            }

            // generate reduced/transformed dataset
            reduced = null;
            transformed = null;
            if (!crossValidate) {
                reduced = eval.reduceDimensionality(data);
                if (m_Evaluator instanceof AttributeTransformer)
                    transformed = ((AttributeTransformer) m_Evaluator).transformedData(data);
            }

            // generated stats
            stats = null;
            if (!crossValidate) {
                stats = new DefaultSpreadSheet();
                row = stats.getHeaderRow();

                useReduced = false;
                if (m_Search instanceof RankedOutputSearch) {
                    i = reduced.numAttributes();
                    if (reduced.classIndex() > -1)
                        i--;
                    ranked = eval.rankedAttributes();
                    useReduced = (ranked.length == i);
                }

                if (useReduced) {
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell("" + i).setContent(reduced.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < reduced.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                } else {
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell("" + i).setContent(data.attribute(i).name());
                    row = stats.addRow();
                    for (i = 0; i < data.numAttributes(); i++)
                        row.addCell(i).setContent(0.0);
                }

                if (m_Search instanceof RankedOutputSearch) {
                    ranked = eval.rankedAttributes();
                    for (i = 0; i < ranked.length; i++)
                        row.getCell((int) ranked[i][0]).setContent(ranked[i][1]);
                } else {
                    selected = eval.selectedAttributes();
                    for (i = 0; i < selected.length; i++)
                        row.getCell(selected[i]).setContent(1.0);
                }
            }

            // selected attributes
            rangeStr = null;
            if (!crossValidate) {
                range = new Range();
                range.setIndices(eval.selectedAttributes());
                rangeStr = range.getRange();
            }

            // setup container
            if (crossValidate)
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, m_Seed, m_Folds);
            else
                cont = new WekaAttributeSelectionContainer(data, reduced, transformed, eval, stats, rangeStr);
            m_OutputToken = new Token(cont);
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to process data:", e);
    }

    return result;
}

From source file:de.ugoe.cs.cpdp.dataprocessing.TopMetricFilter.java

License:Apache License

private void determineTopKAttributes(Instances testdata, SetUniqueList<Instances> traindataSet)
        throws Exception {
    Integer[] counts = new Integer[traindataSet.get(0).numAttributes() - 1];
    IntStream.range(0, counts.length).forEach(val -> counts[val] = 0);
    for (Instances traindata : traindataSet) {
        J48 decisionTree = new J48();
        decisionTree.buildClassifier(traindata);
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                if (decisionTree.toString().contains(traindata.attribute(j).name())) {
                    counts[k] = counts[k] + 1;
                }/* www.  ja v a2s.  com*/
                k++;
            }
        }
    }
    int[] topkIndex = new int[counts.length];
    IntStream.range(0, counts.length).forEach(val -> topkIndex[val] = val);
    SortUtils.quicksort(counts, topkIndex, true);

    // get CFSs for each training set
    List<Set<Integer>> cfsSets = new LinkedList<>();
    for (Instances traindata : traindataSet) {
        boolean selectionSuccessful = false;
        boolean secondAttempt = false;
        Instances traindataCopy = null;
        do {
            try {
                if (secondAttempt) {
                    AttributeSelection attsel = new AttributeSelection();
                    CfsSubsetEval eval = new CfsSubsetEval();
                    GreedyStepwise search = new GreedyStepwise();
                    search.setSearchBackwards(true);
                    attsel.setEvaluator(eval);
                    attsel.setSearch(search);
                    attsel.SelectAttributes(traindataCopy);
                    Set<Integer> cfsSet = new HashSet<>();
                    for (int attr : attsel.selectedAttributes()) {
                        cfsSet.add(attr);
                    }
                    cfsSets.add(cfsSet);
                    selectionSuccessful = true;
                } else {
                    AttributeSelection attsel = new AttributeSelection();
                    CfsSubsetEval eval = new CfsSubsetEval();
                    GreedyStepwise search = new GreedyStepwise();
                    search.setSearchBackwards(true);
                    attsel.setEvaluator(eval);
                    attsel.setSearch(search);
                    attsel.SelectAttributes(traindata);
                    Set<Integer> cfsSet = new HashSet<>();
                    for (int attr : attsel.selectedAttributes()) {
                        cfsSet.add(attr);
                    }
                    cfsSets.add(cfsSet);
                    selectionSuccessful = true;
                }
            } catch (IllegalArgumentException e) {
                String regex = "A nominal attribute \\((.*)\\) cannot have duplicate labels.*";
                Pattern p = Pattern.compile(regex);
                Matcher m = p.matcher(e.getMessage());
                if (!m.find()) {
                    // cannot treat problem, rethrow exception
                    throw e;
                }
                String attributeName = m.group(1);
                int attrIndex = traindata.attribute(attributeName).index();
                if (secondAttempt) {
                    traindataCopy = WekaUtils.upscaleAttribute(traindataCopy, attrIndex);
                } else {
                    traindataCopy = WekaUtils.upscaleAttribute(traindata, attrIndex);
                }
                Console.traceln(Level.FINE, "upscaled attribute " + attributeName + "; restarting training");
                secondAttempt = true;
                continue;
            }
        } while (!selectionSuccessful); // dummy loop for internal continue
    }

    double[] coverages = new double[topkIndex.length];
    for (Set<Integer> cfsSet : cfsSets) {
        Set<Integer> topkSet = new HashSet<>();
        for (int k = 0; k < topkIndex.length; k++) {
            topkSet.add(topkIndex[k]);
            coverages[k] += (coverage(topkSet, cfsSet) / traindataSet.size());
        }
    }
    double bestCoverageValue = Double.MIN_VALUE;
    int bestCoverageIndex = 0;
    for (int i = 0; i < coverages.length; i++) {
        if (coverages[i] > bestCoverageValue) {
            bestCoverageValue = coverages[i];
            bestCoverageIndex = i;
        }
    }
    // build correlation matrix
    SpearmansCorrelation corr = new SpearmansCorrelation();
    double[][] correlationMatrix = new double[bestCoverageIndex][bestCoverageIndex];
    for (Instances traindata : traindataSet) {
        double[][] vectors = new double[bestCoverageIndex][traindata.size()];
        for (int i = 0; i < traindata.size(); i++) {
            for (int j = 0; j < bestCoverageIndex; j++) {
                vectors[j][i] = traindata.get(i).value(topkIndex[j]);
            }
        }
        for (int j = 0; j < bestCoverageIndex; j++) {
            for (int k = j + 1; k < bestCoverageIndex; k++) {
                correlationMatrix[j][k] = Math.abs(corr.correlation(vectors[j], vectors[k]));
            }
        }
    }
    Set<Integer> topkSetIndexSet = new TreeSet<>();
    // j<30 ensures that the computational time does not explode since the powerset is 2^n in
    // complexity
    for (int j = 0; j < bestCoverageIndex && j < 30; j++) {
        topkSetIndexSet.add(j);
    }
    Set<Set<Integer>> allCombinations = Sets.powerSet(topkSetIndexSet);
    double bestOptCoverage = Double.MIN_VALUE;
    Set<Integer> opttopkSetIndexSet = null;
    for (Set<Integer> combination : allCombinations) {
        if (isUncorrelated(correlationMatrix, combination)) {
            double currentCoverage = 0.0;
            Set<Integer> topkCombination = new TreeSet<>();
            for (Integer index : combination) {
                topkCombination.add(topkIndex[index]);
            }
            for (Set<Integer> cfsSet : cfsSets) {
                currentCoverage += (coverage(topkCombination, cfsSet) / traindataSet.size());
            }
            if (currentCoverage > bestOptCoverage) {
                bestOptCoverage = currentCoverage;
                opttopkSetIndexSet = combination;
            }
        }
    }
    Set<Integer> opttopkIndex = new TreeSet<>();
    for (Integer index : opttopkSetIndexSet) {
        opttopkIndex.add(topkIndex[index]);
    }
    Console.traceln(Level.FINE, "selected the following metrics:");
    for (Integer index : opttopkIndex) {
        Console.traceln(Level.FINE, traindataSet.get(0).attribute(index).name());
    }
    // finally remove attributes
    for (int j = testdata.numAttributes() - 1; j >= 0; j--) {
        if (j != testdata.classIndex() && !opttopkIndex.contains(j)) {
            testdata.deleteAttributeAt(j);
            for (Instances traindata : traindataSet) {
                traindata.deleteAttributeAt(j);
            }
        }
    }
}

From source file:ia02classificacao.IA02Classificacao.java

/**
 * @param args the command line arguments
 *//*from ww w .  j av  a2s  . c  o  m*/
public static void main(String[] args) throws Exception {

    // abre o banco de dados arff e mostra a quantidade de instancias (linhas)
    DataSource arquivo = new DataSource("data/zoo.arff");
    Instances dados = arquivo.getDataSet();
    System.out.println("Instancias lidas: " + dados.numInstances());

    // FILTER: remove o atributo nome do animal da classificao
    String[] parametros = new String[] { "-R", "1" };
    Remove filtro = new Remove();
    filtro.setOptions(parametros);
    filtro.setInputFormat(dados);
    dados = Filter.useFilter(dados, filtro);

    AttributeSelection selAtributo = new AttributeSelection();
    InfoGainAttributeEval avaliador = new InfoGainAttributeEval();
    Ranker busca = new Ranker();
    selAtributo.setEvaluator(avaliador);
    selAtributo.setSearch(busca);
    selAtributo.SelectAttributes(dados);
    int[] indices = selAtributo.selectedAttributes();
    System.out.println("Selected attributes: " + Utils.arrayToString(indices));

    // Usa o algoritimo J48 e mostra a classificao dos dados em forma textual
    String[] opcoes = new String[1];
    opcoes[0] = "-U";
    J48 arvore = new J48();
    arvore.setOptions(opcoes);
    arvore.buildClassifier(dados);
    System.out.println(arvore);

    // Usa o algoritimo J48 e mostra a classificao de dados em forma grafica
    /*
    TreeVisualizer tv = new TreeVisualizer(null, arvore.graph(), new PlaceNode2());
    JFrame frame = new javax.swing.JFrame("?rvore de Conhecimento");
    frame.setSize(800,500);
    frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
    frame.getContentPane().add(tv);
    frame.setVisible(true);
    tv.fitToScreen();
    */

    /*
    * Classificao de novos dados
    */

    System.out.println("\n\nCLASSIFICAO DE NOVOS DADOS");
    // criar atributos
    double[] vals = new double[dados.numAttributes()];
    vals[0] = 1.0; // hair
    vals[1] = 0.0; // feathers
    vals[2] = 0.0; // eggs
    vals[3] = 1.0; // milk
    vals[4] = 1.0; // airborne
    vals[5] = 0.0; // aquatic
    vals[6] = 0.0; // predator
    vals[7] = 1.0; // toothed
    vals[8] = 1.0; // backbone
    vals[9] = 1.0; // breathes
    vals[10] = 0.0; // venomous
    vals[11] = 0.0; // fins
    vals[12] = 4.0; // legs
    vals[13] = 1.0; // tail
    vals[14] = 1.0; // domestic
    vals[15] = 1.0; // catsize

    // Criar uma instncia baseada nestes atributos
    Instance meuUnicornio = new DenseInstance(1.0, vals);

    // Adicionar a instncia nos dados
    meuUnicornio.setDataset(dados);

    // Classificar esta nova instncia
    double label = arvore.classifyInstance(meuUnicornio);

    // Imprimir o resultado da classificao
    System.out.println("Novo Animal: Unicrnio");
    System.out.println("classificacao: " + dados.classAttribute().value((int) label));

    /*
    * Avaliao e predio de erros de mtrica
    */
    System.out.println("\n\nAVALIAO E PREDIO DE ERROS DE MTRICA");
    Classifier cl = new J48();
    Evaluation eval_roc = new Evaluation(dados);
    eval_roc.crossValidateModel(cl, dados, 10, new Random(1), new Object[] {});
    System.out.println(eval_roc.toSummaryString());

    /*
    * Matriz de confuso
    */
    System.out.println("\n\nMATRIZ DE CONFUSO");
    double[][] confusionMatrix = eval_roc.confusionMatrix();
    System.out.println(eval_roc.toMatrixString());

}