Example usage for weka.filters.supervised.attribute AttributeSelection setSearch

List of usage examples for weka.filters.supervised.attribute AttributeSelection setSearch

Introduction

On this page you can find example usage of weka.filters.supervised.attribute.AttributeSelection#setSearch.

Prototype

public void setSearch(ASSearch search) 

Source Link

Document

Sets the search class used to perform the attribute selection.

Usage

From source file:com.ivanrf.smsspam.SpamClassifier.java

License:Apache License

/**
 * Builds a {@link FilteredClassifier} that turns SMS text into a word vector
 * (optionally with information-gain attribute selection) and trains the
 * requested base classifier, optionally wrapped in AdaBoostM1.
 *
 * @param wordsToKeep           number of words the StringToWordVector filter keeps
 * @param tokenizerOp           tokenizer option (TOKENIZER_DEFAULT, TOKENIZER_COMPLETE, or the numbers variant)
 * @param useAttributeSelection if true, append an InfoGain/Ranker attribute-selection filter
 * @param classifierOp          base classifier option (SMO, NaiveBayes, IBk 1/3/5, PART)
 * @param boosting              if true, wrap the chosen classifier in AdaBoostM1
 * @return the configured, untrained FilteredClassifier
 * @throws Exception if any Weka component rejects its configuration
 */
private static FilteredClassifier initFilterClassifier(int wordsToKeep, String tokenizerOp,
        boolean useAttributeSelection, String classifierOp, boolean boosting) throws Exception {
    // Text-to-vector conversion: one numeric attribute per retained word.
    StringToWordVector filter = new StringToWordVector();
    filter.setDoNotOperateOnPerClassBasis(true);
    filter.setLowerCaseTokens(true);
    filter.setWordsToKeep(wordsToKeep);

    if (!tokenizerOp.equals(TOKENIZER_DEFAULT)) {
        // Custom tokenizer with an extended delimiter set.
        WordTokenizer wt = new WordTokenizer();
        if (tokenizerOp.equals(TOKENIZER_COMPLETE))
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}");
        else // TOKENIZER_COMPLETE_NUMBERS: additionally split on '|', '~' and digits
            wt.setDelimiters(" \r\n\t.,;:\'\"()?!-+*&#$%/=<>[]_`@\\^{}|~0123456789");
        filter.setTokenizer(wt);
    }

    FilteredClassifier classifier = new FilteredClassifier();

    // Decide which filter the classifier uses (plain word vector, or word
    // vector followed by attribute selection) and install it exactly once,
    // instead of setting it and then overwriting it.
    Filter modelFilter = filter;
    if (useAttributeSelection) {
        AttributeSelection as = new AttributeSelection();
        as.setEvaluator(new InfoGainAttributeEval());
        Ranker r = new Ranker();
        r.setThreshold(0); // keep every attribute with positive information gain
        as.setSearch(r);

        // MultiFilter applies the word-vector filter first, then selection.
        MultiFilter mf = new MultiFilter();
        mf.setFilters(new Filter[] { filter, as });
        modelFilter = mf;
    }
    classifier.setFilter(modelFilter);

    if (classifierOp.equals(CLASSIFIER_SMO))
        classifier.setClassifier(new SMO());
    else if (classifierOp.equals(CLASSIFIER_NB))
        classifier.setClassifier(new NaiveBayes());
    else if (classifierOp.equals(CLASSIFIER_IB1))
        classifier.setClassifier(new IBk(1));
    else if (classifierOp.equals(CLASSIFIER_IB3))
        classifier.setClassifier(new IBk(3));
    else if (classifierOp.equals(CLASSIFIER_IB5))
        classifier.setClassifier(new IBk(5));
    else if (classifierOp.equals(CLASSIFIER_PART))
        classifier.setClassifier(new PART()); // slow to train

    if (boosting) {
        // Boost whatever base classifier was selected above.
        AdaBoostM1 boost = new AdaBoostM1();
        boost.setClassifier(classifier.getClassifier());
        classifier.setClassifier(boost); // with NaiveBayes this is very slow
    }

    return classifier;
}

From source file:com.relationalcloud.main.Explanation.java

License:Open Source License

/**
 * @param args//from  w  w w .j a v  a2  s .  co m
 */
/**
 * Entry point. Loads the configuration property file (named by the "prop"
 * system property), connects to the workload database and to
 * information_schema, extracts transaction templates for every table of the
 * workload, trains a J48 decision tree per table to explain the partitioning
 * labels, and optionally populates the explanation / hash / replication
 * columns.
 *
 * @param args unused; all configuration comes from the property file
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");

    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");

    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);

    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file :"
            + jh.dbPropertyFile);

    // Declared outside the try so both connections can be released in the
    // finally block even when an exception interrupts processing (previously
    // infschema_conn was never closed and conn leaked on any failure).
    Connection conn = null;
    Connection infschema_conn = null;
    try {

        // CREATE THE DB CONNECTIONS
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE WORKLOADS EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE CLASSIFY AND POPULATE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCE FROM THE DB AND SAMPLE IT
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed), conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transactions touches this table, nothing to be done.");
                continue;
            }
            // INSTANTIATE THE CLASSIFIER (J48 with configurable pruning confidence)
            String[] options;
            options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            boolean attributeFilter = true; // primitive flag; no need to box
            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();

                //FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                //InfoGainAttributeEval eval = new InfoGainAttributeEval();
                //Ranker search = new Ranker();
                //search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute","2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();

                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            // Build a readable, comma-separated list of surviving attributes
            // (enumerateAttributes skips the class attribute).
            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            // Guard: substring(0, -2) would throw if no attributes survived.
            if (atts.length() >= 2)
                atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("There are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {

                        // A single leaf means every tuple lands in one partition.
                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuples in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                    conn);
                        }

                    }

                    // POPULATING THE justifiedpartition column with the result of this
                    // classifier if required
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation, conn,
                                numPart, newData.classAttribute().enumerateValues());
                    }

                } else { // easy case... the class attribute is unary!!
                    int partitionvalue = ((int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex()));
                    System.out.println("The table is all stored in one partition, no need to use classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");

        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }

        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always release both connections, success or failure.
        try {
            if (infschema_conn != null)
                infschema_conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        try {
            if (conn != null)
                conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

}

From source file:es.jarias.FMC.FMC.java

License:Open Source License

/**
 * Trains the pairwise FMC multi-label model on the training fold and writes the
 * evaluation artefacts for the test fold to {@code outPath}: per-instance true
 * label configurations (conf_), pairwise class distributions (dist_), singleton
 * label predictions (sing_) and the model layout (layo_), one file set per fold.
 *
 * @param trainData           multi-label training instances
 * @param testData            multi-label test instances
 * @param fold                fold index, used only in the output file names
 * @param baseClassifierClass simple class name (under weka.classifiers) of the base learner
 * @param discType            discretization strategy: "supervised" or "unsupervised"
 * @param fss                 feature subset selection: "no" or "CFS"
 * @param outPath             directory where the output files are written
 * @param prune               pair-pruning strategy: "full", "tree", "best", "hiton" or "bdeu"
 * @throws Exception if an argument value is invalid or model construction fails
 */
public static void buildModel(MultiLabelInstances trainData, MultiLabelInstances testData, int fold,
        String baseClassifierClass, String discType, String fss, String outPath, String prune)
        throws Exception {

    double start = System.nanoTime();

    try {

        // DATA PREPROCESSING:

        weka.filters.unsupervised.attribute.Discretize m_unsuperDiscretize = null;

        if (discType.equals("supervised")) {
            // pass
            // Supervised discretization is applied to each model later during the training step.
        } else if (discType.equals("unsupervised")) {
            // Apply a baseline discretization filter:
            m_unsuperDiscretize = new weka.filters.unsupervised.attribute.Discretize();
            m_unsuperDiscretize.setUseEqualFrequency(false);
            m_unsuperDiscretize.setBins(3);
            m_unsuperDiscretize.setInputFormat(trainData.getDataSet());

            trainData = trainData
                    .reintegrateModifiedDataSet(Filter.useFilter(trainData.getDataSet(), m_unsuperDiscretize));
        } else
            throw new Exception("Invalid Discretization Type");

        if (!fss.equals("no") && !fss.equals("CFS"))
            throw new Exception("Invalid FSS strategy");

        if (!prune.equals("full") && !prune.equals("tree") && !prune.equals("best") && !prune.equals("hiton")
                && !prune.equals("bdeu"))
            throw new Exception("Invalid Pruning strategy");

        // Label information
        int m_numLabels = trainData.getNumLabels();
        int[] m_labelIndices = trainData.getLabelIndices();

        // Map for reference: dataset attribute index -> label ordinal.
        HashMap<Integer, Integer> mapLabels = new HashMap<Integer, Integer>(m_numLabels);
        String[] mapLabelsName = new String[m_numLabels];
        for (int l = 0; l < m_numLabels; l++) {
            mapLabels.put(trainData.getLabelIndices()[l], l);
            mapLabelsName[l] = trainData.getDataSet().attribute(trainData.getLabelIndices()[l]).name();
        }

        // Get label combinations: all unordered label pairs.
        int m_numPairs = (m_labelIndices.length * (m_labelIndices.length - 1)) / 2;
        int[][] labelCombinations = new int[m_numPairs][2];

        int counter = 0;
        for (int i = 0; i < m_labelIndices.length; i++) {
            for (int j = i + 1; j < m_labelIndices.length; j++) {
                labelCombinations[counter] = new int[] { m_labelIndices[i], m_labelIndices[j] };
                counter++;
            }
        }

        // Select the pairs:
        int m_numSelected = m_numPairs;
        int m_numSingleton = 0;
        int[] ordered;
        boolean[] selectedPair = new boolean[m_numPairs];
        boolean[] singleton = new boolean[m_numLabels];

        for (int i = 0; i < m_numPairs; i++)
            selectedPair[i] = true;

        if (!prune.equals("full")) {

            m_numSelected = 0;
            selectedPair = new boolean[m_numPairs];

            // Info gain for pruned model: pairwise mutual information between labels.
            // (An unused per-iteration copy of the full dataset was removed here;
            // it only wasted memory.)
            double[][] mutualInfoPairs = mutualInfo(trainData.getDataSet(), trainData.getLabelIndices());
            double[] mutualInfo = new double[m_numPairs];
            counter = 0;
            for (int i = 0; i < m_labelIndices.length; i++) {
                for (int j = i + 1; j < m_labelIndices.length; j++) {
                    mutualInfo[counter] = mutualInfoPairs[i][j];
                    counter++;
                }
            }

            ordered = orderBy(mutualInfo);

            if (prune.equals("tree")) {
                // Chow-Liu style: keep a maximum-MI spanning forest over the labels.
                // Each label starts in its own connected component.
                HashMap<Integer, ArrayList<Integer>> tree_compo = new HashMap<Integer, ArrayList<Integer>>(
                        m_numLabels);
                HashMap<Integer, Integer> tree_index = new HashMap<Integer, Integer>(m_numLabels);

                for (int i = 0; i < m_numLabels; i++) {
                    tree_compo.put(i, new ArrayList<Integer>());
                    tree_compo.get(i).add(i);
                    tree_index.put(i, i);
                }

                for (int i = 0; i < m_numPairs; i++) {
                    if (m_numSelected >= m_numLabels - 1)
                        break;

                    int pairIndex = ordered[i];
                    int pair_i = mapLabels.get(labelCombinations[pairIndex][0]);
                    int pair_j = mapLabels.get(labelCombinations[pairIndex][1]);

                    int conex_i = tree_index.get(pair_i);
                    int conex_j = tree_index.get(pair_j);

                    // Select the pair only if it joins two distinct components
                    // (i.e. it does not create a cycle), then merge them.
                    if (conex_i != conex_j) {
                        ArrayList<Integer> family = tree_compo.get(conex_j);
                        tree_compo.get(conex_i).addAll(family);
                        for (int element : family) {
                            tree_index.put(element, conex_i);
                        }

                        selectedPair[pairIndex] = true;
                        m_numSelected++;
                    }
                }
            } // End of the chow-liu algorithm

            if (prune.equals("best") || prune.equals("tree")) {
                int amount = 0;
                if (prune.equals("best"))
                    amount = (int) (m_numLabels * 2);

                // Top up the selection with the highest-MI pairs not yet chosen.
                int index = 0;
                while (m_numSelected < amount && index < m_numPairs) {
                    if (!selectedPair[ordered[index]]) {
                        m_numSelected++;
                        selectedPair[ordered[index]] = true;
                    }

                    index++;
                }
            } // End of the linear tree and best procedures

            if (prune.equals("hiton")) {
                // Keep only the label attributes, then select pairs whose labels
                // appear in each other's HITON Markov blanket.
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);

                HITON hiton = new HITON(hitonData);

                HashSet<Integer>[] markovBlanket = new HashSet[m_numLabels];
                for (int l = 0; l < m_numLabels; l++)
                    markovBlanket[l] = hiton.HITONMB(l);

                for (int p = 0; p < m_numPairs; p++) {
                    int p_i = mapLabels.get(labelCombinations[p][0]);
                    int p_j = mapLabels.get(labelCombinations[p][1]);

                    if (markovBlanket[p_i].contains(p_j) || markovBlanket[p_j].contains(p_i)) {
                        selectedPair[p] = true;
                        m_numSelected++;
                    }
                }

            } // end of the hiton pruning algorithm

            if (prune.equals("bdeu")) {
                // Greedy structure search: select a pair when adding the edge
                // improves the BDeu score of the tail label.
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);

                BDeu hiton = new BDeu(hitonData);
                double[] scores = hiton.singleScore;

                double[] pairScores = new double[m_numPairs];
                double[] sumScores = new double[m_numLabels];
                for (int p = 0; p < m_numPairs; p++) {
                    int head = mapLabels.get(labelCombinations[p][0]);
                    int tail = mapLabels.get(labelCombinations[p][1]);
                    pairScores[p] = -1 * (scores[tail] - (hiton.localBdeuScore(tail, new Integer[] { head })));

                    sumScores[tail] += pairScores[p];
                    sumScores[head] += pairScores[p];
                }

                HashSet<Integer>[] parents = new HashSet[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    parents[i] = new HashSet<Integer>();

                ordered = orderBy(pairScores);

                int[] topologicalOrdering = orderBy(sumScores);

                int[] relevance = new int[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    relevance[topologicalOrdering[i]] = i;

                for (int p = 0; p < m_numPairs; p++) {
                    int pair = ordered[p];

                    int head = mapLabels.get(labelCombinations[pair][0]);
                    int tail = mapLabels.get(labelCombinations[pair][1]);

                    // Orient the edge from the less to the more relevant label.
                    if (relevance[head] > relevance[tail]) {
                        int aux = head;
                        head = tail;
                        tail = aux;
                    }

                    // Check if adding this improves
                    parents[tail].add(head);
                    double scoreAdd = hiton.localBdeuScore(tail,
                            parents[tail].toArray(new Integer[parents[tail].size()]));
                    double diff = scores[tail] - scoreAdd;

                    if (diff < 0) {
                        scores[tail] = scoreAdd;
                        selectedPair[pair] = true;
                        m_numSelected++;
                    } else {
                        parents[tail].remove(head);
                    }
                } // End of the BDeu procedure

            } // End of the Pruning algorithms

            //
            // Determine singleton variables: labels that appear in no selected pair.
            for (int i = 0; i < m_labelIndices.length; i++)
                singleton[i] = true;

            for (int p = 0; p < m_numPairs; p++) {
                if (selectedPair[p]) {
                    singleton[mapLabels.get(labelCombinations[p][0])] = false;
                    singleton[mapLabels.get(labelCombinations[p][1])] = false;
                }
            }

            for (int i = 0; i < m_labelIndices.length; i++)
                if (singleton[i])
                    m_numSingleton++;

            mutualInfo = null;
        }

        // Generate single class datasets from the full ML data and learn models:
        HashMap<Integer, Classifier> models = new HashMap<Integer, Classifier>();
        HashMap<Integer, Classifier> singletonModels = new HashMap<Integer, Classifier>();
        HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection> singletonFilterSel = new HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection>();
        HashMap<Integer, weka.filters.supervised.attribute.Discretize> singletonFilter = new HashMap<Integer, weka.filters.supervised.attribute.Discretize>();
        weka.filters.supervised.attribute.AttributeSelection[] m_selecters = new weka.filters.supervised.attribute.AttributeSelection[m_numPairs];
        weka.filters.supervised.attribute.Discretize[] m_discretizers = new weka.filters.supervised.attribute.Discretize[m_numPairs];

        ClassCompoundTransformation[] converters = new ClassCompoundTransformation[m_numPairs];

        for (int i = 0; i < m_numPairs; i++) {

            if (!selectedPair[i]) {
                continue;
            }

            // Keep only this pair's labels, then compound them into one class.
            MultiLabelInstances filteredLabelData = trainData
                    .reintegrateModifiedDataSet(RemoveAllLabels.transformInstances(trainData.getDataSet(),
                            complement(m_labelIndices, labelCombinations[i])));

            converters[i] = new ClassCompoundTransformation();

            Instances singleLabelData = converters[i].transformInstances(filteredLabelData);

            if (discType.equals("supervised")) {
                m_discretizers[i] = new Discretize();
                m_discretizers[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_discretizers[i]);
            }

            if (fss.equals("CFS")) {

                m_selecters[i] = new weka.filters.supervised.attribute.AttributeSelection();
                m_selecters[i].setSearch(new weka.attributeSelection.BestFirst());
                m_selecters[i].setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                m_selecters[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_selecters[i]);

            }

            models.put(i, (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance());
            models.get(i).buildClassifier(singleLabelData);
        }

        // Learn singleton models:
        for (int i = 0; i < m_labelIndices.length; i++) {
            if (singleton[i]) {

                Instances singleLabelData = new Instances(trainData.getDataSet());
                singleLabelData.setClassIndex(m_labelIndices[i]);
                singleLabelData = RemoveAllLabels.transformInstances(singleLabelData,
                        complement(m_labelIndices, new int[] { m_labelIndices[i] }));

                if (discType.equals("supervised")) {
                    singletonFilter.put(i, new Discretize());
                    singletonFilter.get(i).setInputFormat(singleLabelData);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilter.get(i));
                }

                if (fss.equals("CFS")) {
                    weka.filters.supervised.attribute.AttributeSelection tempFilter = new weka.filters.supervised.attribute.AttributeSelection();
                    tempFilter.setSearch(new weka.attributeSelection.BestFirst());
                    tempFilter.setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                    tempFilter.setInputFormat(singleLabelData);
                    singletonFilterSel.put(i, tempFilter);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilterSel.get(i));
                }

                Classifier single;

                single = (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance();

                single.buildClassifier(singleLabelData);
                singletonModels.put(i, single);
            }
        }

        //
        // END OF THE LEARNING STAGE
        //

        double train = System.nanoTime() - start;
        start = System.nanoTime();

        Writer writerConf = null;
        Writer writerDist = null;
        Writer writerSing = null;
        Writer writerLayo = null;

        try {

            writerConf = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/conf_" + fold + ".txt"), "utf-8"));

            writerDist = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/dist_" + fold + ".txt"), "utf-8"));

            writerSing = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/sing_" + fold + ".txt"), "utf-8"));

            writerLayo = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/layo_" + fold + ".txt"), "utf-8"));
            for (int l = 0; l < m_numLabels; l++) {
                writerLayo.write(trainData.getDataSet().attribute(m_labelIndices[l]).numValues() + "\t");
            }
            writerLayo.write("\n");
            writerLayo.write(m_numSelected + "\t" + m_numSingleton);
            writerLayo.close();

            // Get distributions for instance for each variable pairs:
            double[] distributions;

            for (int i = 0; i < testData.getDataSet().size(); i++) {

                // True label configuration for this test instance.
                for (int l : testData.getLabelIndices())
                    writerConf.write((int) testData.getDataSet().instance(i).value(l) + "\t");

                writerConf.write("\n");

                Instance inst = testData.getDataSet().get(i);

                if (discType.equals("unsupervised")) {
                    m_unsuperDiscretize.input(inst);
                    inst = m_unsuperDiscretize.output();
                }

                for (int p = 0; p < m_numPairs; p++) {
                    if (!selectedPair[p]) {
                        continue;
                    }

                    // Apply the same per-pair pipeline used at training time.
                    Instance processed = converters[p].transformInstance(inst, testData.getLabelIndices());

                    if (discType.equals("supervised")) {
                        m_discretizers[p].input(processed);
                        processed = m_discretizers[p].output();
                    }

                    if (!fss.equals("no")) {
                        m_selecters[p].input(processed);
                        processed = m_selecters[p].output();
                    }

                    distributions = models.get(p).distributionForInstance(processed);

                    writerDist.write(mapLabels.get(labelCombinations[p][0]) + "\t"
                            + mapLabels.get(labelCombinations[p][1]) + "\t");

                    for (int d = 0; d < distributions.length; d++)
                        writerDist.write(distributions[d] + "\t");

                    writerDist.write("\n");
                }

                // Get predictions for singleton labels:
                for (int m = 0; m < m_labelIndices.length; m++) {
                    if (singleton[m]) {
                        Instance processed = RemoveAllLabels.transformInstance(inst,
                                complement(m_labelIndices, new int[] { m_labelIndices[m] }));

                        if (discType.equals("supervised")) {
                            singletonFilter.get(m).input(processed);
                            processed = singletonFilter.get(m).output();
                        }

                        if (!fss.equals("no")) {
                            singletonFilterSel.get(m).input(processed);
                            processed = singletonFilterSel.get(m).output();
                        }

                        double[] distribution = singletonModels.get(m).distributionForInstance(processed);

                        // Arg-max over the class distribution.
                        double maxValue = 0;
                        int conf = -1;

                        for (int v = 0; v < distribution.length; v++) {
                            if (distribution[v] > maxValue) {
                                maxValue = distribution[v];
                                conf = v;
                            }
                        }
                        writerSing.write(i + "\t" + m + "\t" + conf + "\n");
                    }
                }
            }

            writerConf.close();
            writerDist.close();
            writerSing.close();

            double test = System.nanoTime() - start;

            //         train /= 1000000000.0;
            //         test /=  1000000000.0;
            //         System.out.println(java.lang.String.format("FMC-%s\t%s\t%s\t%d\t%s\t%s\t%.4f\t%.4f",prune,baseClassifierClass,dbName,fold,discType,fss,train,test));
        } catch (IOException ex) {
            // Report instead of silently swallowing the failure.
            ex.printStackTrace();
        } finally {
            // Close every writer; previously writerSing and writerLayo leaked
            // whenever an IOException occurred. Double-closing is harmless.
            try {
                writerConf.close();
            } catch (Exception ex) {
            }
            try {
                writerDist.close();
            } catch (Exception ex) {
            }
            try {
                writerSing.close();
            } catch (Exception ex) {
            }
            try {
                writerLayo.close();
            } catch (Exception ex) {
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:etc.aloe.oilspill2010.FeatureGenerationImpl.java

License:Open Source License

/**
 * Builds a supervised attribute-selection filter (ReliefF evaluator + Ranker
 * search, keeping the top-ranked attributes), trains it on the example set,
 * and returns it so the same reduction can be applied to later data.
 *
 * <p>Side effect: the {@code Instances} held by {@code examples} are replaced
 * with the filtered (reduced) data.
 *
 * @param examples the labelled examples used to fit the selection filter
 * @return the trained attribute-selection filter
 * @throws Exception if Weka fails to configure or apply the filter
 */
protected Filter getFeatureSelectionFilter(ExampleSet examples) throws Exception {
    // Number of top-ranked attributes the Ranker retains.
    final int numAttributesToSelect = 980;

    AttributeSelection filter = new AttributeSelection(); // package weka.filters.supervised.attribute!

    // ReliefF scores each attribute individually. Alternatives tried earlier:
    // CfsSubsetEval / GreedyStepwise (subset search), CorrelationAttributeEval,
    // InfoGainAttributeEval.
    ReliefFAttributeEval eval = new ReliefFAttributeEval();

    Ranker search = new Ranker();
    search.setNumToSelect(numAttributesToSelect);

    filter.setEvaluator(eval);
    filter.setSearch(search);

    filter.setInputFormat(examples.getInstances());
    Instances filtered = Filter.useFilter(examples.getInstances(), filter);
    examples.setInstances(filtered);

    return filter;
}

From source file:mao.datamining.DataSetPair.java

/**
 * Pre-Process the training data set with:
 * RemoveUselessColumnsByMissingValues filter
 * SpreadSubsample filter to shrink the majority class instances 
 * AttributeSelection filter with CfsSubsetEval and LinearForwardSelection
 *///from w ww.  ja v a2  s . co  m
private void processTrainRawData() {
    // Banner is printed three times deliberately so this run is easy to spot in the console.
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    // Rebuilt from scratch below once feature selection has run.
    finalTrainAttrList.clear();
    try {
        // Shared one-time preprocessing (see doItOnce4All for details).
        doItOnce4All();
        String sampleFilePath = null;
        //step 2, either over sample, or under sample
        //weka.filters.supervised.instance.SpreadSubsample
        // Pick which intermediate ARFF (already produced by the sampling step) to read back.
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            //do nothing,
            System.out.println("None Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            //do nothing
            // NOTE(review): matrix mode reuses the "none sampling" file — presumably
            // cost-matrix handling happens elsewhere; confirm against the caller.
            System.out.println("Matrix Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        // Class attribute is assumed to be the last column.
        newData.setClassIndex(newData.numAttributes() - 1);
        //            Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());

        //Step 3, select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        // eval/search stay null when featureSelectionNo is configured; the filter
        // is then skipped entirely (see the "eval != null" guard below).
        ASEvaluation eval = null;
        ASSearch search = null;

        //ranker
        // Mode A: InfoGain scoring with a threshold-based Ranker.
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            //weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            // -T: drop attributes scoring below the threshold; -N -1: no fixed count.
            String rankerOptios[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                // Oversampled data inflates InfoGain scores, so use a stricter threshold.
                rankerOptios[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptios);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        //weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
        // Mode B: CFS subset evaluation with linear forward selection.
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptios[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptios);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection ssssssssssssssssssssssssssssssssssssss");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }

        // Run the configured attribute selection, if any.
        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }

        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());

        //finally, write the final dataset to file system

        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }

        // Record the surviving attribute names so the test set can be aligned later.
        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());
        //            //set the final train dataset
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);

        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }

}

From source file:mlpoc.MLPOC.java

/**
 * Uses the attribute-selection filter on the given data.
 */
protected static void useFilter(Instances data) throws Exception {
    System.out.println("\n2. Filter");

    // Evaluate attribute subsets by correlation (CFS), searching greedily backwards.
    CfsSubsetEval subsetEvaluator = new CfsSubsetEval();
    GreedyStepwise backwardSearch = new GreedyStepwise();
    backwardSearch.setSearchBackwards(true);

    // Fully-qualified name disambiguates from weka.attributeSelection.AttributeSelection.
    weka.filters.supervised.attribute.AttributeSelection selectionFilter =
            new weka.filters.supervised.attribute.AttributeSelection();
    selectionFilter.setEvaluator(subsetEvaluator);
    selectionFilter.setSearch(backwardSearch);
    selectionFilter.setInputFormat(data);

    // Apply the filter and print the reduced dataset.
    System.out.println(Filter.useFilter(data, selectionFilter));
}

From source file:org.uclab.mm.kcl.ddkat.datapreprocessor.FeaturesSelector.java

License:Apache License

/**
 * Method to filter the input data using GreedyStepwise approach.
 *
 * @throws Exception the exception
 */
/**
 * Filters the original CSV data set with supervised attribute selection
 * (CfsSubsetEval evaluator + backward GreedyStepwise search) and saves
 * the reduced data via {@code saveFilteredData}.
 *
 * @throws Exception if the CSV cannot be loaded or the filter fails
 */
public void filterData() throws Exception {

    this.confirmationMessage = new ArrayList<String>();

    String inputFile = BASE_DIR + "OriginalDataSet.csv";

    // Load the original data set from CSV.
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    Instances inputData = fileLoader.getDataSet();

    // By convention the class attribute is the last column.
    inputData.setClassIndex(inputData.numAttributes() - 1);

    // CFS subset evaluation with a greedy backward search.
    AttributeSelection filter = new AttributeSelection();
    CfsSubsetEval eval = new CfsSubsetEval();
    GreedyStepwise search = new GreedyStepwise();
    search.setSearchBackwards(true);
    filter.setEvaluator(eval);
    filter.setSearch(search);
    filter.setInputFormat(inputData);
    Instances outputData = Filter.useFilter(inputData, filter);

    // NOTE(review): the original code built a newline-joined string of the
    // selected attribute headers here (via O(n^2) string concatenation) but
    // never used it; the dead computation was removed. Reintroduce it with a
    // StringBuilder if it is needed for logging or the confirmation message.

    saveFilteredData(inputFile, outputData);

}

From source file:trainableSegmentation.WekaSegmentation.java

License:GNU General Public License

/**
 * Select attributes using BestFirst search to reduce
 * the number of parameters per instance of a dataset
 *
 * @param data input set of instances
 * @return filtered set of instances (or null on error)
 */
public static Instances selectAttributes(Instances data) {
    Instances reduced = null;

    // CFS subset evaluator, treating missing values as a separate value.
    final CfsSubsetEval subsetEvaluator = new CfsSubsetEval();
    subsetEvaluator.setMissingSeparate(true);

    // Best-first search with its default parameters.
    final BestFirst bestFirstSearch = new BestFirst();

    final AttributeSelection selectionFilter = new AttributeSelection();
    selectionFilter.setEvaluator(subsetEvaluator);
    selectionFilter.setSearch(bestFirstSearch);

    try {
        selectionFilter.setInputFormat(data);

        reduced = Filter.useFilter(data, selectionFilter);
    } catch (Exception e) {
        // On failure, log and fall through: callers receive null.
        IJ.log("Error when resampling input data with selected attributes!");
        e.printStackTrace();
    }
    return reduced;

}