List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
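Before the project examples below, here is a minimal, self-contained sketch of the call itself. It assumes an ARFF file named data.arff (a placeholder) whose last attribute holds the class; Weka never guesses the class attribute, so it must be set explicitly before training or evaluation.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder file name.
        Instances data = new DataSource("data.arff").getDataSet();
        // Assume the last attribute is the class and point the class index at it.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}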
From source file:es.jarias.FMC.ClassCompoundTransformation.java
License:Open Source License
/**
 * @param mlData
 * @return the transformed instances
 * @throws Exception
 */
public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    data = mlData.getDataSet();
    numLabels = mlData.getNumLabels();
    labelIndices = mlData.getLabelIndices();
    Instances newData = null;

    // This must be different in order to combine ALL class states, not only existing ones.
    // gather distinct label combinations
    // ASSUME CLASSES ARE BINARY
    ArrayList<LabelSet> labelSets = new ArrayList<LabelSet>();
    double[] dblLabels = new double[numLabels];
    double nCombinations = Math.pow(2, numLabels);
    for (int i = 0; i < nCombinations; i++) {
        for (int l = 0; l < numLabels; l++) {
            int digit = (int) Math.pow(2, numLabels - 1 - l);
            dblLabels[l] = (digit & i) / digit;
        }
        LabelSet labelSet = new LabelSet(dblLabels);
        labelSets.add(labelSet);
    }

    // for (int i = 0; i < numInstances; i++) {
    //     // construct labelset
    //     double[] dblLabels = new double[numLabels];
    //     for (int j = 0; j < numLabels; j++) {
    //         int index = labelIndices[j];
    //         dblLabels[j] = Double.parseDouble(data.attribute(index).value((int) data.instance(i).value(index)));
    //     }
    //     LabelSet labelSet = new LabelSet(dblLabels);
    //
    //     // add labelset if not already present
    //     labelSets.add(labelSet);
    // }

    // create class attribute
    ArrayList<String> classValues = new ArrayList<String>(labelSets.size());
    for (LabelSet subset : labelSets) {
        classValues.add(subset.toBitString());
    }
    newClass = new Attribute("class", classValues);

    // for (String s : classValues) {
    //     System.out.print(s + ", ");
    // }
    // System.out.println();

    // remove all labels
    newData = RemoveAllLabels.transformInstances(data, labelIndices);

    // add new class attribute
    newData.insertAttributeAt(newClass, newData.numAttributes());
    newData.setClassIndex(newData.numAttributes() - 1);

    // add class values
    for (int i = 0; i < newData.numInstances(); i++) {
        //System.out.println(newData.instance(i).toString());
        String strClass = "";
        for (int j = 0; j < numLabels; j++) {
            int index = labelIndices[j];
            strClass = strClass + data.attribute(index).value((int) data.instance(i).value(index));
        }
        //System.out.println(strClass);
        newData.instance(i).setClassValue(strClass);
    }
    transformedFormat = new Instances(newData, 0);
    return newData;
}
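The Instances manipulation at the heart of that transformation, appending a nominal class attribute and then pointing the class index at it, can be isolated into a small sketch. This is an illustrative rewrite, not part of the project above; the helper name and attribute name "class" are arbitrary.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Instances;

public class AppendClassAttribute {
    // Minimal sketch: append a nominal class attribute to an existing dataset
    // and make it the class, as the label-powerset transformation above does.
    public static Instances addClass(Instances data, ArrayList<String> classValues) {
        Attribute newClass = new Attribute("class", classValues);
        data.insertAttributeAt(newClass, data.numAttributes()); // appended as the last attribute
        data.setClassIndex(data.numAttributes() - 1);           // make it the class attribute
        return data;                                            // class values are still missing until set per instance
    }
}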
From source file:es.jarias.FMC.FMC.java
License:Open Source License
public static void buildModel(MultiLabelInstances trainData, MultiLabelInstances testData, int fold,
        String baseClassifierClass, String discType, String fss, String outPath, String prune) throws Exception {
    double start = System.nanoTime();
    try {
        // DATA PREPROCESSING:
        weka.filters.unsupervised.attribute.Discretize m_unsuperDiscretize = null;
        if (discType.equals("supervised")) {
            // pass
            // Supervised discretization is applied to each model later during the training step.
        } else if (discType.equals("unsupervised")) {
            // Apply a baseline discretization filter:
            m_unsuperDiscretize = new weka.filters.unsupervised.attribute.Discretize();
            m_unsuperDiscretize.setUseEqualFrequency(false);
            m_unsuperDiscretize.setBins(3);
            m_unsuperDiscretize.setInputFormat(trainData.getDataSet());
            trainData = trainData.reintegrateModifiedDataSet(
                    Filter.useFilter(trainData.getDataSet(), m_unsuperDiscretize));
        } else
            throw new Exception("Invalid Discretization Type");

        if (!fss.equals("no") && !fss.equals("CFS"))
            throw new Exception("Invalid FSS strategy");

        if (!prune.equals("full") && !prune.equals("tree") && !prune.equals("best") && !prune.equals("hiton")
                && !prune.equals("bdeu"))
            throw new Exception("Invalid Pruning strategy");

        // Label information
        int m_numLabels = trainData.getNumLabels();
        int[] m_labelIndices = trainData.getLabelIndices();

        // Map for reference:
        HashMap<Integer, Integer> mapLabels = new HashMap<Integer, Integer>(m_numLabels);
        String[] mapLabelsName = new String[m_numLabels];
        for (int l = 0; l < m_numLabels; l++) {
            mapLabels.put(trainData.getLabelIndices()[l], l);
            mapLabelsName[l] = trainData.getDataSet().attribute(trainData.getLabelIndices()[l]).name();
        }

        // Get label combinations:
        int m_numPairs = (m_labelIndices.length * (m_labelIndices.length - 1)) / 2;
        int[][] labelCombinations = new int[m_numPairs][2];
        int counter = 0;
        for (int i = 0; i < m_labelIndices.length; i++) {
            for (int j = i + 1; j < m_labelIndices.length; j++) {
                labelCombinations[counter] = new int[] { m_labelIndices[i], m_labelIndices[j] };
                counter++;
            }
        }

        // Select the pairs:
        int m_numSelected = m_numPairs;
        int m_numSingleton = 0;
        int[] ordered;
        boolean[] selectedPair = new boolean[m_numPairs];
        boolean[] singleton = new boolean[m_numLabels];
        for (int i = 0; i < m_numPairs; i++)
            selectedPair[i] = true;

        if (!prune.equals("full")) {
            m_numSelected = 0;
            selectedPair = new boolean[m_numPairs];

            // Info gain for pruned model:
            double[][] mutualInfoPairs = mutualInfo(trainData.getDataSet(), trainData.getLabelIndices());
            double[] mutualInfo = new double[m_numPairs];
            counter = 0;
            for (int i = 0; i < m_labelIndices.length; i++) {
                Instances tempInstances = new Instances(trainData.getDataSet());
                tempInstances.setClassIndex(m_labelIndices[i]);
                for (int j = i + 1; j < m_labelIndices.length; j++) {
                    mutualInfo[counter] = mutualInfoPairs[i][j];
                    counter++;
                }
            }
            ordered = orderBy(mutualInfo);

            if (prune.equals("tree")) {
                // Each label corresponds to its own connected component
                HashMap<Integer, ArrayList<Integer>> tree_compo = new HashMap<Integer, ArrayList<Integer>>(m_numLabels);
                HashMap<Integer, Integer> tree_index = new HashMap<Integer, Integer>(m_numLabels);
                for (int i = 0; i < m_numLabels; i++) {
                    tree_compo.put(i, new ArrayList<Integer>());
                    tree_compo.get(i).add(i);
                    tree_index.put(i, i);
                }
                for (int i = 0; i < m_numPairs; i++) {
                    if (m_numSelected >= m_numLabels - 1)
                        break;
                    int pairIndex = ordered[i];
                    int pair_i = mapLabels.get(labelCombinations[pairIndex][0]);
                    int pair_j = mapLabels.get(labelCombinations[pairIndex][1]);
                    int conex_i = tree_index.get(pair_i);
                    int conex_j = tree_index.get(pair_j);
                    if (conex_i != conex_j) {
                        ArrayList<Integer> family = tree_compo.get(conex_j);
                        tree_compo.get(conex_i).addAll(family);
                        for (int element : family) {
                            tree_index.put(element, conex_i);
                        }
                        selectedPair[pairIndex] = true;
                        m_numSelected++;
                    }
                }
            } // End of the Chow-Liu algorithm

            if (prune.equals("best") || prune.equals("tree")) {
                int amount = 0;
                if (prune.equals("best"))
                    amount = (int) (m_numLabels * 2);
                int index = 0;
                while (m_numSelected < amount && index < m_numPairs) {
                    if (!selectedPair[ordered[index]]) {
                        m_numSelected++;
                        selectedPair[ordered[index]] = true;
                    }
                    index++;
                }
            } // End of the linear tree and best procedures

            if (prune.equals("hiton")) {
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);
                HITON hiton = new HITON(hitonData);
                HashSet<Integer>[] markovBlanket = new HashSet[m_numLabels];
                for (int l = 0; l < m_numLabels; l++)
                    markovBlanket[l] = hiton.HITONMB(l);
                for (int p = 0; p < m_numPairs; p++) {
                    int p_i = mapLabels.get(labelCombinations[p][0]);
                    int p_j = mapLabels.get(labelCombinations[p][1]);
                    if (markovBlanket[p_i].contains(p_j) || markovBlanket[p_j].contains(p_i)) {
                        selectedPair[p] = true;
                        m_numSelected++;
                    }
                }
            } // End of the HITON pruning algorithm

            if (prune.equals("bdeu")) {
                weka.filters.unsupervised.attribute.Remove m_remove = new weka.filters.unsupervised.attribute.Remove();
                m_remove.setAttributeIndicesArray(trainData.getLabelIndices());
                m_remove.setInvertSelection(true);
                m_remove.setInputFormat(trainData.getDataSet());
                Instances hitonData = Filter.useFilter(trainData.getDataSet(), m_remove);
                BDeu hiton = new BDeu(hitonData);
                double[] scores = hiton.singleScore;
                double[] pairScores = new double[m_numPairs];
                double[] sumScores = new double[m_numLabels];
                for (int p = 0; p < m_numPairs; p++) {
                    int head = mapLabels.get(labelCombinations[p][0]);
                    int tail = mapLabels.get(labelCombinations[p][1]);
                    pairScores[p] = -1 * (scores[tail] - (hiton.localBdeuScore(tail, new Integer[] { head })));
                    sumScores[tail] += pairScores[p];
                    sumScores[head] += pairScores[p];
                }
                HashSet<Integer>[] parents = new HashSet[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    parents[i] = new HashSet<Integer>();
                ordered = orderBy(pairScores);
                int[] topologicalOrdering = orderBy(sumScores);
                int[] relevance = new int[m_numLabels];
                for (int i = 0; i < m_numLabels; i++)
                    relevance[topologicalOrdering[i]] = i;
                for (int p = 0; p < m_numPairs; p++) {
                    int pair = ordered[p];
                    int head = mapLabels.get(labelCombinations[pair][0]);
                    int tail = mapLabels.get(labelCombinations[pair][1]);
                    if (relevance[head] > relevance[tail]) {
                        int aux = head;
                        head = tail;
                        tail = aux;
                    }
                    // Check if adding this improves
                    parents[tail].add(head);
                    double scoreAdd = hiton.localBdeuScore(tail, parents[tail].toArray(new Integer[parents[tail].size()]));
                    double diff = scores[tail] - scoreAdd;
                    if (diff < 0) {
                        scores[tail] = scoreAdd;
                        selectedPair[pair] = true;
                        m_numSelected++;
                    } else {
                        parents[tail].remove(head);
                    }
                } // End of the BDeu procedure
            } // End of the pruning algorithms

            // Determine singleton variables
            for (int i = 0; i < m_labelIndices.length; i++)
                singleton[i] = true;
            for (int p = 0; p < m_numPairs; p++) {
                if (selectedPair[p]) {
                    singleton[mapLabels.get(labelCombinations[p][0])] = false;
                    singleton[mapLabels.get(labelCombinations[p][1])] = false;
                }
            }
            for (int i = 0; i < m_labelIndices.length; i++)
                if (singleton[i])
                    m_numSingleton++;
            mutualInfo = null;
        }

        // Generate single class datasets from the full ML data and learn models:
        HashMap<Integer, Classifier> models = new HashMap<Integer, Classifier>();
        HashMap<Integer, Classifier> singletonModels = new HashMap<Integer, Classifier>();
        HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection> singletonFilterSel = new HashMap<Integer, weka.filters.supervised.attribute.AttributeSelection>();
        HashMap<Integer, weka.filters.supervised.attribute.Discretize> singletonFilter = new HashMap<Integer, weka.filters.supervised.attribute.Discretize>();
        weka.filters.supervised.attribute.AttributeSelection[] m_selecters = new weka.filters.supervised.attribute.AttributeSelection[m_numPairs];
        weka.filters.supervised.attribute.Discretize[] m_discretizers = new weka.filters.supervised.attribute.Discretize[m_numPairs];
        ClassCompoundTransformation[] converters = new ClassCompoundTransformation[m_numPairs];

        for (int i = 0; i < m_numPairs; i++) {
            if (!selectedPair[i]) {
                continue;
            }
            MultiLabelInstances filteredLabelData = trainData.reintegrateModifiedDataSet(
                    RemoveAllLabels.transformInstances(trainData.getDataSet(),
                            complement(m_labelIndices, labelCombinations[i])));
            converters[i] = new ClassCompoundTransformation();
            Instances singleLabelData = converters[i].transformInstances(filteredLabelData);
            if (discType.equals("supervised")) {
                m_discretizers[i] = new Discretize();
                m_discretizers[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_discretizers[i]);
            }
            if (fss.equals("CFS")) {
                m_selecters[i] = new weka.filters.supervised.attribute.AttributeSelection();
                m_selecters[i].setSearch(new weka.attributeSelection.BestFirst());
                m_selecters[i].setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                m_selecters[i].setInputFormat(singleLabelData);
                singleLabelData = Filter.useFilter(singleLabelData, m_selecters[i]);
            }
            models.put(i, (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance());
            models.get(i).buildClassifier(singleLabelData);
        }

        // Learn singleton models:
        for (int i = 0; i < m_labelIndices.length; i++) {
            if (singleton[i]) {
                Instances singleLabelData = new Instances(trainData.getDataSet());
                singleLabelData.setClassIndex(m_labelIndices[i]);
                singleLabelData = RemoveAllLabels.transformInstances(singleLabelData,
                        complement(m_labelIndices, new int[] { m_labelIndices[i] }));
                if (discType.equals("supervised")) {
                    singletonFilter.put(i, new Discretize());
                    singletonFilter.get(i).setInputFormat(singleLabelData);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilter.get(i));
                }
                if (fss.equals("CFS")) {
                    weka.filters.supervised.attribute.AttributeSelection tempFilter = new weka.filters.supervised.attribute.AttributeSelection();
                    tempFilter.setSearch(new weka.attributeSelection.BestFirst());
                    tempFilter.setEvaluator(new weka.attributeSelection.CfsSubsetEval());
                    tempFilter.setInputFormat(singleLabelData);
                    singletonFilterSel.put(i, tempFilter);
                    singleLabelData = Filter.useFilter(singleLabelData, singletonFilterSel.get(i));
                }
                Classifier single;
                single = (Classifier) Class.forName("weka.classifiers." + baseClassifierClass).newInstance();
                single.buildClassifier(singleLabelData);
                singletonModels.put(i, single);
            }
        }
        //
        // END OF THE LEARNING STAGE
        //
        double train = System.nanoTime() - start;
        start = System.nanoTime();

        Writer writerConf = null;
        Writer writerDist = null;
        Writer writerSing = null;
        Writer writerLayo = null;
        try {
            writerConf = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/conf_" + fold + ".txt"), "utf-8"));
            writerDist = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/dist_" + fold + ".txt"), "utf-8"));
            writerSing = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/sing_" + fold + ".txt"), "utf-8"));
            writerLayo = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outPath + "/layo_" + fold + ".txt"), "utf-8"));

            for (int l = 0; l < m_numLabels; l++) {
                writerLayo.write(trainData.getDataSet().attribute(m_labelIndices[l]).numValues() + "\t");
            }
            writerLayo.write("\n");
            writerLayo.write(m_numSelected + "\t" + m_numSingleton);
            writerLayo.close();

            // Get distributions for instance for each variable pairs:
            double[] distributions;
            for (int i = 0; i < testData.getDataSet().size(); i++) {
                for (int l : testData.getLabelIndices())
                    writerConf.write((int) testData.getDataSet().instance(i).value(l) + "\t");
                writerConf.write("\n");

                Instance inst = testData.getDataSet().get(i);
                if (discType.equals("unsupervised")) {
                    m_unsuperDiscretize.input(inst);
                    inst = m_unsuperDiscretize.output();
                }
                for (int p = 0; p < m_numPairs; p++) {
                    if (!selectedPair[p]) {
                        continue;
                    }
                    Instance processed = converters[p].transformInstance(inst, testData.getLabelIndices());
                    if (discType.equals("supervised")) {
                        m_discretizers[p].input(processed);
                        processed = m_discretizers[p].output();
                        // m_removers[p].input(processed);
                        // processed = m_removers[p].output();
                    }
                    if (!fss.equals("no")) {
                        m_selecters[p].input(processed);
                        processed = m_selecters[p].output();
                    }
                    distributions = models.get(p).distributionForInstance(processed);
                    writerDist.write(mapLabels.get(labelCombinations[p][0]) + "\t"
                            + mapLabels.get(labelCombinations[p][1]) + "\t");
                    for (int d = 0; d < distributions.length; d++)
                        writerDist.write(distributions[d] + "\t");
                    writerDist.write("\n");
                }

                // Get predictions for singleton labels:
                for (int m = 0; m < m_labelIndices.length; m++) {
                    if (singleton[m]) {
                        Instance processed = RemoveAllLabels.transformInstance(inst,
                                complement(m_labelIndices, new int[] { m_labelIndices[m] }));
                        if (discType.equals("supervised")) {
                            singletonFilter.get(m).input(processed);
                            processed = singletonFilter.get(m).output();
                        }
                        if (!fss.equals("no")) {
                            singletonFilterSel.get(m).input(processed);
                            processed = singletonFilterSel.get(m).output();
                        }
                        double[] distribution = singletonModels.get(m).distributionForInstance(processed);
                        double maxValue = 0;
                        int conf = -1;
                        for (int v = 0; v < distribution.length; v++) {
                            if (distribution[v] > maxValue) {
                                maxValue = distribution[v];
                                conf = v;
                            }
                        }
                        writerSing.write(i + "\t" + m + "\t" + conf + "\n");
                    }
                }
            }
            writerConf.close();
            writerDist.close();
            writerSing.close();

            double test = System.nanoTime() - start;
            // train /= 1000000000.0;
            // test /= 1000000000.0;
            // System.out.println(java.lang.String.format("FMC-%s\t%s\t%s\t%d\t%s\t%s\t%.4f\t%.4f", prune, baseClassifierClass, dbName, fold, discType, fss, train, test));
        } catch (IOException ex) {
            // report
        } finally {
            try {
                writerConf.close();
            } catch (Exception ex) {
            }
            try {
                writerDist.close();
            } catch (Exception ex) {
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:es.ubu.XRayDetector.datos.GestorArff.java
License:Open Source License
/**
 * Reads an ARFF file.
 *
 * @param url The path of the ARFF file.
 * @return The instances included in the ARFF file.
 */
public Instances leerArff(String url) {
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(url));
    } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
    }
    ArffReader arff = null;
    try {
        arff = new ArffReader(reader);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    Instances data = arff.getData();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
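Calling this reader might look like the sketch below. The file path is a placeholder and a no-argument GestorArff constructor is assumed; the point is that the returned Instances already has its class index set, so it can be passed straight to a classifier.

// Hypothetical usage of leerArff; "train.arff" is a placeholder path.
GestorArff gestor = new GestorArff();
Instances data = gestor.leerArff("train.arff");
System.out.println(data.numInstances() + " instances, class = " + data.classAttribute().name());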
From source file:es.ubu.XRayDetector.modelo.Fachada.java
License:Open Source License
/**
 * Creates a model training a classifier using bagging.
 *
 * @param data Contains all the instances of the ARFF file
 * @param sizeWindow The size of the window
 */
public void createModel(Instances data, String sizeWindow) {
    // create the classifier, set its options and input format
    Classifier cls = null;
    //String separator = System.getProperty("file.separator");
    String path = prop.getPathModel();
    int opcionClasificacion = prop.getTipoClasificacion();

    switch (opcionClasificacion) {
    case 0:
        // CLASSIFIER FOR NOMINAL CLASSES (TRUE, FALSE)
        Classifier base;
        base = new REPTree();
        cls = new Bagging();
        ((Bagging) cls).setNumIterations(25);
        ((Bagging) cls).setBagSizePercent(100);
        ((Bagging) cls).setNumExecutionSlots(Runtime.getRuntime().availableProcessors());
        ((Bagging) cls).setClassifier(base);
        break;
    case 1:
        // LINEAR REGRESSION (NUMERIC CLASSES, 1/0)
        cls = new REPTree();
        break;
    }
    ObjectOutputStream oos = null;
    try {
        data.setClassIndex(data.numAttributes() - 1);
        cls.buildClassifier(data);
        /*if (arffName.contains("mejores"))
            oos = new ObjectOutputStream(new FileOutputStream((path + separator + "Modelos" + separator
                    + "Bagging_" + "mejores_" + sizeWindow + ".model")));
        if (arffName.contains("todas"))*/
        oos = new ObjectOutputStream(new FileOutputStream((path + "todas_" + sizeWindow + ".model")));
        oos.writeObject(cls);
        oos.flush();
        oos.close();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
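Since the method ends by serializing the trained classifier to a .model file, a natural counterpart is loading it back. This is a hedged sketch, not part of the project; the path and window size in the file name are placeholders that mirror the naming scheme used above.

import weka.classifiers.Classifier;
import weka.core.SerializationHelper;

public class LoadSavedModel {
    public static void main(String[] args) throws Exception {
        // Placeholder path following the "todas_<sizeWindow>.model" convention above.
        Classifier cls = (Classifier) SerializationHelper.read("/path/to/todas_24.model");
        System.out.println(cls);
    }
}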
From source file:es.ubu.XRayDetector.modelo.ventana.VentanaAbstracta.java
License:Open Source License
/**
 * This method gets the headers of the features.
 *
 * @param features a List of features
 * @return header with features headers
 */
public Instances getHeader(List<String> features) {
    int capacity = 100000;
    List<String> featuresCopy = null;
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    ArrayList<String> defect = new ArrayList<String>();
    defect.add("true");
    defect.add("false");

    if (features != null) {
        featuresCopy = new ArrayList<String>(features);
        for (int i = 0; i < featuresCopy.size(); i++) {
            String rest = featuresCopy.get(i).substring(1);
            char first = featuresCopy.get(i).charAt(0);
            first = Character.toLowerCase(first);
            featuresCopy.set(i, (first + rest).replaceAll(" ", ""));
        }
    }
    for (int j = 0; j < ftStandard.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j]))
            atts.add(new Attribute(ftStandard.getHead()[j]));
    }
    for (int j = 0; j < ftStandardSaliency.getHead().length; j++) {
        if (features == null || featuresCopy.contains(ftStandard.getHead()[j] + "(S)"))
            atts.add(new Attribute(ftStandardSaliency.getHead()[j] + "(S)"));
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_mean" + j));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralick.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i]))
                atts.add(new Attribute(ftHaralick.getHead()[i] + "_range" + j));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_mean" + j + "(S)"));
        }
    }
    for (int j = 1; j < 6; j++) {
        for (int i = 0; i < ftHaralickSaliency.getHead().length; i++) {
            if (features == null || featuresCopy.contains(ftHaralick.getHead()[i] + "(S)"))
                atts.add(new Attribute(ftHaralickSaliency.getHead()[i] + "_range" + j + "(S)"));
        }
    }
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbp.getHead() + "_" + j))
            atts.add(new Attribute(ftLbp.getHead() + "(" + j + ")"));
    }
    for (int j = 1; j < 60; j++) {
        if (features == null || featuresCopy.contains(ftLbpSaliency.getHead() + "_" + j + "(S)"))
            atts.add(new Attribute(ftLbpSaliency.getHead() + "(" + j + ")(S)"));
    }
    atts.add(new Attribute("Defecto", defect));

    // Capacity is the number of instances.
    Instances header = new Instances("NuevaInstancia", atts, capacity);
    // Set the class attribute
    header.setClassIndex(header.numAttributes() - 1);
    return header;
}
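A header built this way is typically filled by binding new instances to it. The fragment below is a hedged sketch, assuming 'ventana' is an instance of some VentanaAbstracta subclass and using a dummy class value; it only illustrates how the class index set in getHeader carries over to added rows.

// Hedged usage sketch; 'ventana' is a hypothetical VentanaAbstracta subclass instance.
Instances header = ventana.getHeader(null);           // null -> include every feature
DenseInstance row = new DenseInstance(header.numAttributes());
row.setDataset(header);                                // bind the instance to the header's attributes
row.setValue(header.classAttribute(), "false");        // "Defecto" is the class attribute set above
header.add(row);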
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * @param csvFilePath
 * @return
 * @throws Exception
 */
private Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource source = new DataSource(csvFilePath);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
From source file:es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator.java
License:Open Source License
/**
 * @param csvFilePath
 * @return
 * @throws Exception
 */
public static Instances getDataFromCSV(String csvFilePath) throws Exception {
    DataSource source = new DataSource(csvFilePath);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    return data;
}
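Because this helper is public and static, it can be called directly; DataSource picks a loader from the file extension (a CSVLoader for .csv files) and the last column is then treated as the class. The sketch below is illustrative only, with a placeholder file path and an arbitrary choice of classifier.

import es.upm.dit.gsi.barmas.launcher.WekaClassifiersValidator;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class TrainFromCsv {
    public static void main(String[] args) throws Exception {
        // Hypothetical usage; "dataset.csv" is a placeholder path.
        Instances data = WekaClassifiersValidator.getDataFromCSV("dataset.csv");
        J48 tree = new J48();          // any Weka classifier would do here
        tree.buildClassifier(data);    // requires the class index set inside getDataFromCSV
        System.out.println(tree);
    }
}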
From source file:etc.aloe.data.SegmentSet.java
License:Open Source License
/**
 * Convert the segment set into an ExampleSet (ready for feature
 * extraction). The returned example set includes an id attribute, the
 * message text, a label attribute, and several basic features extracted
 * from the segment.
 *
 * @return
 */
public ExampleSet getBasicExamples() {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute(ExampleSet.ID_ATTR_NAME));
    attributes.add(new Attribute(ExampleSet.MESSAGE_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(ExampleSet.LABEL_ATTR_NAME, Arrays.asList(new String[] { "false", "true" })));
    attributes.add(new Attribute(ExampleSet.PARTICIPANT_ATTR_NAME, (List<String>) null));
    attributes.add(new Attribute(DURATION_ATTR_NAME));
    attributes.add(new Attribute(LENGTH_ATTR_NAME));
    attributes.add(new Attribute(CPS_ATTR_NAME));
    attributes.add(new Attribute(RATE_ATTR_NAME));

    Instances instances = new Instances("BasicExamples", attributes, 0);
    instances.setClassIndex(2);

    Attribute idAttr = instances.attribute(ExampleSet.ID_ATTR_NAME);
    Attribute messageAttr = instances.attribute(ExampleSet.MESSAGE_ATTR_NAME);
    Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
    Attribute participantAttr = instances.attribute(ExampleSet.PARTICIPANT_ATTR_NAME);
    Attribute durationAttr = instances.attribute(DURATION_ATTR_NAME);
    Attribute lengthAttr = instances.attribute(LENGTH_ATTR_NAME);
    Attribute cpsAttr = instances.attribute(CPS_ATTR_NAME);
    Attribute rateAttr = instances.attribute(RATE_ATTR_NAME);

    for (int i = 0; i < size(); i++) {
        Segment segment = get(i);
        Instance instance = new DenseInstance(instances.numAttributes());

        String messageStr = segment.concatMessages();
        String participantStr = segment.concatParticipants();

        instance.setValue(idAttr, segment.getId());
        instance.setValue(messageAttr, messageStr);
        instance.setValue(participantAttr, participantStr);
        if (segment.hasTrueLabel()) {
            instance.setValue(labelAttr, segment.getTrueLabel() ? "true" : "false");
        }
        computeRateValues(segment, instance, messageStr, durationAttr, lengthAttr, cpsAttr, rateAttr);

        instances.add(instance);
    }
    return new ExampleSet(instances);
}
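This example hard-codes setClassIndex(2) because the label attribute is added third. When the attribute order is not fixed, an equivalent and more robust pattern is to look the attribute up by name, as in this short hedged fragment reusing the names from the snippet above:

// Equivalent to the hard-coded setClassIndex(2) above, but resilient to attribute reordering.
Attribute labelAttr = instances.attribute(ExampleSet.LABEL_ATTR_NAME);
instances.setClass(labelAttr);   // same effect as instances.setClassIndex(labelAttr.index())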
From source file:etc.aloe.filters.StringToDictionaryVector.java
License:Open Source License
public static void main(String[] args) {
    // Create a test dataset
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(new Attribute("message", (ArrayList<String>) null));
    attributes.add(new Attribute("id"));
    {
        ArrayList<String> classValues = new ArrayList<String>();
        classValues.add("0");
        classValues.add("1");
        attributes.add(new Attribute("class", classValues));
    }

    Instances instances = new Instances("test", attributes, 0);
    instances.setClassIndex(2);

    String[] messages = new String[] { "No emoticons here", "I have a smiley :)",
            "Two smileys and a frownie :) :) :(", "Several emoticons :( :-( :) :-) ;-) 8-) :-/ :-P" };

    for (int i = 0; i < messages.length; i++) {
        Instance instance = new DenseInstance(instances.numAttributes());
        instance.setValue(instances.attribute(0), messages[i]);
        instance.setValue(instances.attribute(1), i);
        instance.setValue(instances.attribute(2), Integer.toString(i % 2));
        instances.add(instance);
    }

    System.out.println("Before filter:");
    for (int i = 0; i < instances.size(); i++) {
        System.out.println(instances.instance(i).toString());
    }

    try {
        String dictionaryName = "emoticons.txt";
        StringToDictionaryVector filter = new StringToDictionaryVector();
        List<String> termList = StringToDictionaryVector.readDictionaryFile(new File(dictionaryName));
        filter.setTermList(termList);
        filter.setMinTermFreq(1);
        filter.setTFTransform(true);
        filter.setIDFTransform(true);
        filter.setNormalizeDocLength(new SelectedTag(FILTER_NORMALIZE_TEST_ONLY, TAGS_FILTER));
        filter.setOutputWordCounts(true);
        filter.setStringAttribute("message");

        filter.setInputFormat(instances);
        Instances trans1 = Filter.useFilter(instances, filter);
        Instances trans2 = Filter.useFilter(instances, filter);

        System.out.println("\nFirst application:");
        System.out.println(trans1.toString());
        System.out.println("\nSecond application:");
        System.out.println(trans2.toString());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:eu.linda.analytics.formats.ArffInputFormat.java
@Override
public AbstractList importData4weka(String pathToFile, boolean isForRDFOutput, Analytics analytics) {
    helpfulFuncions.nicePrintMessage("import Arff file " + pathToFile);
    Instances data = null;
    //Instances newData = null;
    try {
        data = ConverterUtils.DataSource.read(pathToFile);

        //NominalToString filter1 = new NominalToString();
        //filter1.setInputFormat(data);
        //data = Filter.useFilter(data, filter1);

        /*/first 2 columns are metadata info used for rdf output
        if (excludeMetadataInfo) {
            String[] options = new String[2];
            options[0] = "-R";                          // "range"
            options[1] = "1,2";                         // first attributes
            Remove remove = new Remove();               // new instance of filter
            remove.setOptions(options);                 // set options
            remove.setInputFormat(data);                // inform filter about dataset **AFTER** setting options
            newData = Filter.useFilter(data, remove);   // apply filter
            newData.setClassIndex(newData.numAttributes() - 1);
            return newData;
        }*/

        data.setClassIndex(data.numAttributes() - 1);
    } catch (Exception ex) {
        Logger.getLogger(ArffInputFormat.class.getName()).log(Level.SEVERE, null, ex);
    }
    return data;
}