Example usage for weka.filters.unsupervised.attribute Remove setAttributeIndices

List of usage examples for weka.filters.unsupervised.attribute Remove setAttributeIndices

Introduction

In this page you can find the example usage for weka.filters.unsupervised.attribute Remove setAttributeIndices.

Prototype

public void setAttributeIndices(String rangeList) 

Source Link

Document

Set which attributes are to be deleted (or kept if invert is true)

Usage

From source file:ClusteringClass.java

public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";

    try {/*from  w  w w  . j  a  va 2  s . c o  m*/
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                "dani");

        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);

        String[] titles = new String[150];

        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i == 22)
                    fw.append('\n');
                else if (i != 2) {
                    fw.append(',');
                }
            }
        }

        fw.flush();
        fw.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        /*
         Applichiamo il filtro Remove fornito da Weka per non considerare un
         attributo nell'algoritmo di Clustering.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Applichiamo il filtro Normalize fornito da Weka per normalizzare il 
         nostro dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /*first graph  
                
         PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1);
         //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
         String name = "";
         String cname = clus1.getClass().getName();
         if (cname.startsWith("weka.clusterers."))
         name += cname.substring("weka.clusterers.".length());
         else
         name += cname;
                
                
         VisualizePanel vp = new VisualizePanel();
         vp.setName(name + " (" + train.relationName() + ")");
         predData.setPlotName(name + " (" + train.relationName() + ")");
         vp.addPlot(predData);
                
         String plotName = vp.getName();
         final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName);
         jf.setSize(500,400);
         jf.getContentPane().setLayout(new BorderLayout());
         jf.getContentPane().add(vp, BorderLayout.CENTER);
         jf.dispose();
         jf.addWindowListener(new java.awt.event.WindowAdapter() {
         public void windowClosing(java.awt.event.WindowEvent e) {
         jf.dispose();
         }
         });
         jf.setVisible(true);
                
         end first graph
         */

        /*
         Second Clustering Algorithm
         */

        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:adams.flow.transformer.WekaInstancesMerge.java

License:Open Source License

/**
 * Excludes attributes from the data./*from w w w.ja v  a 2 s.  co m*/
 *
 * @param inst   the data to process
 * @return      the processed data
 */
protected Instances excludeAttributes(Instances inst) {
    Instances result;
    StringBuilder atts;
    int i;
    Remove filter;

    // determine attribute indices
    atts = new StringBuilder();
    for (i = 0; i < inst.numAttributes(); i++) {
        if (inst.attribute(i).name().matches(m_ExcludedAttributes)) {
            if (atts.length() > 0)
                atts.append(",");
            atts.append((i + 1));
        }
    }

    // filter data
    try {
        filter = new Remove();
        filter.setAttributeIndices(atts.toString());
        filter.setInvertSelection(m_InvertMatchingSense);
        filter.setInputFormat(inst);
        result = weka.filters.Filter.useFilter(inst, filter);
    } catch (Exception e) {
        result = inst;
        handleException("Error filtering data:", e);
    }

    return result;
}

From source file:adams.flow.transformer.WekaMultiLabelSplitter.java

License:Open Source License

/**
 * Returns the generated token./*from   ww  w .j a  v  a  2s.c  o  m*/
 *
 * @return      the generated token
 */
@Override
public Token output() {
    Token result;
    int index;
    Remove remove;
    Reorder reorder;
    StringBuilder indices;
    int i;
    int newIndex;
    Instances processed;

    result = null;

    index = m_AttributesToProcess.remove(0);
    remove = new Remove();
    indices = new StringBuilder();
    for (i = 0; i < m_ClassAttributes.size(); i++) {
        if (m_ClassAttributes.get(i) == index)
            continue;
        if (indices.length() > 0)
            indices.append(",");
        indices.append("" + (m_ClassAttributes.get(i) + 1));
    }
    remove.setAttributeIndices(indices.toString());

    try {
        remove.setInputFormat(m_Dataset);
        processed = weka.filters.Filter.useFilter(m_Dataset, remove);
        if (m_UpdateRelationName)
            processed.setRelationName(m_Dataset.attribute(index).name());
        result = new Token(processed);
    } catch (Exception e) {
        processed = null;
        handleException(
                "Failed to process dataset with following filter setup:\n" + OptionUtils.getCommandLine(remove),
                e);
    }

    if (m_MakeClassLast && (processed != null)) {
        newIndex = processed.attribute(m_Dataset.attribute(index).name()).index();
        indices = new StringBuilder();
        for (i = 0; i < processed.numAttributes(); i++) {
            if (i == newIndex)
                continue;
            if (indices.length() > 0)
                indices.append(",");
            indices.append("" + (i + 1));
        }
        if (indices.length() > 0)
            indices.append(",");
        indices.append("" + (newIndex + 1));
        reorder = new Reorder();
        try {
            reorder.setAttributeIndices(indices.toString());
            reorder.setInputFormat(processed);
            processed = weka.filters.Filter.useFilter(processed, reorder);
            if (m_UpdateRelationName)
                processed.setRelationName(m_Dataset.attribute(index).name());
            result = new Token(processed);
        } catch (Exception e) {
            handleException("Failed to process dataset with following filter setup:\n"
                    + OptionUtils.getCommandLine(reorder), e);
        }
    }

    return result;
}

From source file:adams.opt.genetic.DarkLord.java

License:Open Source License

/**
 * Generates a Properties file that stores information on the setup of
 * the genetic algorithm. E.g., it backs up the original relation name.
 * The generated properties file will be used as new relation name for
 * the data.// ww w  .  j a v  a 2 s .  com
 *
 * @param data   the data to create the setup for
 * @param job      the associated job
 * @see      #PROPS_RELATION
 * @return      the generated setup
 */
@Override
protected Properties storeSetup(Instances data, GeneticAlgorithmJob job) {
    Properties result;
    DarkLordJob jobDL;
    Remove remove;

    result = super.storeSetup(data, job);
    jobDL = (DarkLordJob) job;

    // mask string
    result.setProperty(PROPS_MASK, jobDL.getMaskAsString());

    // remove filter setup
    remove = new Remove();
    remove.setAttributeIndices(jobDL.getRemoveAsString());
    remove.setInvertSelection(true);
    result.setProperty(PROPS_FILTER, OptionUtils.getCommandLine(remove));

    return result;
}

From source file:adams.opt.optimise.genetic.fitnessfunctions.AttributeSelection.java

License:Open Source License

/**
 * Callback for best measure so far//from   ww w.  j ava 2s .c  om
 */
@Override
public void newBest(double val, OptData opd) {
    int cnt = 0;
    int[] weights = getWeights(opd);
    Instances newInstances = new Instances(getInstances());
    for (int i = 0; i < getInstances().numInstances(); i++) {
        Instance in = newInstances.instance(i);
        cnt = 0;
        for (int a = 0; a < getInstances().numAttributes(); a++) {
            if (a == getInstances().classIndex())
                continue;
            if (weights[cnt++] == 0) {
                in.setValue(a, 0);
            } else {
                in.setValue(a, in.value(a));
            }
        }
    }
    try {
        File file = new File(getOutputDirectory().getAbsolutePath() + File.separator
                + Double.toString(getMeasure().adjust(val)) + ".arff");
        file.createNewFile();
        Writer writer = new BufferedWriter(new FileWriter(file));
        Instances header = new Instances(newInstances, 0);

        // remove filter setup
        Remove remove = new Remove();
        remove.setAttributeIndices(getRemoveAsString(weights));
        remove.setInvertSelection(true);

        header.setRelationName(OptionUtils.getCommandLine(remove));

        writer.write(header.toString());
        writer.write("\n");
        for (int i = 0; i < newInstances.numInstances(); i++) {
            writer.write(newInstances.instance(i).toString());
            writer.write("\n");
        }
        writer.flush();
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:adams.opt.optimise.GeneticAlgorithm.java

License:Open Source License

public static void main(String[] args) {
    Environment.setEnvironmentClass(Environment.class);
    GeneticAlgorithm ga = new GeneticAlgorithm();
    ga.setBits(1);//from   w w  w  .  j a v a  2 s  . com
    ga.setNumChrom(8);
    ga.setIterations(10000);
    ga.setFavorZeroes(true);

    AttributeSelection as = new AttributeSelection();
    //as.setDataset(new PlaceholderFile("/home/dale/blgg/conversion/merged/m_5_.75.arff"));
    ArrayConsumer.setOptions(as, args);
    PLSClassifier pls = new PLSClassifier();
    PLSFilter pf = (PLSFilter) pls.getFilter();
    pf.setNumComponents(11);

    LinearRegressionJ reg = new LinearRegressionJ();
    reg.setEliminateColinearAttributes(false);
    reg.setAttributeSelectionMethod(
            new SelectedTag(LinearRegressionJ.SELECTION_NONE, LinearRegressionJ.TAGS_SELECTION));

    GPD gp = new GPD();
    gp.setNoise(.01);
    //RBFKernel rbf = new RBFKernel();
    //rbf.setChecksTurnedOff(true);
    //rbf.setGamma(.01);
    //gp.setKernel(rbf);

    Remove remove = new Remove();
    remove.setAttributeIndices("1");
    FilteredClassifier fc = new FilteredClassifier();

    MultiFilter mf = new MultiFilter();
    Filter[] filters = new Filter[2];
    filters[0] = remove;
    filters[1] = pf;
    mf.setFilters(filters);

    fc.setClassifier(gp);
    fc.setFilter(pf);

    as.setClassifier(gp);
    as.setClassIndex("last");
    //as.setDataset(new PlaceholderFile("/home/dale/OMD_clean.arff"));
    //as.setOutputDirectory(new PlaceholderFile("/research/dale"));
    ga.setLoggingLevel(LoggingLevel.INFO);
    as.setLoggingLevel(LoggingLevel.INFO);
    ga.optimise(as.getDataDef(), as);

}

From source file:asap.CrossValidation.java

/**
 *
 * @param dataInput/*from w  w  w.ja  v  a 2  s .  c o m*/
 * @param classIndex
 * @param removeIndices
 * @param cls
 * @param seed
 * @param folds
 * @param modelOutputFile
 * @return
 * @throws Exception
 */
public static String performCrossValidation(String dataInput, String classIndex, String removeIndices,
        AbstractClassifier cls, int seed, int folds, String modelOutputFile) throws Exception {

    PerformanceCounters.startTimer("cross-validation ST");

    PerformanceCounters.startTimer("cross-validation init ST");

    // loads data and set class index
    Instances data = DataSource.read(dataInput);
    String clsIndex = classIndex;

    switch (clsIndex) {
    case "first":
        data.setClassIndex(0);
        break;
    case "last":
        data.setClassIndex(data.numAttributes() - 1);
        break;
    default:
        try {
            data.setClassIndex(Integer.parseInt(clsIndex) - 1);
        } catch (NumberFormatException e) {
            data.setClassIndex(data.attribute(clsIndex).index());
        }
        break;
    }

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(removeIndices);
    removeFilter.setInputFormat(data);
    data = Filter.useFilter(data, removeFilter);

    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(data);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Evaluation eval = new Evaluation(randData);
    Instances trainSets[] = new Instances[folds];
    Instances testSets[] = new Instances[folds];
    Classifier foldCls[] = new Classifier[folds];

    for (int n = 0; n < folds; n++) {
        trainSets[n] = randData.trainCV(folds, n);
        testSets[n] = randData.testCV(folds, n);
        foldCls[n] = AbstractClassifier.makeCopy(cls);
    }

    PerformanceCounters.stopTimer("cross-validation init ST");
    PerformanceCounters.startTimer("cross-validation folds+train ST");
    //paralelize!!:--------------------------------------------------------------
    for (int n = 0; n < folds; n++) {
        Instances train = trainSets[n];
        Instances test = testSets[n];

        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);
        // build and evaluate classifier
        Classifier clsCopy = foldCls[n];
        clsCopy.buildClassifier(train);
        eval.evaluateModel(clsCopy, test);
    }

    cls.buildClassifier(data);
    //until here!-----------------------------------------------------------------

    PerformanceCounters.stopTimer("cross-validation folds+train ST");
    PerformanceCounters.startTimer("cross-validation post ST");
    // output evaluation
    String out = "\n" + "=== Setup ===\n" + "Classifier: " + cls.getClass().getName() + " "
            + Utils.joinOptions(cls.getOptions()) + "\n" + "Dataset: " + data.relationName() + "\n" + "Folds: "
            + folds + "\n" + "Seed: " + seed + "\n" + "\n"
            + eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false) + "\n";

    if (!modelOutputFile.isEmpty()) {
        SerializationHelper.write(modelOutputFile, cls);
    }

    PerformanceCounters.stopTimer("cross-validation post ST");
    PerformanceCounters.stopTimer("cross-validation ST");

    return out;
}

From source file:asap.CrossValidation.java

/**
 *
 * @param dataInput//from  w w w.j a v  a  2 s. c o m
 * @param classIndex
 * @param removeIndices
 * @param cls
 * @param seed
 * @param folds
 * @param modelOutputFile
 * @return
 * @throws Exception
 */
public static String performCrossValidationMT(String dataInput, String classIndex, String removeIndices,
        AbstractClassifier cls, int seed, int folds, String modelOutputFile) throws Exception {

    PerformanceCounters.startTimer("cross-validation MT");

    PerformanceCounters.startTimer("cross-validation init MT");

    // loads data and set class index
    Instances data = DataSource.read(dataInput);
    String clsIndex = classIndex;

    switch (clsIndex) {
    case "first":
        data.setClassIndex(0);
        break;
    case "last":
        data.setClassIndex(data.numAttributes() - 1);
        break;
    default:
        try {
            data.setClassIndex(Integer.parseInt(clsIndex) - 1);
        } catch (NumberFormatException e) {
            data.setClassIndex(data.attribute(clsIndex).index());
        }
        break;
    }

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(removeIndices);
    removeFilter.setInputFormat(data);
    data = Filter.useFilter(data, removeFilter);

    // randomize data
    Random rand = new Random(seed);
    Instances randData = new Instances(data);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Evaluation eval = new Evaluation(randData);
    List<Thread> foldThreads = (List<Thread>) Collections.synchronizedList(new LinkedList<Thread>());

    List<FoldSet> foldSets = (List<FoldSet>) Collections.synchronizedList(new LinkedList<FoldSet>());

    for (int n = 0; n < folds; n++) {
        foldSets.add(new FoldSet(randData.trainCV(folds, n), randData.testCV(folds, n),
                AbstractClassifier.makeCopy(cls)));

        if (n < Config.getNumThreads() - 1) {
            Thread foldThread = new Thread(new CrossValidationFoldThread(n, foldSets, eval));
            foldThreads.add(foldThread);
        }
    }

    PerformanceCounters.stopTimer("cross-validation init MT");
    PerformanceCounters.startTimer("cross-validation folds+train MT");
    //paralelize!!:--------------------------------------------------------------
    if (Config.getNumThreads() > 1) {
        for (Thread foldThread : foldThreads) {
            foldThread.start();
        }
    } else {
        //use the current thread to run the cross-validation instead of using the Thread instance created here:
        new CrossValidationFoldThread(0, foldSets, eval).run();
    }

    cls.buildClassifier(data);

    for (Thread foldThread : foldThreads) {
        foldThread.join();
    }

    //until here!-----------------------------------------------------------------
    PerformanceCounters.stopTimer("cross-validation folds+train MT");
    PerformanceCounters.startTimer("cross-validation post MT");
    // evaluation for output:
    String out = "\n" + "=== Setup ===\n" + "Classifier: " + cls.getClass().getName() + " "
            + Utils.joinOptions(cls.getOptions()) + "\n" + "Dataset: " + data.relationName() + "\n" + "Folds: "
            + folds + "\n" + "Seed: " + seed + "\n" + "\n"
            + eval.toSummaryString("=== " + folds + "-fold Cross-validation ===", false) + "\n";

    if (!modelOutputFile.isEmpty()) {
        SerializationHelper.write(modelOutputFile, cls);
    }

    PerformanceCounters.stopTimer("cross-validation post MT");
    PerformanceCounters.stopTimer("cross-validation MT");
    return out;
}

From source file:br.com.ufu.lsi.rebfnetwork.RBFModel.java

License:Open Source License

/**
 * Method used to pre-process the data, perform clustering, and
 * set the initial parameter vector./*from w  w  w.  j  a  v a 2 s. c  o m*/
 */
protected Instances initializeClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // Make sure data is shuffled
    Random random = new Random(m_Seed);
    if (data.numInstances() > 2) {
        random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    double y0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    int index = 1;
    while (index < data.numInstances() && data.instance(index).classValue() == y0) {
        index++;
    }
    if (index == data.numInstances()) {
        // degenerate case, all class values are equal
        // we don't want to deal with this, too much hassle
        throw new Exception("All class values are the same. At least two class values should be different");
    }
    double y1 = data.instance(index).classValue();

    // Replace missing values   
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data after removing useless attributes!), "
                        + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return data;
    } else {
        m_ZeroR = null;
    }

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Normalize();
    ((Normalize) m_Filter).setIgnoreClass(true);
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);
    double z0 = data.instance(0).classValue(); // This stuff is not relevant in classification case
    double z1 = data.instance(index).classValue();
    m_x1 = (y0 - y1) / (z0 - z1); // no division by zero, since y0 != y1 guaranteed => z0 != z1 ???
    m_x0 = (y0 - m_x1 * z0); // = y1 - m_x1 * z1

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Run k-means
    SimpleKMeans skm = new SimpleKMeans();
    skm.setMaxIterations(10000);
    skm.setNumClusters(m_numUnits);
    Remove rm = new Remove();
    data.setClassIndex(-1);
    rm.setAttributeIndices((m_classIndex + 1) + "");
    rm.setInputFormat(data);
    Instances dataRemoved = Filter.useFilter(data, rm);
    data.setClassIndex(m_classIndex);
    skm.buildClusterer(dataRemoved);
    Instances centers = skm.getClusterCentroids();

    if (centers.numInstances() < m_numUnits) {
        m_numUnits = centers.numInstances();
    }

    // Set up arrays
    OFFSET_WEIGHTS = 0;
    if (m_useAttributeWeights) {
        OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
        OFFSET_CENTERS = OFFSET_ATTRIBUTE_WEIGHTS + m_numAttributes;
    } else {
        OFFSET_ATTRIBUTE_WEIGHTS = -1;
        OFFSET_CENTERS = (m_numUnits + 1) * m_numClasses;
    }
    OFFSET_SCALES = OFFSET_CENTERS + m_numUnits * m_numAttributes;

    switch (m_scaleOptimizationOption) {
    case USE_GLOBAL_SCALE:
        m_RBFParameters = new double[OFFSET_SCALES + 1];
        break;
    case USE_SCALE_PER_UNIT_AND_ATTRIBUTE:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits * m_numAttributes];
        break;
    default:
        m_RBFParameters = new double[OFFSET_SCALES + m_numUnits];
        break;
    }

    // Set initial radius based on distance to nearest other basis function
    double maxMinDist = -1;
    for (int i = 0; i < centers.numInstances(); i++) {
        double minDist = Double.MAX_VALUE;
        for (int j = i + 1; j < centers.numInstances(); j++) {
            double dist = 0;
            for (int k = 0; k < centers.numAttributes(); k++) {
                if (k != centers.classIndex()) {
                    double diff = centers.instance(i).value(k) - centers.instance(j).value(k);
                    dist += diff * diff;
                }
            }
            if (dist < minDist) {
                minDist = dist;
            }
        }
        if ((minDist != Double.MAX_VALUE) && (minDist > maxMinDist)) {
            maxMinDist = minDist;
        }
    }

    // Initialize parameters
    if (m_scaleOptimizationOption == USE_GLOBAL_SCALE) {
        m_RBFParameters[OFFSET_SCALES] = Math.sqrt(maxMinDist);
    }
    for (int i = 0; i < m_numUnits; i++) {
        if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT) {
            m_RBFParameters[OFFSET_SCALES + i] = Math.sqrt(maxMinDist);
        }
        int k = 0;
        for (int j = 0; j < m_numAttributes; j++) {
            if (k == centers.classIndex()) {
                k++;
            }
            if (j != data.classIndex()) {
                if (m_scaleOptimizationOption == USE_SCALE_PER_UNIT_AND_ATTRIBUTE) {
                    m_RBFParameters[OFFSET_SCALES + (i * m_numAttributes + j)] = Math.sqrt(maxMinDist);
                }
                m_RBFParameters[OFFSET_CENTERS + (i * m_numAttributes) + j] = centers.instance(i).value(k);
                k++;
            }
        }
    }

    if (m_useAttributeWeights) {
        for (int j = 0; j < m_numAttributes; j++) {
            if (j != data.classIndex()) {
                m_RBFParameters[OFFSET_ATTRIBUTE_WEIGHTS + j] = 1.0;
            }
        }
    }

    initializeOutputLayer(random);

    return data;
}

From source file:br.fapesp.myutils.MyUtils.java

License:Open Source License

/**
 * Remove supervision from a data set// w  ww  .j  a  va 2s  . c  om
 * 
 * @param dataset
 *            an Instances data set
 * @param classIndex
 *            if -1 use the last attribute
 * @return a new copy of the data set with supervision removed
 */
public static Instances removeSupervision(Instances dataset, int classIndex) {
    Instances unsupervised = new Instances(dataset);
    String index = classIndex == -1 ? "last" : String.valueOf(classIndex);

    // Use the Remove filter to delete supervision from the data set:
    Remove rm = new Remove();
    rm.setAttributeIndices(index);

    try {
        rm.setInputFormat(unsupervised);
        unsupervised = Filter.useFilter(unsupervised, rm);
        return unsupervised;
    } catch (Exception e) {
        e.printStackTrace();
    }

    return null;
}