Example usage for weka.filters.unsupervised.attribute NumericToNominal setOptions

List of usage examples for weka.filters.unsupervised.attribute NumericToNominal setOptions

Introduction

On this page you can find example usages of weka.filters.unsupervised.attribute.NumericToNominal.setOptions.

Prototype

@Override
public void setOptions(String[] options) throws Exception 

Source Link

Document

Parses a given list of options.

Usage

From source file:com.relationalcloud.main.ExplanationSingleAttribute.java

License:Open Source License

/**
 * @param args/*from www  .  j a  v a  2  s .c o  m*/
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");

    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        Statement stmt = conn.createStatement();

        // NOTE: the paramenter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine the which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set

        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");

        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";

                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();

                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE:  "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * @param args/*w  ww  .  j av  a  2  s .  c o  m*/
 */
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST
    // VARIOUS PARSER FUNCTIONALITIES
    System.out.println("Loading and processing TPCC traces...");

    Connection conn;
    try {

        String schemaname = ini.getProperty("schema");
        String connection = ini.getProperty("conn");
        String user = ini.getProperty("user");
        String password = ini.getProperty("password");
        conn = DriverManager.getConnection(connection + schemaname, user, password);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        conn.setAutoCommit(true);

        Statement stmt = conn.createStatement();

        String txnLogTable = ini.getProperty("txnLogTable");
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`";
        ResultSet res = stmt.executeQuery(sqlstring);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            // System.out.println("SQL: " +sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        String accessLogTable = ini.getProperty("accessLogTable");

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        for (String str : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str
                        + " s WHERE tableid = \"" + str + "\" AND s.id = g.id";
                System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    a0.add(new Double(res.getObject(1).hashCode()));
                    a1.add(new Double(res.getObject(2).hashCode()));
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    tree.buildClassifier(newData); // build classifier

                                }
                                System.out.println("CLASSIFICATION CONFIDENCE:  " + tree.getConfidenceFactor()
                                        + "\n " + tree.toString());

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Returns a copy of the dataset whose last attribute has been run through a
 * to-nominal filter.
 *
 * <p>A numeric last attribute goes through {@code NumericToNominal}; any
 * other attribute type goes through {@code StringToNominal}. Both filters
 * are restricted to the last attribute with the range option
 * {@code -R last}.
 *
 * @param data the dataset to convert
 * @return the filtered dataset produced by {@code Filter.useFilter}
 * @throws Exception if configuring or applying the filter fails
 */
public static Instances makeLastNominal(Instances data) throws Exception {
    final int lastIndex = data.numAttributes() - 1;

    if (data.attribute(lastIndex).type() != Attribute.NUMERIC) {
        StringToNominal stringFilter = new StringToNominal();
        stringFilter.setOptions(new String[] { "-R", "last" });
        // The input format has to be set after the options.
        stringFilter.setInputFormat(data);
        return Filter.useFilter(data, stringFilter);
    }

    NumericToNominal numericFilter = new NumericToNominal();
    numericFilter.setOptions(new String[] { "-R", "last" });
    // The input format has to be set after the options.
    numericFilter.setInputFormat(data);
    return Filter.useFilter(data, numericFilter);
}

From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java

/**
 * Trains a fresh Id3 classifier on the given data and returns its textual
 * form.
 *
 * <p>Attributes 1 to 4 (1-based range {@code -R 1-4}) are converted from
 * numeric to nominal before training. Any failure during filtering or
 * training is logged at SEVERE level and not rethrown, so the returned string
 * is whatever {@code tree.toString()} yields at that point.
 *
 * @param arff the training data
 * @return the string representation of the {@code tree} field
 */
public String id3(Instances arff) {
    tree = new Id3();
    try {
        NumericToNominal toNominal = new NumericToNominal();
        toNominal.setOptions(new String[] { "-R", "1-4" });
        // The input format has to be set after the options.
        toNominal.setInputFormat(arff);

        tree.buildClassifier(Filter.useFilter(arff, toNominal));
    } catch (Exception ex) {
        Logger.getLogger(Controller.class.getName()).log(Level.SEVERE, null, ex);
    }

    return tree.toString();
}

From source file:Helper.CustomFilter.java

/**
 * Applies {@code NumericToNominal} to the attribute range
 * {@code 1-<number of attributes>}, i.e. to every attribute of the dataset.
 *
 * @param structure the dataset to convert
 * @return the filtered dataset produced by {@code Filter.useFilter}
 * @throws Exception if configuring or applying the filter fails
 */
public Instances convertNumericToNominal(Instances structure) throws Exception {
    NumericToNominal toNominal = new NumericToNominal();
    String allAttributes = "1-" + structure.numAttributes();
    toNominal.setOptions(new String[] { "-R", allAttributes });
    // The input format has to be set after the options.
    toNominal.setInputFormat(structure);
    return Filter.useFilter(structure, toNominal);
}

From source file:motaz.CODB.java

License:Open Source License

/**
 * Prepares the data structures used for class-outlier detection.
 *
 * <p>Steps performed:
 * <ol>
 * <li>reject data that contains string attributes;</li>
 * <li>convert attribute number 3 (1-based range {@code -R 3}) from numeric
 * to nominal and store the result in {@code newData};</li>
 * <li>create the database for the configured type and insert one
 * {@code DataObject} per instance;</li>
 * <li>read the current maximum {@code pk} from
 * {@code geo_osfpm.geo_osfpm_outlier} and fill {@code pk_list} with the
 * consecutive keys that follow it;</li>
 * <li>set top-N to a fifth of the database size and compute the min/max
 * values of the database.</li>
 * </ol>
 *
 * @param instances the instances that need to be checked for class outliers
 * @throws java.lang.Exception if the data contains string attributes, or if
 *         filtering, database construction or the SQL query fails
 */
public void buildCODB(Instances instances) throws Exception {

    if (instances.checkForStringAttributes()) {
        throw new Exception("Can't handle string attributes!");
    }

    NumericToNominal convert = new NumericToNominal();
    // range of variables to make nominal (1-based)
    convert.setOptions(new String[] { "-R", "3" });
    // The input format has to be set after the options.
    convert.setInputFormat(instances);

    newData = Filter.useFilter(instances, convert);
    // NOTE(review): the class index is 0-based, so 3 is the fourth attribute,
    // while the filter above converted the third one - confirm this is intended.
    newData.setClassIndex(3);
    System.out.println(newData.attribute(2).isNominal());

    database = databaseForName(getDatabase_Type(), newData);
    for (int i = 0; i < database.getInstances().numInstances(); i++) {
        DataObject dataObject = dataObjectForName(getDatabase_distanceType(),
                database.getInstances().instance(i), Integer.toString(i), database);

        database.insert(dataObject);
    }
    pk_list = new double[database.size()];

    // Get the current maximum primary key. Statement and result set are closed
    // as soon as the value has been read; the connection itself is left alone
    // because its ownership is not visible from here.
    String query = "select max(pk) as ind from geo_osfpm.geo_osfpm_outlier";
    int pk;
    try (PreparedStatement preparedStatement = PostgreSQLlocal.PostgreSQLlocal().prepareStatement(query);
            ResultSet rs = preparedStatement.executeQuery()) {
        // getInt returns 0 for SQL NULL (empty table); use the same value
        // should the query return no row at all.
        pk = rs.next() ? rs.getInt("ind") : 0;
    }

    System.out.println(database.size());
    // New rows get the keys pk + 1, pk + 2, ...
    for (int i = 0; i < pk_list.length; i++) {
        pk_list[i] = pk + i + 1;
    }

    setTopN(database.size() / 5);
    database.setMinMaxValues();
    System.out.println("Inserted Values");
}

From source file:soccer.core.MyMatchLoader.java

/**
 * Loads the dataset from {@code loader}, converts its last attribute from
 * numeric to nominal, marks that attribute as the class and prints the
 * dataset summary to standard output.
 *
 * @throws IOException if the loader cannot read the dataset
 * @throws Exception if configuring or applying the filter fails
 */
public void test() throws IOException, Exception {
    Instances loaded = loader.getDataSet();

    NumericToNominal lastToNominal = new NumericToNominal();
    String[] lastAttributeRange = { "-R", "last" };
    lastToNominal.setOptions(lastAttributeRange);
    // The input format has to be set after the options.
    lastToNominal.setInputFormat(loaded);

    Instances converted = Filter.useFilter(loaded, lastToNominal);
    int classIndex = converted.numAttributes() - 1;
    converted.setClassIndex(classIndex);
    System.out.println(converted.toSummaryString());
}

From source file:soccer.core.SimpleClassifier.java

/**
 * Cross-validates an {@code EnsembleSelection} model on the data built by
 * {@code loader} and prints recall, precision and F-measure for class index 1
 * followed by the evaluation summary.
 *
 * <p>Attributes 5, 6, 8 and 9 (1-based range) are converted from numeric to
 * nominal first. The model library is written to
 * {@code ./ensembleLib.model.xml} and handed to the ensemble through its
 * {@code -L} option.
 *
 * @throws IOException if the data cannot be loaded
 * @throws Exception if filtering, saving the library, configuring the model
 *         or the cross-validation fails
 */
public void evaluate() throws IOException, Exception {
    Instances data = loader.buildInstances();
    NumericToNominal toNominal = new NumericToNominal();
    toNominal.setOptions(new String[] { "-R", "5,6,8,9" });
    // The input format has to be set after the options.
    toNominal.setInputFormat(data);
    data = Filter.useFilter(data, toNominal);
    // NOTE(review): the class index is 0-based, so 6 is the seventh attribute,
    // which is not in the 1-based range converted above - confirm that this
    // attribute is already nominal.
    data.setClassIndex(6);

    // Library entries are fully qualified class names of Weka classifiers.
    EnsembleLibrary ensembleLib = new EnsembleLibrary();
    ensembleLib.addModel("weka.classifiers.trees.J48");
    ensembleLib.addModel("weka.classifiers.bayes.NaiveBayes");
    ensembleLib.addModel("weka.classifiers.functions.SMO");
    ensembleLib.addModel("weka.classifiers.meta.AdaBoostM1");
    ensembleLib.addModel("weka.classifiers.meta.LogitBoost");
    ensembleLib.addModel("weka.classifiers.trees.DecisionStump");
    EnsembleLibrary.saveLibrary(new File("./ensembleLib.model.xml"), ensembleLib, null);

    EnsembleSelection model = new EnsembleSelection();
    model.setOptions(new String[] { "-L", "./ensembleLib.model.xml", // </path/to/modelLibrary>
            "-B", "10", // <numModelBags>
            "-E", "1.0", // <modelRatio>
            "-V", "0.25", // <validationRatio>
            "-H", "100", // <hillClimbIterations>
            "-I", "1.0", // <sortInitialization>
            "-X", "2", // <numFolds>
            "-P", "roc", // <hillclimbMetric>
            "-A", "forward", // <algorithm>
            "-R", "true", // - Flag to be selected more than once
            "-G", "true", // - stops adding models when performance degrades
            "-O", "true", // - verbose output.
            "-S", "1", // <num> - Random number seed.
            "-D", "true" // - run in debug mode
    });

    final int folds = 5;
    Evaluation eval = new Evaluation(data);
    // Fixed seed so that repeated runs use the same fold assignment.
    eval.crossValidateModel(model, data, folds, new Random(1), new Object[] {});
    System.out.println(model.getClass().getName() + "\n" + "\tRecall:    " + eval.recall(1) + "\n"
            + "\tPrecision: " + eval.precision(1) + "\n" + "\tF-measure: " + eval.fMeasure(1));
    System.out.println(eval.toSummaryString());
}