Example usage of weka.filters.unsupervised.attribute.StringToNominal

List of usage examples for weka.filters.unsupervised.attribute.StringToNominal

Introduction

On this page you can find usage examples for weka.filters.unsupervised.attribute.StringToNominal.

Prototype

StringToNominal

Source Link

Usage

From source file:clases.Preproceso.java

/**
 * Runs a {@code StringToNominal} filter over the attributes selected by
 * {@code i}.
 *
 * @param data the dataset to filter
 * @param i    the attribute range handed to the filter
 * @return the filtered dataset, or {@code null} when the filter fails (the
 *         failure is logged at SEVERE level)
 */
public static Instances filterStringToNominal(Instances data, String i) {
    Instances converted = null;
    try {
        final StringToNominal converter = new StringToNominal();
        // the range has to be configured before the input format is announced
        converter.setAttributeRange(i);
        converter.setInputFormat(data);
        converted = Filter.useFilter(data, converter);
    } catch (Exception ex) {
        Logger.getLogger(Preproceso.class.getName()).log(Level.SEVERE, null, ex);
    }
    return converted;
}

From source file:com.relationalcloud.main.ExplanationSingleAttribute.java

License:Open Source License

/**
 * Tries to explain a partitioning by testing, column by column, how well the
 * column values predict the partition recorded in the access log.
 * <p>
 * Settings are read from the properties file named by the {@code prop} system
 * property. The transaction log is parsed to collect the table columns to
 * examine. Each (column, partition column) pair is screened with a Pearson
 * correlation computed on the hash codes of the values; when the absolute
 * correlation exceeds the {@code correlationThreshold} property, a J48
 * decision tree is trained on the pair and printed.
 *
 * @param args command-line arguments (not used)
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    // try-with-resources: the properties stream is closed even when load() fails
    try (FileInputStream propStream = new FileInputStream(System.getProperty("prop"))) {
        ini.load(propStream);
    } catch (IOException e) {
        // also covers FileNotFoundException; nothing below can work without the settings
        e.printStackTrace();
        return;
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");

    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    // constant-first equals: a missing property selects the default column instead of an NPE
    if ("repGraph".equals(partitioningMethod)) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // Both connections and the statement are closed automatically, also on failure.
    try (Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
            Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                    password);
            Statement stmt = conn.createStatement()) {

        conn.setAutoCommit(true);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);
        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        // NOTE: the parameter numb_trans_to_process is used to limit
        // the number of transactions parsed to determine which attributes
        // are common in the workload WHERE clauses. This can be a subset of the
        // overall set
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;

        try (ResultSet res = stmt.executeQuery(sqlstring)) {
            // kept as doubles so the printed figures keep their original format
            double tstart = System.currentTimeMillis();
            double i = 0;
            while (res.next()) {
                // PARSE THE STATEMENT
                wa.processSql(res.getString(1));
                i++;
            }
            double tend = System.currentTimeMillis();

            System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                    + (tend - tstart) / i + "ms per statement");
        }

        System.out.println("ANALISYS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null) {
                continue;
            }
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                // column value first, partition column second
                String featureSql = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";

                try (ResultSet res = stmt.executeQuery(featureSql)) {

                    // hash codes stand in for the values so any column type can be correlated;
                    // rows containing an SQL NULL are skipped
                    while (res.next()) {
                        Object o1 = res.getObject(1);
                        Object o2 = res.getObject(2);
                        if (o1 != null && o2 != null) {
                            a0.add((double) o1.hashCode());
                            a1.add((double) o2.hashCode());
                        }
                    }

                    if (a0.isEmpty()) {
                        continue;
                    }

                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    // unary == every row carries the same partition value
                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j]) {
                            unary = false;
                        }
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                        continue;
                    }

                    double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                    correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                    // if the correlation is high enough proceed to use decision
                    // trees.
                    if (Math.abs(correlation) > correlationThreshold) {
                        System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                + " correlation: " + correlation + " (HIGH)");

                        try {
                            // rewind so the rows already scanned can be turned into Weka instances
                            res.beforeFirst();

                            Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                            // set the last column to be the classIndex... is this
                            // correct?
                            data.setClassIndex(data.numAttributes() - 1);

                            // Weka attribute ranges are 1-based, so "2" is the second
                            // attribute (presumably the partition column of the query)
                            String[] nominalOptions = { "-R", "2" };
                            Instances newData;

                            if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                NumericToNominal ntn = new NumericToNominal();
                                ntn.setOptions(nominalOptions);
                                // inform filter about dataset **AFTER** setting options
                                ntn.setInputFormat(data);
                                newData = Filter.useFilter(data, ntn);
                            } else {
                                StringToNominal stn = new StringToNominal();
                                stn.setOptions(nominalOptions);
                                // inform filter about dataset **AFTER** setting options
                                stn.setInputFormat(data);
                                newData = Filter.useFilter(data, stn);
                            }

                            J48 tree = new J48(); // new instance of tree
                            tree.setOptions(new String[] { "-P" }); // set the options

                            if (!tree.getCapabilities().test(newData)) {
                                System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                        + newData.toSummaryString());
                                System.err.println("QUERY WAS:" + featureSql);
                            } else {
                                long treeTstart = System.currentTimeMillis();
                                tree.buildClassifier(newData); // build classifier
                                long treeTend = System.currentTimeMillis();
                                System.out.println("CLASSIFICATION CONFIDENCE:  "
                                        + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                        + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                System.out.println("TREE:" + tree.prefix());
                            }

                        } catch (Exception e) {
                            // a failure on one column must not abort the remaining columns
                            e.printStackTrace();
                        }

                    } else {
                        System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                + " correlation: " + correlation + " (LOW)");
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java

License:Open Source License

/**
 * @param args/*from w  ww. j  a v a2s .  com*/
 */
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST
    // VARIOUS PARSER FUNCTIONALITIES
    System.out.println("Loading and processing TPCC traces...");

    Connection conn;
    try {

        String schemaname = ini.getProperty("schema");
        String connection = ini.getProperty("conn");
        String user = ini.getProperty("user");
        String password = ini.getProperty("password");
        conn = DriverManager.getConnection(connection + schemaname, user, password);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);

        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        conn.setAutoCommit(true);

        Statement stmt = conn.createStatement();

        String txnLogTable = ini.getProperty("txnLogTable");
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`";
        ResultSet res = stmt.executeQuery(sqlstring);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            // System.out.println("SQL: " +sql);
            i++;
        }

        double tend = System.currentTimeMillis();

        String accessLogTable = ini.getProperty("accessLogTable");

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:"
                + (tend - tstart) / i + "ms per statement");
        for (String str : wa.getAllTableNames()) {

            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str
                        + " s WHERE tableid = \"" + str + "\" AND s.id = g.id";
                System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    a0.add(new Double(res.getObject(1).hashCode()));
                    a1.add(new Double(res.getObject(2).hashCode()));
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];

                    boolean unary = true;

                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {

                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);

                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));

                        // if the correlation is high enough proceed to use decision
                        // trees.
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (HIGH)");

                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this
                                // correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;

                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // first attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                    // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply fil

                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:"
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS:" + sqlstring);
                                } else {
                                    tree.buildClassifier(newData); // build classifier

                                }
                                System.out.println("CLASSIFICATION CONFIDENCE:  " + tree.getConfidenceFactor()
                                        + "\n " + tree.toString());

                            } catch (Exception e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }

                        } else {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }

    } catch (SQLException e) {
        e.printStackTrace();
    }

}

From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java

License:Open Source License

/**
 * Invokes filter to transform last parameter into a Nominal
 * /* w  ww.  j a va 2s  . c o  m*/
 * @param data
 * @return
 * @throws Exception
 */
public static Instances makeLastNominal(Instances data) throws Exception {
    Instances newData;

    if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
        NumericToNominal ntn = new NumericToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // first attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
        // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply fil

    } else {
        StringToNominal ntn = new StringToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // first attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
        // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply fil

    }

    return newData;
}

From source file:ffnn.FFNN.java

/**
 * Prepares a dataset by chaining four filters: {@code Reorder} (class
 * attribute moved to the end), {@code StringToNominal} over all attributes,
 * {@code NominalToBinary} and {@code Normalize}.
 *
 * @param i the dataset to preprocess
 * @return the dataset after the last filter that completed; when a step fails
 *         the exception is printed to standard output and the result of the
 *         preceding steps (possibly the untouched input) is returned
 */
public static Instances preprocess(Instances i) {
    Instances current = i;
    try {
        // 1-based position of the class attribute, as expected by Reorder
        final int classPosition = current.classIndex() + 1;
        final int attributeCount = current.numAttributes();

        // List every attribute except the class one, then the class one last.
        // The list starts at 2 when the class attribute is the first one.
        final int firstListed = (classPosition != 1) ? 1 : 2;
        final StringBuilder order = new StringBuilder();
        order.append(firstListed);
        for (int position = firstListed + 1; position <= attributeCount; position++) {
            if (position != classPosition) {
                order.append(',').append(position);
            }
        }
        order.append(',').append(classPosition);

        final Reorder reorder = new Reorder();
        reorder.setAttributeIndices(order.toString());
        reorder.setInputFormat(current);
        current = Filter.useFilter(current, reorder);

        final StringToNominal stringToNominal = new StringToNominal();
        stringToNominal.setAttributeRange("first-last");
        stringToNominal.setInputFormat(current);
        current = Filter.useFilter(current, stringToNominal);

        final NominalToBinary nominalToBinary = new NominalToBinary();
        nominalToBinary.setInputFormat(current);
        current = Filter.useFilter(current, nominalToBinary);

        final Normalize normalize = new Normalize();
        normalize.setInputFormat(current);
        current = Filter.useFilter(current, normalize);
    } catch (Exception e) {
        System.out.println(e.toString());
    }
    return current;
}

From source file:newsclassifier.NewsClassifier.java

/**
 * Converts the first attribute of {@code data} from string to nominal and
 * stores the filtered dataset back into {@code data}.
 *
 * @throws Exception if the filter cannot be configured or applied
 */
public void StrtoNom() throws Exception {
    StringToNominal filter = new StringToNominal();
    // Options must be set BEFORE setInputFormat(): the filter resolves the
    // attribute range against the dataset when the input format is announced.
    String[] opts = { "-R", "first" };
    filter.setOptions(opts);
    filter.setInputFormat(data);
    data = Filter.useFilter(data, filter);
}

From source file:ocr.ARFFSymbolFilter.java

License:Apache License

/**
 * Writes the symbols in {@code symbolData} to one or more ARFF files whose
 * names are derived from {@code filenameout}.
 * <p>
 * Each symbol is turned into a text line by {@code constructStringInstance};
 * the line is read as alternating name/value tokens, the names defining the
 * numeric attributes and the values filling one instance. The instances are
 * first dumped to a temporary {@code .arff#} file, which is then passed
 * through a {@code StringToNominal} filter to produce the final {@code .arff}
 * file and deleted. When {@code ARFFSymbolFilter.strokenumber} is set, one
 * file per value of {@code sym.size()} (1..ns) is produced, with that number
 * appended to the file name; otherwise a single file is written.
 * <p>
 * Any exception is printed via {@code printStackTrace()} and not rethrown.
 *
 * @param filenameout path prefix of the output file(s), without the
 *                    {@code .arff} extension
 * @param symbolData  list of groups ({@code ArrayList}s) of {@code Symbol}
 *                    objects
 */
public static void writeWeka(final String filenameout, final ArrayList<?> symbolData) {
    // NOTE(review): ns is overwritten below when strokenumber is false and is
    // not restored from nsold anywhere in this method - confirm this is intended.
    final int nsold = ARFFSymbolFilter.ns;
    ARFFSymbolFilter.tangent = (ARFFSymbolFilter.times > 1);
    try {
        if (!ARFFSymbolFilter.strokenumber) {
            ARFFSymbolFilter.ns = 1;
        }
        final DataOutputStream[] fileout = new DataOutputStream[ARFFSymbolFilter.ns];
        final Instances[] instances = new Instances[ARFFSymbolFilter.ns];
        System.out.println("Writing file");
        // Open one temporary ".arff#" stream per output file.
        for (int i = 0; i < ARFFSymbolFilter.ns; ++i) {
            final int k = ARFFSymbolFilter.strokenumber ? i : (nsold - 1);
            fileout[ARFFSymbolFilter.strokenumber ? i : 0] = new DataOutputStream(new FileOutputStream(
                    filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#"));
        }
        final int tot = symbolData.size();
        for (int j = 0; j < symbolData.size(); ++j) {
            final ArrayList<?> group = (ArrayList<?>) symbolData.get(j);
            for (int i = 0; i < group.size(); ++i) {
                final Symbol sym = (Symbol) group.get(i);
                // Index of the dataset this symbol goes to.
                final int k = ARFFSymbolFilter.strokenumber ? (sym.size() - 1) : 0;
                if (sym.name.equals("no_name") || sym.name.equals("empty_symbol")) {
                    // Placeholder symbols are reported and skipped.
                    System.out.print("#" + sym.name + "#");
                } else {
                    // One instance is generated per repetition.
                    for (int t = 0; t < ARFFSymbolFilter.times; ++t) {
                        final String line = constructStringInstance(sym, ARFFSymbolFilter.alpha);
                        if (line == null) {
                            System.out.print("line=null!");
                        } else {
                            if (instances[k] == null) {
                                // First line for this dataset: build the header from
                                // the name token of every name/value pair.
                                final StringTokenizer st = new StringTokenizer(line, " ");
                                final int nt = st.countTokens() / 2;
                                final FastVector att = new FastVector();
                                for (int kk = 0; kk < nt; ++kk) {
                                    final String token = st.nextToken();
                                    att.addElement(new Attribute(new String(token)));
                                    st.nextToken();
                                }
                                // A null value list makes "class" a string attribute;
                                // it is made nominal by the filter pass further down.
                                att.addElement(new Attribute("class", (FastVector) null));
                                (instances[k] = new Instances("Symbols of Size " + (k + 1), att, 1))
                                        .setClassIndex(att.size() - 1);
                            }
                            // Fill the instance from the value token of every pair.
                            final StringTokenizer st = new StringTokenizer(line, " ");
                            final int nt = st.countTokens() / 2;
                            final Instance inst = new Instance(nt + 1);
                            for (int kk = 0; kk < nt; ++kk) {
                                st.nextToken();
                                final String token = new String(st.nextToken());
                                inst.setValue(kk, Double.parseDouble(token));
                            }
                            inst.setDataset(instances[k]);
                            inst.setClassValue(oldReplace(sym.name, "\\", ""));
                            instances[k].add(inst);
                        }
                    }
                }
            }
            // Progress output whenever the integer percentage is a multiple of 10.
            if ((int) (100.0 * j) / tot % 10 == 0) {
                System.out.print((int) (100.0 * j) / tot + "%-");
            }
        }
        // Dump every dataset to its temporary file.
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            if (fileout[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("fo" + fileout[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            if (instances[ARFFSymbolFilter.strokenumber ? k : 0] == null) {
                System.out.println("in:" + instances[ARFFSymbolFilter.strokenumber ? k : 0]);
            }
            // NOTE(review): the null checks above only print; a null entry is still
            // dereferenced here and the resulting exception ends up in the catch below.
            fileout[ARFFSymbolFilter.strokenumber ? k : 0]
                    .writeBytes(instances[ARFFSymbolFilter.strokenumber ? k : 0].toString());
            fileout[ARFFSymbolFilter.strokenumber ? k : 0].close();
        }
        // Run StringToNominal from the temporary file (-i) into the final ".arff"
        // file (-o), then remove the temporary file.
        final StringToNominal filter = new StringToNominal();
        final String[] args = new String[4];
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            args[0] = "-i";
            args[1] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#";
            args[2] = "-o";
            args[3] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff";
            Filter.filterFile(filter, args);
            new File(args[1]).delete();
        }
        System.out.println("100.0%");
    } catch (FileNotFoundException fnfe) {
        fnfe.printStackTrace();
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}

From source file:org.openml.webapplication.features.FantailConnector.java

License:Open Source License

/**
 * Runs every batch characterizer whose qualities are not yet all listed in
 * {@code qualitiesAvailable} and collects the resulting qualities.
 * <p>
 * With an interval size, the characterizers see only the requested slice of
 * {@code fulldata}, passed through a {@code StringToNominal} filter; without
 * one they see {@code fulldata} itself, unfiltered.
 *
 * @param fulldata           the complete dataset
 * @param start              index of the first instance of the slice
 * @param interval_size      number of instances in the slice, or {@code null}
 *                           to use the complete dataset
 * @param qualitiesAvailable IDs of the qualities that already exist; when
 *                           {@code null}, every characterizer is skipped
 * @return the qualities computed by the characterizers that were run
 * @throws Exception if filtering or characterizing fails
 */
private List<Quality> datasetCharacteristics(Instances fulldata, Integer start, Integer interval_size,
        List<String> qualitiesAvailable) throws Exception {
    final Instances intervalData;

    // Be careful changing this!
    if (interval_size == null) {
        // todo: use StringToNominal filter? might be to expensive
        intervalData = fulldata;
    } else {
        final int length = Math.min(interval_size, fulldata.numInstances() - start);
        Instances slice = new Instances(fulldata, start, length);
        slice = applyFilter(slice, new StringToNominal(), "-R first-last");
        slice.setClassIndex(fulldata.classIndex());
        intervalData = slice;
    }

    final List<Quality> result = new ArrayList<DataQuality.Quality>();
    for (Characterizer dc : batchCharacterizers) {
        final boolean nothingToCompute = qualitiesAvailable == null
                || qualitiesAvailable.containsAll(Arrays.asList(dc.getIDs()));
        if (nothingToCompute) {
            Conversion.log("OK", "Extract Batch Features", dc.getClass().getName() + " - already in database");
            continue;
        }
        Conversion.log("OK", "Extract Batch Features",
                dc.getClass().getName() + ": " + Arrays.toString(dc.getIDs()));
        final Map<String, Double> qualities = dc.characterize(intervalData);
        result.addAll(hashMaptoList(qualities, start, interval_size));
    }
    return result;
}