List of usage examples for weka.filters.unsupervised.attribute NumericToNominal setOptions
@Override public void setOptions(String[] options) throws Exception
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/** * @param args/*from www . j a v a 2 s .c o m*/ */ @Deprecated public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // loading properties from file String schemaname = ini.getProperty("schemaname"); String partitioningMethod = ini.getProperty("partitioningMethod"); String pcol; if (partitioningMethod.equals("repGraph")) { System.out.println("Replication Graph: using replicated column"); pcol = ini.getProperty("replicatedPartitionCol"); } else { pcol = ini.getProperty("graphPartitionCol"); } String accessLogTable = ini.getProperty("accessLogTable"); String numb_trans_to_process = ini.getProperty("numb_trans_to_process"); String txnLogTable = ini.getProperty("txnLogTable"); String driver = ini.getProperty("driver"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); System.out.println("Loading and processing " + schemaname + " traces..."); // Register jdbcDriver try { Class.forName(driver); } catch (ClassNotFoundException e) { e.printStackTrace(); } Connection conn; try { conn = DriverManager.getConnection(connection + schemaname, user, password); conn.setAutoCommit(true); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); Statement stmt = conn.createStatement(); // NOTE: the paramenter numb_trans_to_process is used to limit // the number of transactions parsed to determine the which attributes // are common in the workload WHERE clauses. 
This can be a subset of the // overall set String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process; ResultSet res = stmt.executeQuery(sqlstring); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); i++; } double tend = System.currentTimeMillis(); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); System.out.println("ANALISYS RESULTS:\n "); wa.printStatsByTableColumn(); for (String str : wa.getAllTableNames()) { if (str == null) continue; System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str); for (SimpleCount sc : wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g." 
+ pcol + " FROM `" + accessLogTable + "` g, relcloud_" + str + " s WHERE tableid = \"" + str + "\" AND s.relcloud_id = g.tupleid"; // System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { Object o1 = res.getObject(1); Object o2 = res.getObject(2); if (o1 != null && o2 != null) { a0.add(new Double(o1.hashCode())); a1.add(new Double(o2.hashCode())); } } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = WekaHelper.retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { long treeTstart = System.currentTimeMillis(); tree.buildClassifier(newData); // build classifier long treeTend = System.currentTimeMillis(); System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: " + (treeTend - treeTstart) + "ms \n" + tree.toString()); System.out.println("TREE:" + tree.prefix()); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/** * @param args/*w ww . j av a 2 s . c o m*/ */ public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST // VARIOUS PARSER FUNCTIONALITIES System.out.println("Loading and processing TPCC traces..."); Connection conn; try { String schemaname = ini.getProperty("schema"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); conn.setAutoCommit(true); Statement stmt = conn.createStatement(); String txnLogTable = ini.getProperty("txnLogTable"); String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`"; ResultSet res = stmt.executeQuery(sqlstring); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); // System.out.println("SQL: " +sql); i++; } double tend = System.currentTimeMillis(); String accessLogTable = ini.getProperty("accessLogTable"); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); for (String str : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + str); for (SimpleCount sc : 
wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str + " s WHERE tableid = \"" + str + "\" AND s.id = g.id"; System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { a0.add(new Double(res.getObject(1).hashCode())); a1.add(new Double(res.getObject(2).hashCode())); } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { tree.buildClassifier(newData); // build classifier } System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n " + tree.toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Invokes filter to transform last parameter into a Nominal * //from w w w . ja v a 2s . c o m * @param data * @return * @throws Exception */ public static Instances makeLastNominal(Instances data) throws Exception { Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } return newData; }
From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java
/**
 * Builds an Id3 tree on {@code arff} after making attributes 1-4 nominal.
 *
 * <p>Any exception raised while filtering or training is logged at SEVERE level and otherwise
 * ignored, so the returned text is whatever the tree reports at that point.
 *
 * @param arff the training data
 * @return the string form of the tree
 */
public String id3(Instances arff) {
    tree = new Id3();
    try {
        NumericToNominal nominalizer = new NumericToNominal();
        nominalizer.setOptions(new String[] { "-R", "1-4" });
        nominalizer.setInputFormat(arff);
        Instances nominalData = Filter.useFilter(arff, nominalizer);
        tree.buildClassifier(nominalData);
    } catch (Exception ex) {
        Logger logger = Logger.getLogger(Controller.class.getName());
        logger.log(Level.SEVERE, null, ex);
    }
    return tree.toString();
}
From source file:Helper.CustomFilter.java
public Instances convertNumericToNominal(Instances structure) throws Exception { NumericToNominal convert = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; options[1] = "1-" + structure.numAttributes(); convert.setOptions(options); convert.setInputFormat(structure);//from w w w . java2s . c o m structure = Filter.useFilter(structure, convert); return structure; }
From source file:motaz.CODB.java
License:Open Source License
/** * prepair the data:remove instances with missing value or replace missing values (if specified) * perform normalization for numeric attributes * @param instances The instances that need to be detected for class outliers * @throws java.lang.Exception If clustering was not successful *//*from w ww.j av a 2s . c o m*/ public void buildCODB(Instances instances) throws Exception { if (instances.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } NumericToNominal convert = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; options[1] = "3"; //range of variables to make nominal convert.setOptions(options); convert.setInputFormat(instances); newData = Filter.useFilter(instances, convert); newData.setClassIndex(3); Instances filteredInstances = newData; System.out.println(newData.attribute(2).isNominal()); database = databaseForName(getDatabase_Type(), filteredInstances); for (int i = 0; i < database.getInstances().numInstances(); i++) { } for (int i = 0; i < database.getInstances().numInstances(); i++) { DataObject dataObject = dataObjectForName(getDatabase_distanceType(), database.getInstances().instance(i), Integer.toString(i), database); database.insert(dataObject); } pk_list = new double[database.size()]; //Get the max row ResultSet rs = null; PreparedStatement preparedStatement = null; String query = "select max(pk) as ind from geo_osfpm.geo_osfpm_outlier"; preparedStatement = PostgreSQLlocal.PostgreSQLlocal().prepareStatement(query); rs = preparedStatement.executeQuery(); rs.next(); int pk = rs.getInt("ind"); System.out.println(database.size()); for (int i = 0; i < database.size(); i++) { DataObject dataObject = dataObjectForName(getDatabase_distanceType(), database.getInstances().instance(i), Integer.toString(i), database); pk_list[i] = pk + i + 1; ; } setTopN((database.size() / (5))); database.setMinMaxValues(); System.out.println("Inserted Values"); }
From source file:soccer.core.MyMatchLoader.java
/**
 * Loads the dataset, makes its last attribute nominal, marks that attribute as the class and
 * prints the dataset summary to standard output.
 *
 * @throws IOException if the loader cannot read the dataset
 * @throws Exception if configuring or applying the filter fails
 */
public void test() throws IOException, Exception {
    final Instances raw = loader.getDataSet();

    final NumericToNominal lastToNominal = new NumericToNominal();
    final String[] filterOptions = { "-R", "last" };
    lastToNominal.setOptions(filterOptions);
    lastToNominal.setInputFormat(raw);

    final Instances prepared = Filter.useFilter(raw, lastToNominal);
    final int classColumn = prepared.numAttributes() - 1;
    prepared.setClassIndex(classColumn);

    System.out.println(prepared.toSummaryString());
}
From source file:soccer.core.SimpleClassifier.java
public void evaluate() throws IOException, Exception { Instances data = loader.buildInstances(); NumericToNominal toNominal = new NumericToNominal(); toNominal.setOptions(new String[] { "-R", "5,6,8,9" }); toNominal.setInputFormat(data);/*from w w w.j a v a2 s .co m*/ data = Filter.useFilter(data, toNominal); data.setClassIndex(6); // DataSink.write(ARFF_STRING, data); EnsembleLibrary ensembleLib = new EnsembleLibrary(); ensembleLib.addModel("weka.classifiers.trees.J48"); ensembleLib.addModel("weka.classifiers.bayes.NaiveBayes"); ensembleLib.addModel("weka.classifiers.functions.SMO"); ensembleLib.addModel("weka.classifiers.meta.AdaBoostM1"); ensembleLib.addModel("weka.classifiers.meta.LogitBoost"); ensembleLib.addModel("classifiers.trees.DecisionStump"); ensembleLib.addModel("classifiers.trees.DecisionStump"); EnsembleLibrary.saveLibrary(new File("./ensembleLib.model.xml"), ensembleLib, null); EnsembleSelection model = new EnsembleSelection(); model.setOptions(new String[] { "-L", "./ensembleLib.model.xml", // </path/to/modelLibrary>"-W", path+"esTmp", // </path/to/working/directory> - "-B", "10", // <numModelBags> "-E", "1.0", // <modelRatio>. "-V", "0.25", // <validationRatio> "-H", "100", // <hillClimbIterations> "-I", "1.0", // <sortInitialization> "-X", "2", // <numFolds> "-P", "roc", // <hillclimbMettric> "-A", "forward", // <algorithm> "-R", "true", // - Flag to be selected more than once "-G", "true", // - stops adding models when performance degrades "-O", "true", // - verbose output. "-S", "1", // <num> - Random number seed. 
"-D", "true" // - run in debug mode }); // double resES[] = evaluate(ensambleSel); // System.out.println("Ensemble Selection\n" // + "\tchurn: " + resES[0] + "\n" // + "\tappetency: " + resES[1] + "\n" // + "\tup-sell: " + resES[2] + "\n" // + "\toverall: " + resES[3] + "\n"); // models.add(new J48()); // models.add(new RandomForest()); // models.add(new NaiveBayes()); // models.add(new AdaBoostM1()); // models.add(new Logistic()); // models.add(new MultilayerPerceptron()); int FOLDS = 5; Evaluation eval = new Evaluation(data); // // for (Classifier model : models) { eval.crossValidateModel(model, data, FOLDS, new Random(1), new Object[] {}); System.out.println(model.getClass().getName() + "\n" + "\tRecall: " + eval.recall(1) + "\n" + "\tPrecision: " + eval.precision(1) + "\n" + "\tF-measure: " + eval.fMeasure(1)); System.out.println(eval.toSummaryString()); // } // LogitBoost cl = new LogitBoost(); // cl.setOptions(new String[] { // "-Q", "-I", "100", "-Z", "4", "-O", "4", "-E", "4" // }); // cl.buildClassifier(data); // Evaluation eval = new Evaluation(data); // eval.crossValidateModel(cl, data, 6, new Random(1), new Object[]{}); // System.out.println(eval.weightedFMeasure()); // System.out.println(cl.graph()); // System.out.println(cl.globalInfo()); }