List of usage examples for weka.filters.unsupervised.attribute NumericToNominal NumericToNominal
NumericToNominal
From source file:ap.mavenproject1.HelloWeka.java
public static void main(String args[]) { Instances data = null;/*from w ww . j a v a 2s . c o m*/ ArffLoader loader = new ArffLoader(); try { loader.setFile(new File("C:\\Users\\USER\\Desktop\\data.arff")); data = loader.getDataSet(); data.setClassIndex(data.numAttributes() - 1); } catch (IOException ex) { Logger.getLogger(HelloWeka.class.getName()).log(Level.SEVERE, null, ex); } Apriori apriori = new Apriori(); try { NumericToNominal numericToNominal = new NumericToNominal(); numericToNominal.setInputFormat(data); Instances nominalData = Filter.useFilter(data, numericToNominal); apriori.buildAssociations(nominalData); FastVector[] allTheRules; allTheRules = apriori.getAllTheRules(); for (int i = 0; i < allTheRules.length; i++) { System.out.println(allTheRules[i]); } // BufferedWriter writer = new BufferedWriter(new FileWriter("./output.arff")); // writer.write(nominalData.toString()); // writer.flush(); // writer.close(); } catch (Exception ex) { Logger.getLogger(HelloWeka.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:cish.CISH.java
private void trainClassifier() { try {/*from w w w.j a v a2s . c om*/ LibSVMLoader loader = new LibSVMLoader(); loader.setSource(getClass().getResource("/cish/traindata.libsvm")); Instances traindata = loader.getDataSet(); // Set the class attribute as nominal NumericToNominal filter = new NumericToNominal(); filter.setAttributeIndices("last"); filter.setInputFormat(traindata); dataset = Filter.useFilter(traindata, filter); // Train the LibSVM classifier = new LibSVM(); classifier.setOptions(new String[] { "-C", "8", "-G", "0.0625" }); System.out.println("CISH classifier has options"); for (String o : classifier.getOptions()) { System.out.print(o + " "); } System.out.println(); classifier.buildClassifier(dataset); } catch (IOException ex) { Logger.getLogger(CISH.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(CISH.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/** * @param args//from w ww . jav a2 s. co m */ @Deprecated public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // loading properties from file String schemaname = ini.getProperty("schemaname"); String partitioningMethod = ini.getProperty("partitioningMethod"); String pcol; if (partitioningMethod.equals("repGraph")) { System.out.println("Replication Graph: using replicated column"); pcol = ini.getProperty("replicatedPartitionCol"); } else { pcol = ini.getProperty("graphPartitionCol"); } String accessLogTable = ini.getProperty("accessLogTable"); String numb_trans_to_process = ini.getProperty("numb_trans_to_process"); String txnLogTable = ini.getProperty("txnLogTable"); String driver = ini.getProperty("driver"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); System.out.println("Loading and processing " + schemaname + " traces..."); // Register jdbcDriver try { Class.forName(driver); } catch (ClassNotFoundException e) { e.printStackTrace(); } Connection conn; try { conn = DriverManager.getConnection(connection + schemaname, user, password); conn.setAutoCommit(true); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); Statement stmt = conn.createStatement(); // NOTE: the paramenter numb_trans_to_process is used to limit // the number of transactions parsed to determine the which attributes // are common in the workload WHERE clauses. 
This can be a subset of the // overall set String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process; ResultSet res = stmt.executeQuery(sqlstring); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); i++; } double tend = System.currentTimeMillis(); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); System.out.println("ANALISYS RESULTS:\n "); wa.printStatsByTableColumn(); for (String str : wa.getAllTableNames()) { if (str == null) continue; System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str); for (SimpleCount sc : wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g." 
+ pcol + " FROM `" + accessLogTable + "` g, relcloud_" + str + " s WHERE tableid = \"" + str + "\" AND s.relcloud_id = g.tupleid"; // System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { Object o1 = res.getObject(1); Object o2 = res.getObject(2); if (o1 != null && o2 != null) { a0.add(new Double(o1.hashCode())); a1.add(new Double(o2.hashCode())); } } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = WekaHelper.retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { long treeTstart = System.currentTimeMillis(); tree.buildClassifier(newData); // build classifier long treeTend = System.currentTimeMillis(); System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: " + (treeTend - treeTstart) + "ms \n" + tree.toString()); System.out.println("TREE:" + tree.prefix()); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/** * @param args//from w ww . ja v a 2 s . c o m */ public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST // VARIOUS PARSER FUNCTIONALITIES System.out.println("Loading and processing TPCC traces..."); Connection conn; try { String schemaname = ini.getProperty("schema"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); conn.setAutoCommit(true); Statement stmt = conn.createStatement(); String txnLogTable = ini.getProperty("txnLogTable"); String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`"; ResultSet res = stmt.executeQuery(sqlstring); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); // System.out.println("SQL: " +sql); i++; } double tend = System.currentTimeMillis(); String accessLogTable = ini.getProperty("accessLogTable"); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); for (String str : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + str); for (SimpleCount sc : 
wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str + " s WHERE tableid = \"" + str + "\" AND s.id = g.id"; System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { a0.add(new Double(res.getObject(1).hashCode())); a1.add(new Double(res.getObject(2).hashCode())); } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { tree.buildClassifier(newData); // build classifier } System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n " + tree.toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Invokes filter to transform last parameter into a Nominal * /*from w ww . j a v a 2 s.c o m*/ * @param data * @return * @throws Exception */ public static Instances makeLastNominal(Instances data) throws Exception { Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } return newData; }
From source file:DataMiningLogHistoriKIRIPercobaan2.DecisionTree.java
/**
 * Builds an Id3 decision tree over the given instances after converting
 * attributes 1-4 from numeric to nominal (Id3 only handles nominal attributes).
 * On failure the error is logged and the string form of the untrained tree
 * is returned (best-effort, matching the original contract).
 *
 * @param arff the training instances
 * @return the string rendering of the learned tree
 */
public String id3(Instances arff) {
    tree = new Id3();
    try {
        NumericToNominal convert = new NumericToNominal();
        String[] options = new String[2];
        options[0] = "-R";
        options[1] = "1-4"; // convert the first four attributes
        convert.setOptions(options);
        convert.setInputFormat(arff);
        Instances newData = Filter.useFilter(arff, convert);
        tree.buildClassifier(newData);
    } catch (Exception ex) {
        // FIX: the original logged under Controller.class (copy-paste),
        // misattributing the error's source.
        Logger.getLogger(DecisionTree.class.getName()).log(Level.SEVERE, null, ex);
    }
    return tree.toString();
}
From source file:entities.ArffFile.java
/** * Generaliza el atributo especificado en la variable attribute segun la * variable n. Aca se evalua si el atributo es numerico para convertirlo a * nominal/*from w w w . java 2 s .co m*/ * * @param attribute indice del atributo a generalizar * @param n cantidad de digitos a ser reemplazados * @throws Exception */ public boolean generalizar(int attribute, int n) throws Exception { //instancesFilter = new Instances(instances); if (instancesFilter.attribute(attribute).type() == weka.core.Attribute.NUMERIC) { NumericToNominal numeric = new NumericToNominal(); numeric.setAttributeIndices((attribute + 1) + ""); numeric.setInputFormat(instances); instancesFilter = Filter.useFilter(instancesFilter, numeric); } FastVector values = new FastVector(); List<String> newValues = new ArrayList<>(); for (int i = 0; i < instancesFilter.numInstances(); i++) { String value = instancesFilter.instance(i).toString(attribute); int n2 = n; char[] copy = value.toCharArray(); if (copy.length < n) { return false; } while (n2 != 0) { copy[copy.length - n2] = '*'; n2--; } String newValue = new String(copy); if (!values.contains(newValue)) { values.addElement(new String(copy)); } newValues.add(newValue); } String oldName = new String(instancesFilter.attribute(attribute).name()); instancesFilter.deleteAttributeAt(attribute); instancesFilter.insertAttributeAt(new Attribute(oldName, values), instancesFilter.numAttributes()); for (int i = 0; i < instancesFilter.numInstances(); i++) { instancesFilter.instance(i).setValue(instancesFilter.numAttributes() - 1, newValues.get(i)); } //saveToFile(3); return true; }
From source file:ffnn.TucilWeka.java
/**
 * Applies NumericToNominal (default settings: all attributes) to the given
 * dataset. Returns null if the filter cannot be applied; the failure is logged.
 *
 * @param a the dataset to convert
 * @return the converted dataset, or null on failure
 */
public static Instances filterNumericToNominal(Instances a) {
    Instances converted = null;
    NumericToNominal toNominal = new NumericToNominal();
    try {
        toNominal.setInputFormat(a);
        converted = Filter.useFilter(a, toNominal);
    } catch (Exception ex) {
        Logger.getLogger(TucilWeka.class.getName()).log(Level.SEVERE, null, ex);
    }
    return converted;
}
From source file:Helper.CustomFilter.java
/**
 * Converts every attribute of the given dataset from numeric to nominal.
 *
 * @param structure the dataset to convert
 * @return the converted dataset
 * @throws Exception if the filter cannot be applied
 */
public Instances convertNumericToNominal(Instances structure) throws Exception {
    NumericToNominal toNominal = new NumericToNominal();
    // Select the full attribute range "1-<numAttributes>" (1-based, inclusive).
    String range = "1-" + structure.numAttributes();
    toNominal.setOptions(new String[] { "-R", range });
    toNominal.setInputFormat(structure);
    structure = Filter.useFilter(structure, toNominal);
    return structure;
}
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    // Builds a naive-Bayes-style frequency model: dataClassifier accumulates
    // raw per-(attribute, value, class) counts and infoClassifier the
    // corresponding conditional probabilities P(value | class).
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal; // NOTE(review): declared but never used in this method
    int numAttr = data.numAttributes(); // includes the class, i.e. attributes + 1

    // Scan for any numeric attribute (skipping the class index); if one is
    // found the data is normalized below before discretization.
    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;
    // apply the filter(s)
    if (wasNumeric) {
        // Normalize numeric attributes first, streaming every instance
        // through the filter and collecting the output into `dataset`.
        f = new Normalize();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }
        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    // Choose the discretization filter by configured name; both turn
    // numeric attributes into nominal ones.
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }
    try {
        if (wasNumeric) {
            // already normalized above — discretize the normalized copy
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }
    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // building data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // Pre-fill dataClassifier/infoClassifier with zeroed
    // [attribute][value][class] cells. `j` walks the dataset attributes,
    // `i` runs ahead of `j` once the class index has been skipped, and `m`
    // is the position in the class-free classifier lists.
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());
            if (j < i) {
                m = j - 1; // class attribute already skipped: shift left by one
            } else {
                m = j;
            }
            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());
                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // Count occurrences from the filtered dataset: for each instance,
    // increment the cell for its (attribute value, class value) pair;
    // sumClass tallies instances per class.
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1; // class attribute already skipped: shift left by one
                } else {
                    m = k;
                }
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);
                if (m == 0) {
                    // count each instance's class exactly once (on the first attribute)
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // Convert counts to conditional probabilities P(value | class) by
    // dividing each cell by its per-class total.
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // check whether any single value in an attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}