List of usage examples for the weka.filters.unsupervised.attribute.StringToNominal constructor StringToNominal()
StringToNominal
From source file:clases.Preproceso.java
public static Instances filterStringToNominal(Instances data, String i) { try {//from w w w .ja v a 2 s .com StringToNominal sn = new StringToNominal(); sn.setAttributeRange(i); sn.setInputFormat(data); return Filter.useFilter(data, sn); } catch (Exception ex) { Logger.getLogger(Preproceso.class.getName()).log(Level.SEVERE, null, ex); return null; } }
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/** * @param args// w w w. ja v a 2 s .com */ @Deprecated public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // loading properties from file String schemaname = ini.getProperty("schemaname"); String partitioningMethod = ini.getProperty("partitioningMethod"); String pcol; if (partitioningMethod.equals("repGraph")) { System.out.println("Replication Graph: using replicated column"); pcol = ini.getProperty("replicatedPartitionCol"); } else { pcol = ini.getProperty("graphPartitionCol"); } String accessLogTable = ini.getProperty("accessLogTable"); String numb_trans_to_process = ini.getProperty("numb_trans_to_process"); String txnLogTable = ini.getProperty("txnLogTable"); String driver = ini.getProperty("driver"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); System.out.println("Loading and processing " + schemaname + " traces..."); // Register jdbcDriver try { Class.forName(driver); } catch (ClassNotFoundException e) { e.printStackTrace(); } Connection conn; try { conn = DriverManager.getConnection(connection + schemaname, user, password); conn.setAutoCommit(true); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); Statement stmt = conn.createStatement(); // NOTE: the paramenter numb_trans_to_process is used to limit // the number of transactions parsed to determine the which attributes // are common in the workload WHERE clauses. 
This can be a subset of the // overall set String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process; ResultSet res = stmt.executeQuery(sqlstring); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); i++; } double tend = System.currentTimeMillis(); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); System.out.println("ANALISYS RESULTS:\n "); wa.printStatsByTableColumn(); for (String str : wa.getAllTableNames()) { if (str == null) continue; System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE IN USED IN THE TRANSACTION TRACE " + str); for (SimpleCount sc : wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g." 
+ pcol + " FROM `" + accessLogTable + "` g, relcloud_" + str + " s WHERE tableid = \"" + str + "\" AND s.relcloud_id = g.tupleid"; // System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { Object o1 = res.getObject(1); Object o2 = res.getObject(2); if (o1 != null && o2 != null) { a0.add(new Double(o1.hashCode())); a1.add(new Double(o2.hashCode())); } } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = WekaHelper.retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { long treeTstart = System.currentTimeMillis(); tree.buildClassifier(newData); // build classifier long treeTend = System.currentTimeMillis(); System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: " + (treeTend - treeTstart) + "ms \n" + tree.toString()); System.out.println("TREE:" + tree.prefix()); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", " + pcol + " correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/** * @param args/*from w ww. j a v a2s . com*/ */ public static void main(String[] args) { Properties ini = new Properties(); try { ini.load(new FileInputStream(System.getProperty("prop"))); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } // Register jdbcDriver try { Class.forName(ini.getProperty("driver")); } catch (ClassNotFoundException e) { e.printStackTrace(); } // READ FROM MYSQL THE TPCC TRANSACTION LOG, PARSE EACH STATEMENT AND TEST // VARIOUS PARSER FUNCTIONALITIES System.out.println("Loading and processing TPCC traces..."); Connection conn; try { String schemaname = ini.getProperty("schema"); String connection = ini.getProperty("conn"); String user = ini.getProperty("user"); String password = ini.getProperty("password"); conn = DriverManager.getConnection(connection + schemaname, user, password); Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user, password); Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname); ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema); conn.setAutoCommit(true); Statement stmt = conn.createStatement(); String txnLogTable = ini.getProperty("txnLogTable"); String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`"; ResultSet res = stmt.executeQuery(sqlstring); double tstart = System.currentTimeMillis(); double i = 0; while (res.next()) { String sql = res.getString(1); // PARSE THE STATEMENT wa.processSql(sql); // System.out.println("SQL: " +sql); i++; } double tend = System.currentTimeMillis(); String accessLogTable = ini.getProperty("accessLogTable"); System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average:" + (tend - tstart) / i + "ms per statement"); for (String str : wa.getAllTableNames()) { System.out.println("-------------------------------------------"); System.out.println("ANALYZING TABLE " + str); for (SimpleCount sc : 
wa.getFeatures(str)) { ArrayList<Double> a0 = new ArrayList<Double>(); ArrayList<Double> a1 = new ArrayList<Double>(); sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, " + str + " s WHERE tableid = \"" + str + "\" AND s.id = g.id"; System.out.println(sqlstring); res = stmt.executeQuery(sqlstring); while (res.next()) { a0.add(new Double(res.getObject(1).hashCode())); a1.add(new Double(res.getObject(2).hashCode())); } if (a0.size() >= 1) { double[] d0 = new double[a0.size()]; double[] d1 = new double[a1.size()]; boolean unary = true; for (int j = 0; j < a0.size(); j++) { d0[j] = a0.get(j).doubleValue(); d1[j] = a1.get(j).doubleValue(); if (j > 0 && d1[j - 1] != d1[j]) unary = false; } if (unary) { System.out.println("EASY CASE: " + str + " is not partitioned and is stored in partition: " + d1[0]); } else { double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1); correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold")); // if the correlation is high enough proceed to use decision // trees. if (Math.abs(correlation) > correlationThreshold) { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (HIGH)"); try { // InstanceQuery query; // query = new InstanceQuery(); // query.setUsername("bbb"); // query.setPassword("qwer"); // query.connectToDatabase(); // Instances data = query.retrieveInstances(sqlstring); res.beforeFirst(); Instances data = retrieveInstanceFromResultSet(res); // set the last column to be the classIndex... is this // correct? 
data.setClassIndex(data.numAttributes() - 1); Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "2"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } String[] options = new String[1]; options[0] = "-P"; J48 tree = new J48(); // new instance of tree tree.setOptions(options); // set the options if (!tree.getCapabilities().test(newData)) { System.err.println("ERROR the FOLLOWING DATA CANNOT BE PROCESED:" + newData.toSummaryString()); System.err.println("QUERY WAS:" + sqlstring); } else { tree.buildClassifier(newData); // build classifier } System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor() + "\n " + tree.toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } else { System.out.println("Testing " + str + "." + sc.colname + ", g.partition correlation: " + correlation + " (LOW)"); } } } } } } catch (SQLException e) { e.printStackTrace(); } }
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/** * Invokes filter to transform last parameter into a Nominal * /* w ww. j a va 2s . c o m*/ * @param data * @return * @throws Exception */ public static Instances makeLastNominal(Instances data) throws Exception { Instances newData; if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) { NumericToNominal ntn = new NumericToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } else { StringToNominal ntn = new StringToNominal(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "last"; // first attribute ntn.setOptions(options); // set options ntn.setInputFormat(data); // inform filter about dataset // **AFTER** setting options newData = Filter.useFilter(data, ntn); // apply fil } return newData; }
From source file:ffnn.FFNN.java
public static Instances preprocess(Instances i) { try {//w w w .j ava 2s .c om Reorder rfilter = new Reorder(); int classIdx = i.classIndex() + 1; String order; if (classIdx != 1) { order = "1"; for (int j = 2; j <= i.numAttributes(); j++) { if (j != classIdx) { order = order + "," + j; } } } else { order = "2"; for (int j = 3; j <= i.numAttributes(); j++) { order = order + "," + j; } } order = order + "," + classIdx; rfilter.setAttributeIndices(order); rfilter.setInputFormat(i); i = Filter.useFilter(i, rfilter); StringToNominal stnfilter = new StringToNominal(); stnfilter.setAttributeRange("first-last"); stnfilter.setInputFormat(i); i = Filter.useFilter(i, stnfilter); NominalToBinary ntbfilter = new NominalToBinary(); ntbfilter.setInputFormat(i); i = Filter.useFilter(i, ntbfilter); Normalize nfilter = new Normalize(); nfilter.setInputFormat(i); i = Filter.useFilter(i, nfilter); } catch (Exception e) { System.out.println(e.toString()); } return i; }
From source file:newsclassifier.NewsClassifier.java
public void StrtoNom() throws Exception { StringToNominal filter = new StringToNominal(); //NumericToNominal filter = new NumericToNominal(); filter.setInputFormat(data);/*from w ww . ja v a 2s . c o m*/ //filter.setOptions("-R 1"); String[] opts = { "-R", "first" }; filter.setOptions(opts); //filter.setAttributeRange("first"); data = Filter.useFilter(data, filter); }
From source file:ocr.ARFFSymbolFilter.java
License:Apache License
/**
 * Writes symbol data to ARFF files.
 *
 * <p>When {@code strokenumber} is set, one file per symbol size is written
 * ({@code filenameout + size + ".arff"}); otherwise a single
 * {@code filenameout + ".arff"} is written and {@code ns} is set to 1. Each
 * file is first written as a temporary {@code ".arff#"} file with a string
 * class attribute, then passed through a {@code StringToNominal} filter into
 * the final {@code ".arff"} file, after which the temporary file is deleted.
 *
 * <p>Errors are reported with {@code printStackTrace()} and not rethrown. Any
 * output stream still open when an error occurs is closed before returning.
 *
 * @param filenameout path prefix of the files to write (without extension)
 * @param symbolData  list of groups; every group is an {@code ArrayList} of
 *                    {@code Symbol} objects
 */
public static void writeWeka(final String filenameout, final ArrayList<?> symbolData) {
    final int nsold = ARFFSymbolFilter.ns;
    ARFFSymbolFilter.tangent = (ARFFSymbolFilter.times > 1);
    // Declared outside the try block so the finally clause can close leftovers.
    DataOutputStream[] fileout = null;
    try {
        if (!ARFFSymbolFilter.strokenumber) {
            // NOTE(review): ns is not restored afterwards; nsold is only read
            // below -- confirm that leaving ns at 1 is intended.
            ARFFSymbolFilter.ns = 1;
        }
        fileout = new DataOutputStream[ARFFSymbolFilter.ns];
        final Instances[] instances = new Instances[ARFFSymbolFilter.ns];
        System.out.println("Writing file");
        for (int i = 0; i < ARFFSymbolFilter.ns; ++i) {
            final int k = ARFFSymbolFilter.strokenumber ? i : (nsold - 1);
            fileout[ARFFSymbolFilter.strokenumber ? i : 0] = new DataOutputStream(new FileOutputStream(
                    filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#"));
        }

        final int tot = symbolData.size();
        for (int j = 0; j < symbolData.size(); ++j) {
            final ArrayList<?> group = (ArrayList<?>) symbolData.get(j);
            for (int i = 0; i < group.size(); ++i) {
                final Symbol sym = (Symbol) group.get(i);
                // Slot of the dataset this symbol belongs to.
                final int k = ARFFSymbolFilter.strokenumber ? (sym.size() - 1) : 0;
                if (sym.name.equals("no_name") || sym.name.equals("empty_symbol")) {
                    System.out.print("#" + sym.name + "#");
                    continue;
                }
                for (int t = 0; t < ARFFSymbolFilter.times; ++t) {
                    final String line = constructStringInstance(sym, ARFFSymbolFilter.alpha);
                    if (line == null) {
                        System.out.print("line=null!");
                        continue;
                    }
                    // The line is read as alternating "name value" tokens.
                    if (instances[k] == null) {
                        // First instance for this slot: build the header from
                        // the attribute names, plus a string "class" attribute.
                        final StringTokenizer names = new StringTokenizer(line, " ");
                        final int nameCount = names.countTokens() / 2;
                        final FastVector att = new FastVector();
                        for (int kk = 0; kk < nameCount; ++kk) {
                            att.addElement(new Attribute(names.nextToken()));
                            names.nextToken(); // skip the value
                        }
                        att.addElement(new Attribute("class", (FastVector) null));
                        instances[k] = new Instances("Symbols of Size " + (k + 1), att, 1);
                        instances[k].setClassIndex(att.size() - 1);
                    }
                    final StringTokenizer st = new StringTokenizer(line, " ");
                    final int nt = st.countTokens() / 2;
                    final Instance inst = new Instance(nt + 1);
                    for (int kk = 0; kk < nt; ++kk) {
                        st.nextToken(); // skip the name
                        inst.setValue(kk, Double.parseDouble(st.nextToken()));
                    }
                    inst.setDataset(instances[k]);
                    inst.setClassValue(oldReplace(sym.name, "\\", ""));
                    instances[k].add(inst);
                }
            }
            if ((int) (100.0 * j) / tot % 10 == 0) {
                System.out.print((int) (100.0 * j) / tot + "%-");
            }
        }

        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            final int slot = ARFFSymbolFilter.strokenumber ? k : 0;
            if (fileout[slot] == null) {
                System.out.println("fo" + fileout[slot]);
            }
            if (instances[slot] == null) {
                // NOTE(review): a slot that received no instances still fails
                // on the next statement, as before -- confirm whether such
                // slots should be skipped instead.
                System.out.println("in:" + instances[slot]);
            }
            fileout[slot].writeBytes(instances[slot].toString());
            fileout[slot].close();
            // Mark as closed so the finally clause does not close it again.
            fileout[slot] = null;
        }

        final StringToNominal filter = new StringToNominal();
        final String[] args = new String[4];
        for (int k = 0; k < ARFFSymbolFilter.ns; ++k) {
            args[0] = "-i";
            args[1] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff#";
            args[2] = "-o";
            args[3] = filenameout + (ARFFSymbolFilter.strokenumber ? ("" + (k + 1)) : "") + ".arff";
            Filter.filterFile(filter, args);
            new File(args[1]).delete();
        }
        System.out.println("100.0%");
    } catch (FileNotFoundException fnfe) {
        fnfe.printStackTrace();
    } catch (Exception ioe) {
        ioe.printStackTrace();
    } finally {
        // Close whatever is still open after a failure part-way through.
        if (fileout != null) {
            for (int k = 0; k < fileout.length; ++k) {
                if (fileout[k] != null) {
                    try {
                        fileout[k].close();
                    } catch (Exception closeFailure) {
                        closeFailure.printStackTrace();
                    }
                }
            }
        }
    }
}
From source file:org.openml.webapplication.features.FantailConnector.java
License:Open Source License
private List<Quality> datasetCharacteristics(Instances fulldata, Integer start, Integer interval_size, List<String> qualitiesAvailable) throws Exception { List<Quality> result = new ArrayList<DataQuality.Quality>(); Instances intervalData;//from w w w . j a v a 2 s . c o m // Be careful changing this! if (interval_size != null) { intervalData = new Instances(fulldata, start, Math.min(interval_size, fulldata.numInstances() - start)); intervalData = applyFilter(intervalData, new StringToNominal(), "-R first-last"); intervalData.setClassIndex(fulldata.classIndex()); } else { intervalData = fulldata; // todo: use StringToNominal filter? might be to expensive } for (Characterizer dc : batchCharacterizers) { if (qualitiesAvailable != null && qualitiesAvailable.containsAll(Arrays.asList(dc.getIDs())) == false) { Conversion.log("OK", "Extract Batch Features", dc.getClass().getName() + ": " + Arrays.toString(dc.getIDs())); Map<String, Double> qualities = dc.characterize(intervalData); result.addAll(hashMaptoList(qualities, start, interval_size)); } else { Conversion.log("OK", "Extract Batch Features", dc.getClass().getName() + " - already in database"); } } return result; }