List of usage examples for weka.core Instances numAttributes
public int numAttributes()
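numAttributes() returns the number of attributes (columns) in the dataset, including the class attribute if one is set. Before the project examples below, here is a minimal standalone sketch of the call; the ARFF path and class name are illustrative placeholders, not taken from the examples:

import java.io.BufferedReader;
import java.io.FileReader;

import weka.core.Instances;

public class NumAttributesExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset from an ARFF file (placeholder path).
        Instances data = new Instances(new BufferedReader(new FileReader("data/example.arff")));

        // numAttributes() counts every column of the dataset.
        System.out.println("Number of attributes: " + data.numAttributes());

        // A recurring idiom in the examples below: treat the last attribute as the class.
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}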
From source file:com.rapidminer.tools.WekaTools.java
License:Open Source License
/**
 * Creates a RapidMiner example set from Weka instances. Only a label can be
 * used as a special attribute; other types of special attributes are not
 * supported. If <code>attributeNamePrefix</code> is not null, the given
 * string prefix plus a number is used as the attribute name.
 */
public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix,
        int datamanagement) {
    int classIndex = instances.classIndex();

    // create example table
    // 1. Extract attributes
    List<Attribute> attributes = new ArrayList<Attribute>();
    int number = 1; // used for attribute names
    for (int i = 0; i < instances.numAttributes(); i++) {
        weka.core.Attribute wekaAttribute = instances.attribute(i);
        int rapidMinerAttributeValueType = Ontology.REAL;
        if (wekaAttribute.isNominal())
            rapidMinerAttributeValueType = Ontology.NOMINAL;
        else if (wekaAttribute.isString())
            rapidMinerAttributeValueType = Ontology.STRING;
        Attribute attribute = AttributeFactory.createAttribute(wekaAttribute.name(),
                rapidMinerAttributeValueType);
        if ((i != classIndex) && (attributeNamePrefix != null) && (attributeNamePrefix.length() > 0)) {
            attribute.setName(attributeNamePrefix + "_" + (number++));
        }
        if (wekaAttribute.isNominal()) {
            for (int a = 0; a < wekaAttribute.numValues(); a++) {
                String nominalValue = wekaAttribute.value(a);
                attribute.getMapping().mapString(nominalValue);
            }
        }
        attributes.add(attribute);
    }

    // handle label extra
    Attribute label = null;
    if (classIndex >= 0) {
        label = attributes.get(classIndex);
        label.setName("label");
    }

    // 2. Guarantee alphabetical mapping to numbers
    for (int j = 0; j < attributes.size(); j++) {
        Attribute attribute = attributes.get(j);
        if (attribute.isNominal())
            attribute.getMapping().sortMappings();
    }

    // 3. Read data
    MemoryExampleTable table = new MemoryExampleTable(attributes);
    DataRowFactory factory = new DataRowFactory(datamanagement, '.');

    // create data
    List<DataRow> dataList = new LinkedList<DataRow>();
    int numberOfRapidMinerAttributes = instances.numAttributes();
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        DataRow dataRow = factory.create(numberOfRapidMinerAttributes);
        for (int a = 0; a < instances.numAttributes(); a++) {
            Attribute attribute = table.getAttribute(a);
            double wekaValue = instance.value(a);
            if (attribute.isNominal()) {
                // map the Weka nominal index back to its string value, then to the
                // RapidMiner mapping index
                String nominalValue = instances.attribute(a).value((int) wekaValue);
                dataRow.set(attribute, attribute.getMapping().mapString(nominalValue));
            } else {
                dataRow.set(attribute, wekaValue);
            }
        }
        dataRow.trim();
        dataList.add(dataRow);
    }

    table.readExamples(new ListDataRowReader(dataList.iterator()));

    // create and return example set
    return table.createExampleSet(label);
}
From source file:com.reactivetechnologies.analytics.core.eval.AdaBoostM1WithBuiltClassifiers.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {

    /** Changed here: Using the provided classifiers */
    /** End */

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();

    if ((!m_UseResampling) && (m_Classifier instanceof WeightedInstancesHandler)) {
        buildClassifierWithWeights(data);
    } else {
        buildClassifierUsingResampling(data);
    }
}
From source file:com.reactivetechnologies.analytics.mapper.ARFFDataMapper.java
License:Open Source License
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (!(request instanceof ArffJsonRequest)) {
        throw new ParseException("Not an instance of " + ArffJsonRequest.class, -1);
    }
    try {
        ArffJsonRequest arff = (ArffJsonRequest) request;
        ArffReader ar = new ArffReader(new StringReader(request.toString()));
        Instances ins = ar.getData();
        // default to the last attribute as the class if no index was supplied
        ins.setClassIndex(arff.getClassIndex() >= 0 ? arff.getClassIndex() : ins.numAttributes() - 1);
        return new Dataset(ins);
    } catch (Exception e) {
        ParseException pe = new ParseException("Cannot convert JSON stream to ARFF", -1);
        pe.initCause(e);
        throw pe;
    }
}
From source file:com.reactivetechnologies.platform.analytics.mapper.JSONDataMapper.java
License:Open Source License
@Override
public TrainModel mapStringToModel(ArffJsonRequest request) throws ParseException {
    try {
        ArffReader ar = new ArffReader(new StringReader(request.toString()));
        Instances ins = ar.getData();
        // default to the last attribute as the class if no index was supplied
        ins.setClassIndex(request.getClassIndex() >= 0 ? request.getClassIndex() : ins.numAttributes() - 1);
        return new TrainModel(ins);
    } catch (Exception e) {
        ParseException pe = new ParseException("Cannot convert JSON stream to ARFF", -1);
        pe.initCause(e);
        throw pe;
    }
}
From source file:com.relationalcloud.main.Explanation.java
License:Open Source License
/**
 * @param args
 */
public static void main(String[] args) {

    // LOADING PROPERTY FILE AND DRIVER
    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // LOAD PROPERTIES FROM CONFIGURATION FILE
    String connection = ini.getProperty("conn");
    String schemaname = ini.getProperty("schema");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");
    String txnLogTable = ini.getProperty("txnLogTable");
    String numb_trans_to_process = ini.getProperty("Explanation.numTxnsToExtractTemplates");
    int numPart = Integer.parseInt(ini.getProperty("numPartitions"));

    // Initialize the Justification Handler
    ExplanationHandler jh = new ExplanationHandler(ini);
    System.out.println("Loading and processing " + jh.schemaname + " traces... considering prop file: "
            + jh.dbPropertyFile);

    try {
        // CREATE A DB CONNECTION
        Connection conn = DriverManager.getConnection(connection + schemaname, user, password);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        // ANALYZE THE WORKLOAD, EXTRACTING TABLES, ATTRIBUTES AND FREQUENCIES
        ExplanationWorkloadPrepocessor wa = ExplanationHandler.analyzeWorkload(txnLogTable,
                numb_trans_to_process, schemaname, conn, schema);

        // FOR EACH TABLE, CLASSIFY AND POPULATE THE JUSTIFICATION COLUMN
        for (String tableProcessed : wa.getAllTableNames()) {
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + tableProcessed);

            // FETCH THE INSTANCES FROM THE DB AND SAMPLE THEM
            Instances data = jh.generateInstancesForTable(tableProcessed, wa.getFeatures(tableProcessed),
                    conn);

            // IF THERE IS ONLY THE PARTITION LABEL, SKIP THE TABLE
            if (data.numAttributes() < 2) {
                System.out.println("No transaction touches this table, nothing to be done.");
                continue;
            }

            // INSTANTIATE THE CLASSIFIER
            String[] options = new String[3];
            options[0] = "-P";
            options[1] = "-C";
            options[2] = ini.getProperty("Explanation.j48PruningConfidence");
            J48 classifier = new J48(); // new instance of tree
            classifier.setOptions(options); // set the options

            Boolean attributeFilter = true;

            // ATTRIBUTE FILTERING
            Instances newData;
            if (data.numClasses() > 1 && attributeFilter) {
                AttributeSelection filter = new AttributeSelection();
                // FIXME TRYING ALTERNATIVE ATTRIBUTE SELECTION STRATEGIES
                // InfoGainAttributeEval eval = new InfoGainAttributeEval();
                // Ranker search = new Ranker();
                // search.setNumToSelect(Integer.parseInt(ini.getProperty("Explanation.maxNumberOfAttribute", "2")));
                CfsSubsetEval eval = new CfsSubsetEval();
                GreedyStepwise search = new GreedyStepwise();
                search.setSearchBackwards(true);
                filter.setEvaluator(eval);
                filter.setSearch(search);
                filter.setInputFormat(data);
                newData = Filter.useFilter(data, filter);
            } else {
                newData = data;
            }

            String atts = "";
            Enumeration e = newData.enumerateAttributes();
            ArrayList<String> attributesForPopulation = new ArrayList<String>();
            while (e.hasMoreElements()) {
                String s = ((Attribute) e.nextElement()).name();
                attributesForPopulation.add(s);
                atts += s + ", ";
            }
            atts = atts.substring(0, atts.length() - 2);

            System.out.println("Attribute filtering reduced " + (data.numAttributes() - 1) + " to "
                    + (newData.numAttributes() - 1) + " (" + atts + ")");

            data = null;
            System.gc();

            if (newData.numInstances() < 1) {
                System.err.println("There are no data in the table, skipping classification");
                continue;
            }

            if (newData.numInstances() > 0) {
                if (newData.classAttribute().numValues() > 1) {
                    // TRAIN THE CLASSIFIER AND PRINT OUT CLASSIFIER RULES
                    ExplanationHandler.trainClassifier(newData, classifier);

                    if (classifier.measureNumLeaves() == 1) {
                        int partitionvalue = (int) classifier.classifyInstance(newData.firstInstance());
                        System.out.println(
                                "The classifier decided to put all the tuples in the table in one partition: "
                                        + partitionvalue);
                        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                            jh.populateExplainedColumn(tableProcessed, partitionvalue,
                                    attributesForPopulation, conn);
                        }
                    }
                    // POPULATE THE justifiedpartition COLUMN WITH THE RESULT OF THIS
                    // CLASSIFIER, IF REQUIRED
                    else if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateJustifiedColumn(tableProcessed, classifier, attributesForPopulation,
                                conn, numPart, newData.classAttribute().enumerateValues());
                    }
                } else {
                    // easy case... the class attribute is unary!
                    int partitionvalue = (int) newData.firstInstance()
                            .value(newData.firstInstance().classIndex());
                    System.out.println("The table is all stored in one partition, no need to use a classifier");
                    if (Boolean.parseBoolean(ini.getProperty("Explanation.populateExplainedColumn"))) {
                        jh.populateExplainedColumn(tableProcessed, partitionvalue, attributesForPopulation,
                                conn);
                    }
                }
            } else
                throw new Exception("The Instances is empty");
        }

        // SET HASH PARTITION / REPLICATED PARTITION
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateHashColumn"))) {
            jh.populateHashPartition(conn);
        }
        if (Boolean.parseBoolean(ini.getProperty("Explanation.populateReplicatedColumn"))) {
            jh.populateReplicatedPartition(conn,
                    Boolean.parseBoolean(ini.getProperty("Explanation.defaultReplicate")));
        }

        conn.close();
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.relationalcloud.main.ExplanationSingleAttribute.java
License:Open Source License
/**
 * @param args
 */
@Deprecated
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // loading properties from file
    String schemaname = ini.getProperty("schemaname");
    String partitioningMethod = ini.getProperty("partitioningMethod");
    String pcol;
    if (partitioningMethod.equals("repGraph")) {
        System.out.println("Replication Graph: using replicated column");
        pcol = ini.getProperty("replicatedPartitionCol");
    } else {
        pcol = ini.getProperty("graphPartitionCol");
    }

    String accessLogTable = ini.getProperty("accessLogTable");
    String numb_trans_to_process = ini.getProperty("numb_trans_to_process");
    String txnLogTable = ini.getProperty("txnLogTable");
    String driver = ini.getProperty("driver");
    String connection = ini.getProperty("conn");
    String user = ini.getProperty("user");
    String password = ini.getProperty("password");

    System.out.println("Loading and processing " + schemaname + " traces...");

    // Register jdbcDriver
    try {
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    Connection conn;
    try {
        conn = DriverManager.getConnection(connection + schemaname, user, password);
        conn.setAutoCommit(true);
        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);
        Statement stmt = conn.createStatement();

        // NOTE: the parameter numb_trans_to_process is used to limit the number of
        // transactions parsed to determine which attributes are common in the
        // workload WHERE clauses. This can be a subset of the overall set.
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "` LIMIT " + numb_trans_to_process;
        ResultSet res = stmt.executeQuery(sqlstring);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            i++;
        }
        double tend = System.currentTimeMillis();

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average: "
                + (tend - tstart) / i + "ms per statement");

        System.out.println("ANALYSIS RESULTS:\n ");
        wa.printStatsByTableColumn();

        for (String str : wa.getAllTableNames()) {
            if (str == null)
                continue;
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE USED IN THE TRANSACTION TRACE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g." + pcol + " FROM `" + accessLogTable
                        + "` g, relcloud_" + str + " s WHERE tableid = \"" + str
                        + "\" AND s.relcloud_id = g.tupleid";
                // System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    Object o1 = res.getObject(1);
                    Object o2 = res.getObject(2);
                    if (o1 != null && o2 != null) {
                        a0.add(new Double(o1.hashCode()));
                        a1.add(new Double(o2.hashCode()));
                    }
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];
                    boolean unary = true;
                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {
                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);
                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));
                        // if the correlation is high enough, proceed to use decision trees
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (HIGH)");
                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = WekaHelper.retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;
                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // second attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                                              // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter
                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // second attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                                              // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter
                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR: THE FOLLOWING DATA CANNOT BE PROCESSED: "
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS: " + sqlstring);
                                } else {
                                    long treeTstart = System.currentTimeMillis();
                                    tree.buildClassifier(newData); // build classifier
                                    long treeTend = System.currentTimeMillis();
                                    System.out.println("CLASSIFICATION CONFIDENCE: "
                                            + tree.getConfidenceFactor() + "\n TREE BUILDING TIME: "
                                            + (treeTend - treeTstart) + "ms \n" + tree.toString());
                                    System.out.println("TREE:" + tree.prefix());
                                }
                            } catch (Exception e) {
                                e.printStackTrace();
                            }
                        } else {
                            System.out.println("Testing " + str + "." + sc.colname + ", " + pcol
                                    + " correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
From source file:com.relationalcloud.misc.JustifyAgnosticPartitioning.java
License:Open Source License
/**
 * @param args
 */
public static void main(String[] args) {

    Properties ini = new Properties();
    try {
        ini.load(new FileInputStream(System.getProperty("prop")));
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // Register jdbcDriver
    try {
        Class.forName(ini.getProperty("driver"));
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }

    // READ THE TPCC TRANSACTION LOG FROM MYSQL, PARSE EACH STATEMENT AND TEST
    // VARIOUS PARSER FUNCTIONALITIES
    System.out.println("Loading and processing TPCC traces...");

    Connection conn;
    try {
        String schemaname = ini.getProperty("schema");
        String connection = ini.getProperty("conn");
        String user = ini.getProperty("user");
        String password = ini.getProperty("password");
        conn = DriverManager.getConnection(connection + schemaname, user, password);

        Connection infschema_conn = DriverManager.getConnection(connection + "information_schema", user,
                password);
        Schema schema = SchemaLoader.loadSchemaFromDB(infschema_conn, schemaname);

        ExplanationWorkloadPrepocessor wa = new ExplanationWorkloadPrepocessor(schemaname, schema);

        conn.setAutoCommit(true);
        Statement stmt = conn.createStatement();

        String txnLogTable = ini.getProperty("txnLogTable");
        String sqlstring = "SELECT sqlstring FROM `" + txnLogTable + "`";
        ResultSet res = stmt.executeQuery(sqlstring);

        double tstart = System.currentTimeMillis();
        double i = 0;
        while (res.next()) {
            String sql = res.getString(1);
            // PARSE THE STATEMENT
            wa.processSql(sql);
            // System.out.println("SQL: " + sql);
            i++;
        }
        double tend = System.currentTimeMillis();

        String accessLogTable = ini.getProperty("accessLogTable");

        System.out.println("Processed " + i + " statements in " + (tend - tstart) + "ms average: "
                + (tend - tstart) / i + "ms per statement");

        for (String str : wa.getAllTableNames()) {
            System.out.println("-------------------------------------------");
            System.out.println("ANALYZING TABLE " + str);
            for (SimpleCount sc : wa.getFeatures(str)) {

                ArrayList<Double> a0 = new ArrayList<Double>();
                ArrayList<Double> a1 = new ArrayList<Double>();

                sqlstring = "SELECT s." + sc.colname + ", g.partition FROM `" + accessLogTable + "` g, "
                        + str + " s WHERE tableid = \"" + str + "\" AND s.id = g.id";
                System.out.println(sqlstring);
                res = stmt.executeQuery(sqlstring);

                while (res.next()) {
                    a0.add(new Double(res.getObject(1).hashCode()));
                    a1.add(new Double(res.getObject(2).hashCode()));
                }

                if (a0.size() >= 1) {
                    double[] d0 = new double[a0.size()];
                    double[] d1 = new double[a1.size()];
                    boolean unary = true;
                    for (int j = 0; j < a0.size(); j++) {
                        d0[j] = a0.get(j).doubleValue();
                        d1[j] = a1.get(j).doubleValue();
                        if (j > 0 && d1[j - 1] != d1[j])
                            unary = false;
                    }

                    if (unary) {
                        System.out.println("EASY CASE: " + str
                                + " is not partitioned and is stored in partition: " + d1[0]);
                    } else {
                        double correlation = PearsonCorrelation.getPearsonCorrelation(d0, d1);
                        correlationThreshold = Double.parseDouble(ini.getProperty("correlationThreshold"));
                        // if the correlation is high enough, proceed to use decision trees
                        if (Math.abs(correlation) > correlationThreshold) {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (HIGH)");
                            try {
                                // InstanceQuery query;
                                // query = new InstanceQuery();
                                // query.setUsername("bbb");
                                // query.setPassword("qwer");
                                // query.connectToDatabase();
                                // Instances data = query.retrieveInstances(sqlstring);
                                res.beforeFirst();
                                Instances data = retrieveInstanceFromResultSet(res);
                                // set the last column to be the classIndex... is this correct?
                                data.setClassIndex(data.numAttributes() - 1);

                                Instances newData;
                                if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
                                    NumericToNominal ntn = new NumericToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // second attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                                              // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter
                                } else {
                                    StringToNominal ntn = new StringToNominal();
                                    String[] options = new String[2];
                                    options[0] = "-R"; // "range"
                                    options[1] = "2"; // second attribute
                                    ntn.setOptions(options); // set options
                                    ntn.setInputFormat(data); // inform filter about dataset
                                                              // **AFTER** setting options
                                    newData = Filter.useFilter(data, ntn); // apply filter
                                }

                                String[] options = new String[1];
                                options[0] = "-P";
                                J48 tree = new J48(); // new instance of tree
                                tree.setOptions(options); // set the options

                                if (!tree.getCapabilities().test(newData)) {
                                    System.err.println("ERROR: THE FOLLOWING DATA CANNOT BE PROCESSED: "
                                            + newData.toSummaryString());
                                    System.err.println("QUERY WAS: " + sqlstring);
                                } else {
                                    tree.buildClassifier(newData); // build classifier
                                }
                                System.out.println("CLASSIFICATION CONFIDENCE: " + tree.getConfidenceFactor()
                                        + "\n " + tree.toString());
                            } catch (Exception e) {
                                e.printStackTrace();
                            }
                        } else {
                            System.out.println("Testing " + str + "." + sc.colname
                                    + ", g.partition correlation: " + correlation + " (LOW)");
                        }
                    }
                }
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/**
 * Repeats the selection from the database, removing duplicates (they would
 * only increase the execution time), and runs the tuples through the
 * classifier to populate the justifiedpartition column.
 *
 * @param tableProcessed
 * @param classifier
 * @param attributes
 * @throws SQLException
 * @throws Exception
 */
public void populateJustifiedColumn(String tableProcessed, Classifier classifier,
        ArrayList<String> attributes, Connection conn, int numbPart, Enumeration enumclassvalues)
        throws SQLException, Exception {

    if (true) {
        labelTest(tableProcessed, classifier, conn);
        return;
    }

    tableProcessed = removeQuotes(tableProcessed);

    // get from the DB the tuple contents and their partitioning column
    String sqlstring = "SELECT distinct g.tupleid, ";
    for (String sc : attributes) {
        sqlstring += "s." + sc + ", ";
    }
    sqlstring += "g." + pcol + " FROM " + "(SELECT distinct tupleid," + pcol + " FROM `" + testingtable
            + "` WHERE tableid = '" + tableProcessed + "') AS g, relcloud_" + tableProcessed + " AS s "
            + "WHERE s.relcloud_id = g.tupleid;";
    System.out.println(sqlstring);

    Statement stmt = conn.createStatement();

    // initialize the testing table to avoid complaints from the classifier,
    // using a hash-partition-like distribution
    if (!testingtable.equals(sampledtrainingtable)) {
        int i = 0;
        Object o = enumclassvalues.nextElement();
        // set everything to an existing value to ensure that every field is covered
        stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tableid = '"
                + tableProcessed + "'");
        // and then sprinkle in a bunch of other values (unsure whether this is required)
        while (enumclassvalues.hasMoreElements()) {
            o = enumclassvalues.nextElement();
            // FIXME there might still be an issue in which tupleid%i does not exist,
            // and thus one of the "o" values never appears in the instance...
            stmt.executeUpdate("UPDATE " + testingtable + " SET " + pcol + "=" + o + " WHERE tupleid%"
                    + numbPart + "=" + i + " AND tableid = '" + tableProcessed + "'");
            i++;
        }
    }

    ResultSet res = stmt.executeQuery(sqlstring);

    // create an Instances object from the result set
    Instances data_tupleid = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
    res.close();
    data_tupleid.setClassIndex(data_tupleid.numAttributes() - 1);

    Instances data_no_tupleid = makeLastNominal(data_tupleid);
    data_no_tupleid.setClassIndex(data_no_tupleid.numAttributes() - 1);

    // remove tupleid from data_no_tupleid; it is still available in data_tupleid
    data_no_tupleid.deleteAttributeAt(0);

    // if (data_no_tupleid.classAttribute().numValues() > 1) {
    System.out.println("Running the tuples through the classifier to populate " + explainedPartitionCol);

    // use data_tupleid, which still has the tupleid, alongside data_no_tupleid
    // for the classification
    Enumeration enum_data_tupleid = data_tupleid.enumerateInstances();
    Enumeration enum_data_no_tupleid = data_no_tupleid.enumerateInstances();

    PreparedStatement updateJustCol = conn.prepareStatement("UPDATE `" + testingtable + "` SET `"
            + explainedPartitionCol + "` = ? " + "WHERE tableid = '" + tableProcessed
            + "' AND tupleid = ?;");

    while (enum_data_tupleid.hasMoreElements() && enum_data_no_tupleid.hasMoreElements()) {
        Instance tupIDinstance = (Instance) enum_data_tupleid.nextElement();
        Instance instance = (Instance) enum_data_no_tupleid.nextElement();

        double part = classifier.classifyInstance(instance);
        // missing values are NaN, which compares unequal to itself, so test with
        // isMissingValue() rather than ==
        if (Instance.isMissingValue(part))
            System.err.println("No classification for: " + instance.toString());

        updateJustCol.setInt(1, (int) part);
        updateJustCol.setInt(2, (int) tupIDinstance.value(0));
        // System.out.println(tableProcessed + " " + instance.value(0) + " "
        //         + tupIDinstance.classValue() + " " + part);
        updateJustCol.execute();
        updateJustCol.clearParameters();
    }
    updateJustCol.close();
}
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/**
 * Invokes a filter to transform the last attribute into a nominal one.
 *
 * @param data
 * @return
 * @throws Exception
 */
public static Instances makeLastNominal(Instances data) throws Exception {
    Instances newData;
    if (data.attribute(data.numAttributes() - 1).type() == Attribute.NUMERIC) {
        NumericToNominal ntn = new NumericToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // last attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
                                  // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply filter
    } else {
        StringToNominal ntn = new StringToNominal();
        String[] options = new String[2];
        options[0] = "-R"; // "range"
        options[1] = "last"; // last attribute
        ntn.setOptions(options); // set options
        ntn.setInputFormat(data); // inform filter about dataset
                                  // **AFTER** setting options
        newData = Filter.useFilter(data, ntn); // apply filter
    }
    return newData;
}
From source file:com.relationalcloud.partitioning.explanation.ExplanationHandler.java
License:Open Source License
/**
 * Fetches the table contents and the partition labels from the database,
 * and prepares a Weka Instances object by sampling and cleaning the data.
 *
 * @param tabname
 * @param arraySc
 * @param conn
 * @return
 */
public Instances generateInstancesForTable(String tabname, ArrayList<SimpleCount> arraySc, Connection conn) {

    tabname = removeQuotes(tabname);

    Statement stmt;
    try {
        stmt = conn.createStatement();
        ResultSet test = stmt.executeQuery(
                "SELECT count(*) FROM " + sampledtrainingtable + " WHERE " + pcol + " is null");
        // safety check: verify that there are no nulls in the input table
        if (test.next() && test.getInt(1) > 0)
            throw new Exception("Table " + sampledtrainingtable + " contains nulls in " + pcol);

        // get from the DB the tuple contents and their partitioning column
        String sqlstring = "SELECT ";
        for (SimpleCount sc : arraySc) {
            sqlstring += "s." + sc.colname + ", ";
        }
        sqlstring += "g." + pcol + " FROM " + "(SELECT tupleid," + pcol + " FROM `" + sampledtrainingtable
                + "` WHERE tableid = '" + tabname + "') AS g, relcloud_" + tabname + " AS s "
                + "WHERE s.relcloud_id = g.tupleid";
        System.out.println(sqlstring);

        ResultSet res = stmt.executeQuery(sqlstring);

        // create an Instances object from the result set
        Instances data = WekaHelper.retrieveInstanceFromResultSetComplete(res, dbPropertyFile);
        res.close();

        // prepare the data by setting the class attribute and sampling if required
        data = makeLastNominal(data);
        data.setClassIndex(data.numAttributes() - 1);
        data = sampleTraining(Double.parseDouble(ini.getProperty("Explanation.j48SamplingThreshold")), data);

        System.out.println(data.toSummaryString());
        return data;
    } catch (SQLException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}