List of usage examples for weka.core Instances size
@Override public int size()
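In current Weka releases, Instances.size() returns the number of instances in the dataset, equivalent to numInstances(). Before the examples, a minimal self-contained sketch (not from any of the source files below) of the index-based iteration pattern this page documents; the file path is a placeholder assumption:

// Minimal sketch: load a dataset and iterate by index using Instances.size().
// The file path is a hypothetical placeholder.
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesSizeExample {
    public static void main(String[] args) throws Exception {
        DataSource source = new DataSource("data/example.arff"); // hypothetical path
        Instances data = source.getDataSet();
        // size() is equivalent to numInstances()
        System.out.println("Instances: " + data.size());
        for (int i = 0; i < data.size(); i++) {
            Instance inst = data.get(i);
            System.out.println(inst);
        }
    }
}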
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * Creates clusters of points of interest based on the active power
 * difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @param bias
 *          Added to the estimated number of clusters.
 * @return The newly created clusters with the points that comprise them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimate the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initialize the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }
    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster the data set
    newInst = Filter.useFilter(instances, addcluster);
    // System.out.println(newInst.toString());

    // Parse through the data set to see where each point is placed in the clusters
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all clusters by their active power
    Collections.sort(result, Constants.comp5);

    return result;
}
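A hedged usage sketch for the method above, assuming an already-populated pois list (PointOfInterest construction is not shown in this listing):

// Hypothetical caller: cluster an existing list of points with no bias and
// report how many points landed in each cluster.
ArrayList<ArrayList<PointOfInterest>> clusters = Utils.clusterPoints(pois, 0);
for (int c = 0; c < clusters.size(); c++) {
    System.out.println("Cluster " + (c + 1) + ": " + clusters.get(c).size() + " points");
}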
From source file:eu.linda.analytics.formats.ForecastingRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFunctions.nicePrintMessage("Generate Forecasting RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/myRepository/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an",
            Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties (NS already ends with "#"; the original appended
    // a second "#" to the first two URIs, which is fixed here)
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource linda_user = model.createResource(analytics_NS + "User");

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + (analytics.getVersion() + 1));

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "linda analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    if (helpfulFunctions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFunctions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // (Note: the loop starts at i = 1, so the first instance is skipped.)
    for (int i = 1; i < triplets.size(); i++) {
        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(1));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file:eu.linda.analytics.formats.GeneralRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFuncions.nicePrintMessage("Generate General RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an",
            Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");
    Property dataSizeOfAnalyzedDataProperty = model.createProperty(NS + "dataSizeOfAnalyzedDatainBytes");
    Property timeToGetDataProperty = model.createProperty(NS + "timeToGetDataInSecs");
    Property timeToRunAnalyticsProcessProperty = model.createProperty(NS + "timeToRunAnalyticsProcessInSecs");
    Property timeToCreateRDFOutPutProperty = model.createProperty(NS + "timeToCreateRDFOutPutInSecs");
    Property performanceProperty = model.createProperty(NS + "hasPerformance");
    Property atTime = model.createProperty("http://www.w3.org/ns/prov#atTime");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource linda_user = model.createResource(analytics_NS + "User");
    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource performance = model.createResource(analytics_NS + "performance");
    Resource performance_statement = model
            .createResource(analytics_NS + "performance/" + analytics.getId() + "/" + analytics.getVersion());

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + analytics.getVersion());

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "Linda Analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    Calendar cal = GregorianCalendar.getInstance();
    Literal value = model.createTypedLiteral(cal);
    analytic_process_statement.addProperty(atTime, value);

    performance_statement.addProperty(RDF.type, performance);
    performance_statement.addProperty(dataSizeOfAnalyzedDataProperty, Float.toString(analytics.getData_size()));
    performance_statement.addProperty(timeToGetDataProperty, Float.toString(analytics.getTimeToGet_data()));
    performance_statement.addProperty(timeToRunAnalyticsProcessProperty,
            Float.toString(analytics.getTimeToRun_analytics()));
    performance_statement.addProperty(timeToCreateRDFOutPutProperty,
            Float.toString(analytics.getTimeToCreate_RDF()));
    analytic_process_statement.addProperty(performanceProperty, performance_statement);

    if (helpfulFuncions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFuncions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // (Note: the loop starts at i = 1, so the first instance is skipped.)
    for (int i = 1; i < triplets.size(); i++) {
        //for (Instance triplet : triplets) {
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(0));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file:gov.va.chir.tagline.TagLineScorer.java
License:Open Source License
public void applyModel(final Document document) throws Exception {
    // Calculate features at both document and line level
    extractor.calculateFeatureValues(document);

    final Instances instances = DatasetUtil.createDataset(tagLineModel.getHeader(), document);

    for (int i = 0; i < instances.size(); i++) {
        final double[] probs = fc.distributionForInstance(instances.get(i));

        // Find the class with the highest predicted probability
        int maxPos = -1;
        double maxProb = 0.0;

        for (int j = 0; j < probs.length; j++) {
            if (probs[j] > maxProb) {
                maxProb = probs[j];
                maxPos = j;
            }
        }

        if (maxPos < 0) {
            throw new IllegalStateException(
                    String.format("Predicted label array index must not be negative (%d)", maxPos));
        }

        // Set the predicted label and probability on the correct line
        final int lineId = (int) instances.get(i).value(lineIdAttr);
        document.getLine(lineId).setPredictedLabel(classAttr.value(maxPos));
        document.getLine(lineId).setPredictedProbability(maxProb);
    }
}
From source file:imba.classifier.FFNNTubes.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    // Set the target data
    setTarget(data);

    // Generate random initial weights
    generateRandomWeight();

    // Normalize the data
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();
    norm.setInputFormat(data);
    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);
        for (Instance i1 : filteredData) {
            filter.input(i1);
        }
        filter.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    // Train until the epoch limit is reached or the MSE drops below the threshold
    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));

            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }

        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;
    int numAttr = data.numAttributes(); // this includes the class, so attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // Apply the filters
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // Build the data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // Fill the data and info classifiers with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // Fill the data classifier from the dataset
    sumClass = new int[data.numClasses()];

    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }

                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);

                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // Compute the per-class relative frequencies as doubles
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // Check whether any value in each attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
public WekaEvaluator(String baseFolderPath, String projectName, Classifier classifier, String classifierName,
        String modelName) {

    /* OLD CODE
    // READ FILE
    try {
        BufferedReader reader = new BufferedReader(new FileReader(filePath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.size());

        // split the instances into train and test sets
        Instances train = data;
        Instances test = null;

        // EVALUATION
        Evaluation eval = new Evaluation(train);
        //eval.evaluateModel(j48, test);

        // CROSS-VALIDATION
        eval.crossValidateModel(classifier, train, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
    OLD CODE */

    String filePath = baseFolderPath + projectName + "/predictors.csv";
    try {
        DataSource source = new DataSource(filePath);
        Instances instances = source.getDataSet();
        instances.setClassIndex(instances.numAttributes() - 1);
        System.out.println("Numero istanze: " + instances.size());
        evaluateModel(baseFolderPath, projectName, classifier, instances, modelName, classifierName);
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
}
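A hedged construction sketch for the evaluator above. The folder layout and the choice of J48 are assumptions; what the listing does establish is that the constructor reads <baseFolderPath><projectName>/predictors.csv, sets the last attribute as the class, and calls evaluateModel():

// Hypothetical invocation; paths and classifier choice are assumptions.
import weka.classifiers.trees.J48;

public class WekaEvaluatorDemo {
    public static void main(String[] args) {
        // Expects /data/projects/myProject/predictors.csv to exist (assumed layout)
        new WekaEvaluator("/data/projects/", "myProject", new J48(), "J48", "defectModel");
    }
}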
From source file:lfsom.data.LFSData.java
License:Apache License
/**
 * Loads the data from a file (CSV by default; ARFF, JSON, Matlab, XRFF,
 * and C4.5 formats are detected by extension).
 *
 * @param fileName
 */
public LFSData(String fileName) {

    // Pick a Weka loader class according to the file extension (CSV is the default)
    Class claseCargador = CSVLoader.class;

    if (fileName.endsWith(ArffLoader.FILE_EXTENSION)) {
        claseCargador = ArffLoader.class;
    } else {
        if (fileName.endsWith(JSONLoader.FILE_EXTENSION)) {
            claseCargador = JSONLoader.class;
        } else {
            if (fileName.endsWith(MatlabLoader.FILE_EXTENSION)) {
                claseCargador = MatlabLoader.class;
            } else {
                if (fileName.endsWith(XRFFLoader.FILE_EXTENSION)) {
                    claseCargador = XRFFLoader.class;
                } else {
                    if (fileName.endsWith(C45Loader.FILE_EXTENSION)) {
                        claseCargador = C45Loader.class;
                    }
                }
            }
        }
    }

    try {
        AbstractFileLoader cargador = (AbstractFileLoader) claseCargador.getConstructor().newInstance();
        boolean cambio_col = false;
        cargador.setSource(new File(fileName));
        Instances data1 = cargador.getDataSet();

        double[][] matrix2 = new double[data1.size()][data1.numAttributes()];
        for (int i = 0; i < data1.size(); i++) {
            matrix2[i] = data1.get(i).toDoubleArray();
        }

        // Now check whether all the columns are usable (non-constant)
        Integer[] colVale;
        dim = 0;

        if (data1.size() > 0) {
            colVale = new Integer[matrix2[0].length];
            double[] stdevX = StatisticSample.stddeviation(matrix2);
            for (int k = 0; k < matrix2[0].length; k++) {
                if (Math.abs(stdevX[k]) >= 0.000000001) {
                    colVale[k] = dim;
                    dim++;
                } else {
                    colVale[k] = -1;
                    cambio_col = true;
                }
            }
        } else {
            dim = data1.numAttributes();
            colVale = new Integer[dim];
            for (int k = 0; k < dim; k++) {
                colVale[k] = k;
            }
        }

        double[][] matrixAssign = new double[matrix2.length][dim];

        if (cambio_col) {
            for (int k = 0; k < matrix2.length; k++) {
                for (int w = 0; w < matrix2[0].length; w++) {
                    if (colVale[w] != -1) {
                        matrixAssign[k][colVale[w]] = matrix2[k][w];
                    }
                }
            }
        } else {
            matrixAssign = matrix2;
        }
        // End of the check

        setLabels(new String[dim]);
        for (int i = 0; i < data1.numAttributes(); i++) {
            if (colVale[i] != -1) {
                getLabels()[colVale[i]] = data1.attribute(i).name();
            }
        }

        // Debug dump of the filtered matrix to a hard-coded path
        BufferedWriter br = new BufferedWriter(new FileWriter("d:/tmp/fich.csv"));
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < matrixAssign.length; i++) {
            String cad = String.valueOf(matrixAssign[i][0]);
            for (int k = 1; k < matrixAssign[i].length; k++)
                cad += "," + matrixAssign[i][k];
            sb.append(cad + "\n");
        }
        br.write(sb.toString());
        br.close();

        setMatrix(matrixAssign);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:lu.lippmann.cdb.datasetview.tabs.MDSTabView.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    this.jxp.removeAll();

    if (this.distComboListener != null)
        distCombo.removeActionListener(this.distComboListener);
    this.distComboListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            if (!currentDist.equals(distCombo.getSelectedItem()))
                update(dataSet);
            currentDist = distCombo.getSelectedItem();
            final MDSDistancesEnum mde = MDSDistancesEnum.valueOf(currentDist.toString());
            boolean showDistanceParameters = (mde.equals(MDSDistancesEnum.MINKOWSKI));
            distanceParameters.setVisible(showDistanceParameters);
            distanceParametersLabel.setVisible(showDistanceParameters);
        }
    };
    this.distCombo.addActionListener(this.distComboListener);

    if (this.distanceParametersListener != null)
        distanceParameters.removeActionListener(this.distanceParametersListener);
    this.distanceParameters.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            if (!currentParameter.equals(distanceParameters.getText()))
                update(dataSet);
            currentParameter = distanceParameters.getText();
        }
    });
    this.distanceParameters.addActionListener(this.distanceParametersListener);

    if (this.shihListener != null)
        shihCheckbox.removeActionListener(this.shihListener);
    this.shihListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.shihCheckbox.addActionListener(this.shihListener);
    this.shihCheckbox.setEnabled(!WekaDataStatsUtil.areAllAttributesNominal(dataSet));

    if (this.ignoreListener != null)
        ignoreClassCheckbox.removeActionListener(this.ignoreListener);
    this.ignoreListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.ignoreClassCheckbox.addActionListener(this.ignoreListener);
    this.ignoreClassCheckbox.setEnabled(dataSet.classIndex() != -1);

    if (this.maxInstancesListener != null)
        maxInstances.removeKeyListener(this.maxInstancesListener);
    this.maxInstancesListener = new KeyAdapter() {
        @Override
        public void keyPressed(KeyEvent e) {
            final int cCode = e.getKeyCode();
            if (cCode == KeyEvent.VK_ENTER) {
                update(dataSet);
                e.consume();
            }
        }
    };
    this.maxInstances.addKeyListener(maxInstancesListener);

    if (this.normalizeListener != null)
        normalizeCheckbox.removeActionListener(this.normalizeListener);
    this.normalizeListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.normalizeCheckbox.addActionListener(this.normalizeListener);

    // TODO: use a proper layout ...
    final JXPanel northPanel = new JXPanel();
    northPanel.setLayout(new GridBagLayout());
    final GridBagConstraints gbc = new GridBagConstraints();
    gbc.gridx = 0;
    gbc.gridy = 0;
    gbc.gridwidth = 2;
    gbc.weightx = 1;
    gbc.fill = GridBagConstraints.BOTH;
    northPanel.add(this.distCombo, gbc);
    gbc.weightx = 0;
    gbc.gridwidth = 1;
    gbc.gridy = 1;
    northPanel.add(this.distanceParametersLabel, gbc);
    gbc.gridx = 1;
    northPanel.add(this.distanceParameters, gbc);
    this.jxp.add(northPanel, BorderLayout.NORTH);

    final MDSDistancesEnum mde = MDSDistancesEnum.valueOf(distCombo.getSelectedItem().toString());
    final String strOrder = distanceParameters.getText();
    if (mde.equals(MDSDistancesEnum.MINKOWSKI)) {
        mde.setParameters(new String[] { strOrder });
    }

    Instances usedDataSet = dataSet;
    if (shihCheckbox.isSelected()) {
        // Modify instances using the Shih (2010) algorithm
        final Shih2010 shih = new Shih2010(dataSet);
        usedDataSet = shih.getModifiedInstances();
    }

    this.kmeansButton = new JButton("K-means");
    this.maxKField = new JTextField("10");

    // Create the whole panel
    final JXPanel southPanel = new JXPanel();
    southPanel.add(shihCheckbox);
    southPanel.add(ignoreClassCheckbox);
    southPanel.add(normalizeCheckbox);
    southPanel.add(maxInstances);
    southPanel.add(new JLabel("Maximum K"));
    southPanel.add(maxKField);
    southPanel.add(kmeansButton);
    this.jxp.add(southPanel, BorderLayout.SOUTH);

    // Compute MDS
    final MDSResult mdsResult = ClassicMDS.doMDS(usedDataSet, mde, 2, Integer.valueOf(maxInstances.getText()),
            ignoreClassCheckbox.isSelected(), normalizeCheckbox.isSelected());

    final JXPanel mdsView = MDSViewBuilder.buildMDSViewFromDataSet(dataSet, mdsResult,
            Integer.valueOf(maxInstances.getText()), new Listener<Instances>() {
                @Override
                public void onAction(final Instances parameter) {
                    pushDataChange(new DataChange(parameter, TabView.DataChangeTypeEnum.Selection));
                }
            });
    this.jxp.add(mdsView, BorderLayout.CENTER);

    this.kmeansButton.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                // List of coordinates (x,y) of collapsed instances
                final Instances coordsInstances = mdsResult.buildInstancesFromMatrix();

                // FIXME dangerous: K-means on ordered collapsedInstance coordinates
                final KmeansImproved km = new KmeansImproved(coordsInstances,
                        Integer.valueOf(maxKField.getText()));
                final double[] ass = km.getClusteredInstances();
                int usedK = km.getUsedKmeans().getNumClusters();

                final StringBuilder labels = new StringBuilder();
                for (int i = 0; i < usedK; i++) {
                    labels.append("cluster").append((i + 1));
                    if (i < usedK - 1)
                        labels.append(",");
                }

                // Build the modified dataset
                String attributeName = "cluster_proj";
                while (dataSet.attribute(attributeName) != null)
                    attributeName += "_proj";

                final Add addFilter = new Add();
                addFilter.setAttributeIndex("last");
                addFilter.setAttributeName(attributeName);
                addFilter.setNominalLabels(labels.toString());
                addFilter.setInputFormat(dataSet);

                final Instances modDataset = Filter.useFilter(dataSet, addFilter);
                final int nbInstances = modDataset.numInstances();
                final int nbAttributes = modDataset.numAttributes();

                if (mdsResult.getCInstances().isCollapsed()) {
                    final KmeansResult kmr = mdsResult.getCInstances().getCentroidMap();
                    final List<Instances> clusters = kmr.getClusters();
                    int nbClusters = clusters.size();

                    // Build a map between any instance and its cluster's centroid
                    final Map<ComparableInstance, Integer> mapCentroid = new HashMap<ComparableInstance, Integer>();
                    for (int i = 0; i < nbClusters; i++) {
                        final Instances cluster = clusters.get(i);
                        final int clusterSize = cluster.size();
                        for (int k = 0; k < clusterSize; k++) {
                            mapCentroid.put(new ComparableInstance(cluster.instance(k)), i);
                        }
                    }

                    // Use the previous map to add the additional feature to every element
                    for (int i = 0; i < nbInstances; i++) {
                        final int centroidIndex = mapCentroid.get(new ComparableInstance(dataSet.instance(i)));
                        final String value = "cluster" + (int) (ass[centroidIndex] + 1);
                        modDataset.instance(i).setValue(nbAttributes - 1, value);
                    }
                } else {
                    for (int i = 0; i < nbInstances; i++) {
                        final String value = "cluster" + (int) (ass[i] + 1);
                        modDataset.instance(i).setValue(nbAttributes - 1, value);
                    }
                }

                pushDataChange(new DataChange(modDataset, TabView.DataChangeTypeEnum.Update));
            } catch (Exception e1) {
                e1.printStackTrace();
            }
        }
    });

    this.jxp.repaint();
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
public static void main(String[] args) throws Exception {
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(
            //new File("./samples/csv/uci/zoo.csv"));
            new File("./samples/csv/test50bis.csv"));
    //new File("./samples/arff/UCI/cmc.arff"));
    //new File("./samples/csv/direct-marketing-bank-reduced.csv"));
    //new File("./samples/csv/bank.csv"));
    //new File("./samples/csv/preswissroll.csv"));
    //new File("./samples/csv/preswissroll-mod4.csv"));

    ds.setClassIndex(-1);

    final int N = ds.size();
    final int M = ds.numAttributes();

    // Build the pairwise cosine-distance matrix (attribute 0 is skipped)
    SimpleMatrix dist = new SimpleMatrix(N, N);
    for (int i = 0; i < N; i++) {
        for (int j = i + 1; j < N; j++) {
            Instance xi = ds.instance(i);
            Instance xj = ds.instance(j);
            double d = 0, s = 0, a = 0, b = 0;
            for (int k = 1; k < M; k++) {
                s += xi.value(k) * xj.value(k);
                a += xi.value(k) * xi.value(k);
                b += xj.value(k) * xj.value(k); // fixed: the original accumulated xi twice here
            }
            d = 1 - s / (Math.sqrt(a) * Math.sqrt(b));
            dist.set(i, j, d);
            dist.set(j, i, d);
        }
    }

    final MDSResult res = ClassicMDS.doMDSV1(ds, dist);

    JXPanel p2 = MDSViewBuilder.buildMDSViewFromDataSet(ds, res, 5000, null);
    p2.setPreferredSize(new Dimension(800, 600));

    final JXFrame f = new JXFrame();
    f.setPreferredSize(new Dimension(1024, 768));
    final Container c = f.getContentPane();
    c.add(p2);
    f.pack();
    f.setVisible(true);
    f.setDefaultCloseOperation(JXFrame.EXIT_ON_CLOSE);

    System.out.println("Kruskal stress : =" + getKruskalStressFromMDSResult(res));
}