List of usage examples for weka.core.Instances.attribute
public Attribute attribute(String name)

Returns the attribute with the given name (case-sensitive), or null if no attribute with that name exists.
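Before the project examples below, here is a minimal, self-contained sketch of the typical lookup-and-validate pattern (the dataset file "weather.arff" and the attribute name "outlook" are hypothetical placeholders):

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AttributeLookupDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical ARFF file; any data set with a named attribute works
        Instances data = DataSource.read("weather.arff");

        // attribute(String name) returns the Attribute with that name,
        // or null if none exists, so a null check is good practice
        Attribute outlook = data.attribute("outlook");
        if (outlook == null) {
            throw new IllegalStateException("Attribute 'outlook' does not exist");
        }

        // The returned Attribute gives access to its index and metadata
        System.out.println("Index: " + outlook.index());
        System.out.println("Nominal? " + outlook.isNominal());
    }
}

Several of the examples below follow exactly this pattern: look the attribute up by name, then work with its index() or its values.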
From source file: etc.aloe.filters.WordFeaturesExtractor.java
License: Open Source License
@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
    if (selectedAttributeName == null) {
        throw new IllegalStateException("String attribute name was not set");
    }

    // attribute(String) returns null when no attribute has the given name
    Attribute stringAttr = instanceInfo.attribute(selectedAttributeName);
    if (stringAttr == null) {
        throw new IllegalStateException("Attribute " + selectedAttributeName + " does not exist");
    }

    return super.setInputFormat(instanceInfo);
}
From source file: etc.aloe.filters.WordFeaturesExtractor.java
License: Open Source License
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (this.selectedAttributeName == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    // Look up the selected attribute; setInputFormat has already verified it exists
    Attribute stringAttr = inputFormat.attribute(selectedAttributeName);
    selectedAttributeIndex = stringAttr.index();

    // Read the stopwords
    stopwords = this.prepareStopwords();

    // Tokenize all documents
    List<List<String>> documents = tokenizeDocuments(inputFormat);

    // Wrap the instances in something more convenient
    ClassData instances = new ClassData(inputFormat);

    // First determine the list of viable unigrams
    determineUnigrams(documents, instances);

    // Find all bigrams including one of the unigrams, filtered
    if (useBigrams) {
        determineBigrams(documents, instances);
    }

    return generateOutputFormat(inputFormat);
}
From source file: eu.cassandra.appliance.IsolatedApplianceExtractor.java
License: Apache License
/**
 * Takes the instances coming out of the clustering and puts each event into
 * its respective cluster.
 *
 * @param inst
 *          The clustered instances
 */
private void fillClusters(Instances inst) {
    // Initialize auxiliary variables
    ArrayList<Integer> temp;

    // For each instance, check the cluster value and add it to the correct cluster
    for (int i = 0; i < inst.size(); i++) {
        // The cluster assignment is stored in attribute 5
        String cluster = inst.get(i).stringValue(inst.attribute(5));

        if (!clusters.containsKey(cluster))
            temp = new ArrayList<Integer>();
        else
            temp = clusters.get(cluster);

        temp.add(i);
        clusters.put(cluster, temp);
    }
}
From source file: eu.cassandra.appliance.IsolatedEventsExtractor.java
License: Apache License
/**
 * Takes the instances coming out of the clustering and puts each event into
 * its respective cluster.
 *
 * @param inst
 *          The clustered instances
 */
private void fillClusters(Instances inst) {
    // Initialize auxiliary variables
    ArrayList<Integer> temp;

    // For each instance, check the cluster value and add it to the correct cluster
    for (int i = 0; i < inst.size(); i++) {
        // The cluster assignment is stored in attribute 6
        String cluster = inst.get(i).stringValue(inst.attribute(6));

        if (!clusters.containsKey(cluster))
            temp = new ArrayList<Integer>();
        else
            temp = clusters.get(cluster);

        temp.add(i);
        clusters.put(cluster, temp);
    }
}
From source file: eu.cassandra.utils.Utils.java
License: Apache License
/**
 * Creates clusters of points of interest based on the active power
 * difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @param bias
 *          Offset added to the estimated number of clusters.
 * @return The newly created clusters with the points that comprise them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimate the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST)) + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initialize the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");
    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }

    // System.out.println(instances.toString());
    Instances newInst = null;
    log.debug("Instances: " + instances.toSummaryString());

    // Create Weka's AddCluster filter and set up the k-means clusterer
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);
    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);
    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    // Ignore the id attribute (the first attribute) during clustering
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster the data set
    newInst = Filter.useFilter(instances, addcluster);
    // System.out.println(newInst.toString());

    // Go through the data set to see where each point was placed in the clusters
    for (int i = 0; i < newInst.size(); i++) {
        // Attribute 2 is the cluster assignment appended by AddCluster
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all clusters by their active power
    Collections.sort(result, Constants.comp5);

    return result;
}
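The three CASSANDRA examples above read the appended cluster assignment by position (attribute 5, 6, or 2). Weka's AddCluster filter names the attribute it appends "cluster", so the same value could be fetched with the name-based lookup this page documents, which is more robust to attribute reordering. A minimal sketch under that assumption, reusing the newInst variable from the example above:

// Look up the appended cluster-assignment attribute by name instead of by index
Attribute clusterAttr = newInst.attribute("cluster");
for (int i = 0; i < newInst.size(); i++) {
    // Values are nominal labels such as "cluster1", "cluster2", ...
    String cluster = newInst.get(i).stringValue(clusterAttr);
}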
From source file: eu.linda.analytics.formats.ForecastingRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFunctions.nicePrintMessage("Generate Forecasting RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/myRepository/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1) + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties (NS already ends in "#")
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource linda_user = model.createResource(analytics_NS + "User");

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + (analytics.getVersion() + 1));

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    // The analyzed field is the name of the last attribute of the data set
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "linda analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    if (helpfulFunctions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFunctions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each instance, create a result resource derived from the corresponding
    // input resource and attach the predicted value (the last attribute)
    for (int i = 1; i < triplets.size(); i++) {
        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(1));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue, triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file: eu.linda.analytics.formats.GeneralRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFuncions.nicePrintMessage("Generate General RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1) + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");
    Property dataSizeOfAnalyzedDataProperty = model.createProperty(NS + "dataSizeOfAnalyzedDatainBytes");
    Property timeToGetDataProperty = model.createProperty(NS + "timeToGetDataInSecs");
    Property timeToRunAnalyticsProcessProperty = model.createProperty(NS + "timeToRunAnalyticsProcessInSecs");
    Property timeToCreateRDFOutPutProperty = model.createProperty(NS + "timeToCreateRDFOutPutInSecs");
    Property performanceProperty = model.createProperty(NS + "hasPerformance");
    Property atTime = model.createProperty("http://www.w3.org/ns/prov#atTime");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource linda_user = model.createResource(analytics_NS + "User");
    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource performance = model.createResource(analytics_NS + "performance");
    Resource performance_statement = model
            .createResource(analytics_NS + "performance/" + analytics.getId() + "/" + analytics.getVersion());
    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + analytics.getVersion());

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    // The analyzed field is the name of the last attribute of the data set
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "Linda Analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    Calendar cal = GregorianCalendar.getInstance();
    Literal value = model.createTypedLiteral(cal);
    analytic_process_statement.addProperty(atTime, value);

    performance_statement.addProperty(RDF.type, performance);
    performance_statement.addProperty(dataSizeOfAnalyzedDataProperty, Float.toString(analytics.getData_size()));
    performance_statement.addProperty(timeToGetDataProperty, Float.toString(analytics.getTimeToGet_data()));
    performance_statement.addProperty(timeToRunAnalyticsProcessProperty, Float.toString(analytics.getTimeToRun_analytics()));
    performance_statement.addProperty(timeToCreateRDFOutPutProperty, Float.toString(analytics.getTimeToCreate_RDF()));
    analytic_process_statement.addProperty(performanceProperty, performance_statement);

    if (helpfulFuncions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFuncions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each instance, create a result resource derived from the corresponding
    // input resource and attach the predicted value (the last attribute)
    for (int i = 1; i < triplets.size(); i++) {
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(0));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue, triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file: examples.ExperimentDemo.java
License: Open Source License
/**
 * Expects the following parameters:
 * <ul>
 *   <li>-classifier "classifier incl. parameters"</li>
 *   <li>-exptype "classification|regression"</li>
 *   <li>-splittype "crossvalidation|randomsplit"</li>
 *   <li>-runs "# of runs"</li>
 *   <li>-folds "# of cross-validation folds"</li>
 *   <li>-percentage "percentage for randomsplit"</li>
 *   <li>-result "arff file for storing the results"</li>
 *   <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 *
 * @param args the command-line arguments
 * @throws Exception if something goes wrong
 */
// ref: http://weka.wikispaces.com/Using+the+Experiment+API
public static void main(String[] args) throws Exception {
    // @xr: my modification of args, output to download folder
    // @xr: passing args directly did not work; the parameters have to be put
    //      in the run configuration
    // String[] args = {
    //     "weka.classifiers.trees.J48",
    //     "classification",
    //     "crossvalidation",
    //     "10",
    //     "10",
    //     "/Users/renxin/Downloads/output.arff",
    //     "vote.arff",
    //     "iris.arff" };
    // String[] args = {
    //     "-classifier weka.classifiers.trees.J48",
    //     "-exptype classification",
    //     "-splittype crossvalidation",
    //     "-runs 10",
    //     "-folds 10",
    //     "-result /some/where/results.arff",
    //     "-t vote.arff",
    //     "-t iris.arff"
    // };

    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: ExperimentDemo\n"
                + "\t -classifier <classifier incl. parameters>\n"
                + "\t -exptype <classification|regression>\n"
                + "\t -splittype <crossvalidation|randomsplit>\n"
                + "\t -runs <# of runs>\n"
                + "\t -folds <folds for CV>\n"
                + "\t -percentage <percentage for randomsplit>\n"
                + "\t -result <ARFF file for storing the results>\n"
                + "\t -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. set up the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec,
                    new PropertyDescriptor("classifier", se.getClass()), se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);
    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec,
                    new PropertyDescriptor("classifier", se.getClass()), se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run the experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    // Columns are located by attribute name via attribute(String).index()
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill the result matrix (but discard the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());

    // output results for each dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println(" Perc. correct: " + matrix.getMean(i, 0));
        System.out.println(" StdDev: " + matrix.getStdDev(i, 0));
    }
}
From source file: facebookpostpuller.PostModel.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));
    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        Preprocess pre = new Preprocess();
        message = pre.emoticons(message);
        message = pre.emoji(message);
        message = pre.url(message);

        //StringFilter filter = new StringFilter(message);
        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45) { // Modified 6/11/2014
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}
From source file: facebookpostpuller.PostModelBACKUP.java
public static void convertToArff(File file) throws Exception {
    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));
    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();
    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();
        vals = new double[data.numAttributes()];

        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        //StringFilter filter = new StringFilter(message);
        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45 && age <= 54) {
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));
        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}
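Note that both PostModel examples use the legacy pre-3.7 Weka API (FastVector, and weka.core.Instance as a concrete class). In Weka 3.7+ the same header construction would use java.util collections and DenseInstance; a minimal sketch of the equivalent setup (the attribute names mirror the examples above, the instance values are hypothetical):

ArrayList<Attribute> atts = new ArrayList<Attribute>();
atts.add(new Attribute("name", (ArrayList<String>) null));    // string attribute
atts.add(new Attribute("message", (ArrayList<String>) null)); // string attribute
ArrayList<String> ageGroups = new ArrayList<String>(
        Arrays.asList("13-17", "18-24", "25-34", "35-44", "45-54"));
atts.add(new Attribute("age-group", ageGroups));              // nominal attribute
Instances data = new Instances("predict_age", atts, 0);

double[] vals = new double[data.numAttributes()];
vals[0] = data.attribute(0).addStringValue("Jane Doe");       // hypothetical values
vals[1] = data.attribute(1).addStringValue("Hello world");
vals[2] = ageGroups.indexOf("18-24");
data.add(new DenseInstance(1.0, vals));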