Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find usage examples for the weka.core.Instances method attribute(String name).

Prototype

public Attribute attribute(String name)

Document

Returns an attribute given its name.
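
For orientation, here is a minimal, self-contained sketch of the call itself (the file name and attribute name are placeholders; attribute(String) returns null when no attribute matches the name):

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AttributeLookupDemo {
    public static void main(String[] args) throws Exception {
        // Load any data set in a format Weka understands (placeholder path).
        Instances data = DataSource.read("iris.arff");

        // Look up an attribute by its name; null means no such attribute exists.
        Attribute attr = data.attribute("class");
        if (attr == null) {
            throw new IllegalArgumentException("No attribute named 'class'");
        }
        System.out.println("Index: " + attr.index() + ", type: " + Attribute.typeToString(attr));
    }
}

The examples below follow the same pattern: look the attribute up by name, then either guard against null or use the returned Attribute directly.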

Usage

From source file: etc.aloe.filters.WordFeaturesExtractor.java

License: Open Source License

@Override
public boolean setInputFormat(Instances instanceInfo) throws Exception {
    if (selectedAttributeName == null) {
        throw new IllegalStateException("String attribute name was not set");
    }

    Attribute stringAttr = instanceInfo.attribute(selectedAttributeName);
    if (stringAttr == null) {
        throw new IllegalStateException("Attribute " + selectedAttributeName + " does not exist");
    }

    return super.setInputFormat(instanceInfo);
}

From source file: etc.aloe.filters.WordFeaturesExtractor.java

License: Open Source License

@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    if (this.selectedAttributeName == null) {
        throw new IllegalStateException("String attribute name not set");
    }

    //Lookup the selected attribute
    Attribute stringAttr = inputFormat.attribute(selectedAttributeName);
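    // attribute(String) returns null when no attribute has that name;
    // setInputFormat above guards against that case before this runs.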
    selectedAttributeIndex = stringAttr.index();

    //Read the stopwords
    stopwords = this.prepareStopwords();

    //Tokenize all documents
    List<List<String>> documents = tokenizeDocuments(inputFormat);

    //Wrap the instances in something more convenient
    ClassData instances = new ClassData(inputFormat);

    //First determine the list of viable unigrams
    determineUnigrams(documents, instances);

    //Find all bigrams including one of the unigrams, filtered
    if (useBigrams) {
        determineBigrams(documents, instances);
    }

    return generateOutputFormat(inputFormat);
}

From source file: eu.cassandra.appliance.IsolatedApplianceExtractor.java

License: Apache License

/**
 * Takes the instances coming out of clustering and puts each event into its
 * respective cluster.
 *
 * @param inst
 *          The clustered instances
 */
private void fillClusters(Instances inst) {
    // Initializing auxiliary variables
    ArrayList<Integer> temp;

    // For each instance, check the cluster value and put it into the correct
    // cluster
    for (int i = 0; i < inst.size(); i++) {

        String cluster = inst.get(i).stringValue(inst.attribute(5));

        if (!clusters.containsKey(cluster))
            temp = new ArrayList<Integer>();
        else
            temp = clusters.get(cluster);

        temp.add(i);

        clusters.put(cluster, temp);

    }

}

From source file: eu.cassandra.appliance.IsolatedEventsExtractor.java

License: Apache License

/**
 * Takes the instances coming out of clustering and puts each event into its
 * respective cluster.
 *
 * @param inst
 *          The clustered instances
 */
private void fillClusters(Instances inst) {
    // Initializing auxiliary variables
    ArrayList<Integer> temp;

    // For each instance, check the cluster value and put it into the correct
    // cluster
    for (int i = 0; i < inst.size(); i++) {

        String cluster = inst.get(i).stringValue(inst.attribute(6));

        if (!clusters.containsKey(cluster))
            temp = new ArrayList<Integer>();
        else
            temp = clusters.get(cluster);

        temp.add(i);

        clusters.put(cluster, temp);

    }

}

From source file: eu.cassandra.utils.Utils.java

License: Apache License

/**
 * Creates clusters of points of interest based on the active power
 * difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @param bias
 *          An offset added to the estimated number of clusters.
 * @return The newly created clusters with the points that comprise them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimating the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initializing the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {

        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));

        instances.add(inst);

    }

    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create Weka's AddCluster filter and set up the k-means clusterer.
    AddCluster addcluster = new AddCluster();

    SimpleKMeans kmeans = new SimpleKMeans();

    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster data set
    newInst = Filter.useFilter(instances, addcluster);

    // System.out.println(newInst.toString());

    // Parse through the dataset to see where each point is placed in the
    // clusters.
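    // AddCluster appends the assigned cluster as a new last attribute
    // (index 2 here, after id and pDiff).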
    for (int i = 0; i < newInst.size(); i++) {

        String cluster = newInst.get(i).stringValue(newInst.attribute(2));

        cluster = cluster.replace("cluster", "");

        log.debug("Point of Interest: " + i + " Cluster: " + cluster);

        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sorting each cluster's points by their minutes.
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sorting all the clusters by their active power.

    Collections.sort(result, Constants.comp5);

    return result;
}

From source file: eu.linda.analytics.formats.ForecastingRDFGenerator.java

@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFunctions.nicePrintMessage("Generate Forecasting RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);
    String base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/myRepository/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();
    //openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";

    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource linda_user = model.createResource(analytics_NS + "User");

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + (analytics.getVersion() + 1));
    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
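    // The last attribute of the data set holds the analyzed (predicted) field; record its name.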
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "linda analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    if (helpfulFunctions.isRDFInputFormat(analytics.getTrainQuery_id())) {

        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);

    }

    if (helpfulFunctions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {

        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);

    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);

    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);

    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each instance, create a resource for the analytics result node, link it
    // to its input node and the analytic process, and add the triples to the model.
    for (int i = 1; i < triplets.size(); i++) {

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);

        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(1));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }
    return model;

}

From source file: eu.linda.analytics.formats.GeneralRDFGenerator.java

@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFuncions.nicePrintMessage("Generate General RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);
    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();
    //openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";

    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an", Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");
    Property dataSizeOfAnalyzedDataProperty = model.createProperty(NS + "dataSizeOfAnalyzedDatainBytes");
    Property timeToGetDataProperty = model.createProperty(NS + "timeToGetDataInSecs");
    Property timeToRunAnalyticsProcessProperty = model.createProperty(NS + "timeToRunAnalyticsProcessInSecs");
    Property timeToCreateRDFOutPutProperty = model.createProperty(NS + "timeToCreateRDFOutPutInSecs");
    Property performanceProperty = model.createProperty(NS + "hasPerformance");
    Property atTime = model.createProperty("http://www.w3.org/ns/prov#atTime");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);
    Resource linda_user = model.createResource(analytics_NS + "User");
    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource performance = model.createResource(analytics_NS + "performance");
    Resource performance_statement = model
            .createResource(analytics_NS + "performance/" + analytics.getId() + "/" + analytics.getVersion());

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + analytics.getVersion());
    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "Linda Analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    Calendar cal = GregorianCalendar.getInstance();
    Literal value = model.createTypedLiteral(cal);
    analytic_process_statement.addProperty(atTime, value);

    performance_statement.addProperty(RDF.type, performance);
    performance_statement.addProperty(dataSizeOfAnalyzedDataProperty, Float.toString(analytics.getData_size()));
    performance_statement.addProperty(timeToGetDataProperty, Float.toString(analytics.getTimeToGet_data()));
    performance_statement.addProperty(timeToRunAnalyticsProcessProperty,
            Float.toString(analytics.getTimeToRun_analytics()));
    performance_statement.addProperty(timeToCreateRDFOutPutProperty,
            Float.toString(analytics.getTimeToCreate_RDF()));
    analytic_process_statement.addProperty(performanceProperty, performance_statement);

    if (helpfulFuncions.isRDFInputFormat(analytics.getTrainQuery_id())) {

        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);

    }

    if (helpfulFuncions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {

        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);

    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);

    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);

    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each instance, create a resource for the analytics result node, link it
    // to its input node and the analytic process, and add the triples to the model.
    for (int i = 1; i < triplets.size(); i++) {
        //for (Instance triplet : triplets) {
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(0));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));

    }

    return model;

}

From source file: examples.ExperimentDemo.java

License: Open Source License

/**
 * Expects the following parameters:
 * <ul>
 * <li>-classifier "classifier incl. parameters"</li>
 * <li>-exptype "classification|regression"</li>
 * <li>-splittype "crossvalidation|randomsplit"</li>
 * <li>-runs "# of runs"</li>
 * <li>-folds "# of cross-validation folds"</li>
 * <li>-percentage "percentage for randomsplit"</li>
 * <li>-result "arff file for storing the results"</li>
 * <li>-t "dataset" (can be supplied multiple times)</li>
 * </ul>
 * 
 * @param args
 *            the commandline arguments
 * @throws Exception
 *             if something goes wrong
 */

// ref: http://weka.wikispaces.com/Using+the+Experiment+API
public static void main(String[] args) throws Exception {

    // @xr: my modification of args, with output to the Downloads folder
    // @xr: passing args directly did not work; the parameters have to go in the run configuration
    //      String[] args = { 
    //            "weka.classifiers.trees.J48", 
    //            "classification",
    //            "crossvalidation", 
    //            "10", 
    //            "10",
    //            "/Users/renxin/Downloads/output.arff", 
    //            "vote.arff", 
    //            "iris.arff" };

    //      String[] args = {
    //               "-classifier weka.classifiers.trees.J48",
    //               "-exptype classification",
    //               "-splittype crossvalidation",
    //               "-runs 10",
    //               "-folds 10",
    //               "-result /some/where/results.arff",
    //               "-t vote.arff",
    //               "-t iris.arff"
    //      };

    // parameters provided?
    if (args.length == 0) {
        System.out.println("\nUsage: ExperimentDemo\n" + "\t   -classifier <classifier incl. parameters>\n"
                + "\t   -exptype <classification|regression>\n"
                + "\t   -splittype <crossvalidation|randomsplit>\n" + "\t   -runs <# of runs>\n"
                + "\t   -folds <folds for CV>\n" + "\t   -percentage <percentage for randomsplit>\n"
                + "\t   -result <ARFF file for storing the results>\n"
                + "\t   -t dataset (can be supplied multiple times)\n");
        System.exit(1);
    }

    // 1. setup the experiment
    System.out.println("Setting up...");
    Experiment exp = new Experiment();
    exp.setPropertyArray(new Classifier[0]);
    exp.setUsePropertyIterator(true);

    String option;

    // classification or regression
    option = Utils.getOption("exptype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No experiment type provided!");

    SplitEvaluator se = null;
    Classifier sec = null;
    boolean classification = false;
    if (option.equals("classification")) {
        classification = true;
        se = new ClassifierSplitEvaluator();
        sec = ((ClassifierSplitEvaluator) se).getClassifier();
    } else if (option.equals("regression")) {
        se = new RegressionSplitEvaluator();
        sec = ((RegressionSplitEvaluator) se).getClassifier();
    } else {
        throw new IllegalArgumentException("Unknown experiment type '" + option + "'!");
    }

    // crossvalidation or randomsplit
    option = Utils.getOption("splittype", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No split type provided!");

    if (option.equals("crossvalidation")) {
        CrossValidationResultProducer cvrp = new CrossValidationResultProducer();
        option = Utils.getOption("folds", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No folds provided!");
        cvrp.setNumFolds(Integer.parseInt(option));
        cvrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class),
                    CrossValidationResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(cvrp);
        exp.setPropertyPath(propertyPath);

    } else if (option.equals("randomsplit")) {
        RandomSplitResultProducer rsrp = new RandomSplitResultProducer();
        rsrp.setRandomizeData(true);
        option = Utils.getOption("percentage", args);
        if (option.length() == 0)
            throw new IllegalArgumentException("No percentage provided!");
        rsrp.setTrainPercent(Double.parseDouble(option));
        rsrp.setSplitEvaluator(se);

        PropertyNode[] propertyPath = new PropertyNode[2];
        try {
            propertyPath[0] = new PropertyNode(se,
                    new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class),
                    RandomSplitResultProducer.class);
            propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()),
                    se.getClass());
        } catch (IntrospectionException e) {
            e.printStackTrace();
        }

        exp.setResultProducer(rsrp);
        exp.setPropertyPath(propertyPath);
    } else {
        throw new IllegalArgumentException("Unknown split type '" + option + "'!");
    }

    // runs
    option = Utils.getOption("runs", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No runs provided!");
    exp.setRunLower(1);
    exp.setRunUpper(Integer.parseInt(option));

    // classifier
    option = Utils.getOption("classifier", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No classifier provided!");
    String[] options = Utils.splitOptions(option);
    String classname = options[0];
    options[0] = "";
    Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options);
    exp.setPropertyArray(new Classifier[] { c });

    // datasets
    boolean data = false;
    DefaultListModel model = new DefaultListModel();
    do {
        option = Utils.getOption("t", args);
        if (option.length() > 0) {
            File file = new File(option);
            if (!file.exists())
                throw new IllegalArgumentException("File '" + option + "' does not exist!");
            data = true;
            model.addElement(file);
        }
    } while (option.length() > 0);
    if (!data)
        throw new IllegalArgumentException("No data files provided!");
    exp.setDatasets(model);

    // result
    option = Utils.getOption("result", args);
    if (option.length() == 0)
        throw new IllegalArgumentException("No result file provided!");
    InstancesResultListener irl = new InstancesResultListener();
    irl.setOutputFile(new File(option));
    exp.setResultListener(irl);

    // 2. run experiment
    System.out.println("Initializing...");
    exp.initialize();
    System.out.println("Running...");
    exp.runExperiment();
    System.out.println("Finishing...");
    exp.postProcess();

    // 3. calculate statistics and output them
    System.out.println("Evaluating...");
    PairedTTester tester = new PairedCorrectedTTester();
    Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile())));
    tester.setInstances(result);
    tester.setSortColumn(-1);
    tester.setRunColumn(result.attribute("Key_Run").index());
    if (classification)
        tester.setFoldColumn(result.attribute("Key_Fold").index());
    tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1)));
    tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + ","
            + (result.attribute("Key_Scheme_options").index() + 1) + ","
            + (result.attribute("Key_Scheme_version_ID").index() + 1)));
    tester.setResultMatrix(new ResultMatrixPlainText());
    tester.setDisplayedResultsets(null);
    tester.setSignificanceLevel(0.05);
    tester.setShowStdDevs(true);
    // fill result matrix (but discarding the output)
    if (classification)
        tester.multiResultsetFull(0, result.attribute("Percent_correct").index());
    else
        tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index());
    // output results for each dataset
    System.out.println("\nResult:");
    ResultMatrix matrix = tester.getResultMatrix();
    for (int i = 0; i < matrix.getColCount(); i++) {
        System.out.println(matrix.getColName(i));
        System.out.println("    Perc. correct: " + matrix.getMean(i, 0));
        System.out.println("    StdDev: " + matrix.getStdDev(i, 0));
    }
}

From source file: facebookpostpuller.PostModel.java

public static void convertToArff(File file) throws Exception {

    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();

    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();

        vals = new double[data.numAttributes()];
        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();

        Preprocess pre = new Preprocess();
        message = pre.emoticons(message);
        message = pre.emoji(message);
        message = pre.url(message);

        //StringFilter filter = new StringFilter(message);
        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45) { // Modified 6/11/2014 
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));

        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}
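
Note that this example (and the backup variant below) is written against the pre-3.7 Weka API, where FastVector and the concrete weka.core.Instance class were current. A minimal sketch of the same header and row construction against the Weka 3.7+ collections API, using illustrative values, could look like this:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class ConvertToArffModern {
    public static void main(String[] args) {
        // String attributes are declared by passing a null value list.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("name", (ArrayList<String>) null));
        atts.add(new Attribute("message", (ArrayList<String>) null));

        // Nominal attribute for the age group.
        ArrayList<String> ageGroups = new ArrayList<String>();
        ageGroups.add("13-17");
        ageGroups.add("18-24");
        ageGroups.add("25-34");
        ageGroups.add("35-44");
        ageGroups.add("45-54");
        atts.add(new Attribute("age-group", ageGroups));

        Instances data = new Instances("predict_age", atts, 0);

        // One row with illustrative values.
        double[] vals = new double[data.numAttributes()];
        vals[0] = data.attribute(0).addStringValue("Alice");
        vals[1] = data.attribute(1).addStringValue("Hello world");
        vals[2] = ageGroups.indexOf("18-24");
        data.add(new DenseInstance(1.0, vals));

        System.out.println(data);
    }
}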

From source file: facebookpostpuller.PostModelBACKUP.java

public static void convertToArff(File file) throws Exception {

    FastVector atts;
    FastVector attVals;
    Instances data;
    double[] vals;

    file = new File(file + ".arff");

    atts = new FastVector();
    atts.addElement(new Attribute(("name"), (FastVector) null)); // 5/27/2014
    atts.addElement(new Attribute(("message"), (FastVector) null));

    attVals = new FastVector();
    attVals.addElement("13-17");
    attVals.addElement("18-24");
    attVals.addElement("25-34");
    attVals.addElement("35-44");
    attVals.addElement("45-54");
    atts.addElement(new Attribute("age-group", attVals));

    data = new Instances("predict_age", atts, 0);

    Iterator it = posts.entrySet().iterator();

    while (it.hasNext()) {
        Map.Entry pairs = (Map.Entry) it.next();

        vals = new double[data.numAttributes()];
        User user = (User) pairs.getValue();
        String name = user.getName(); // 5/27/2014
        String message = ((Post) (pairs.getKey())).getMessage();
        //StringFilter filter = new StringFilter(message);
        vals[0] = data.attribute(0).addStringValue(name); // 5/27/2014
        vals[1] = data.attribute(1).addStringValue(message);

        int age = calculateAge(user.getBirthdayAsDate());
        if (age >= 13 && age <= 17) {
            vals[2] = attVals.indexOf("13-17");
        } else if (age >= 18 && age <= 24) {
            vals[2] = attVals.indexOf("18-24");
        } else if (age >= 25 && age <= 34) {
            vals[2] = attVals.indexOf("25-34");
        } else if (age >= 35 && age <= 44) {
            vals[2] = attVals.indexOf("35-44");
        } else if (age >= 45 && age <= 54) {
            vals[2] = attVals.indexOf("45-54");
        }

        data.add(new Instance(1.0, vals));

        it.remove();
    }

    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(file);
    saver.writeBatch();
}