List of usage examples for weka.core Instances size
@Override public int size()
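In current Weka releases, Instances.size() returns the number of instances in the dataset, equivalent to numInstances(). Before the examples, a minimal self-contained sketch (not from any of the source files below) of the index-based iteration pattern this page documents; the file path is a placeholder assumption:

// Minimal sketch: load a dataset and iterate by index using Instances.size().
// The file path is a hypothetical placeholder.
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesSizeExample {
    public static void main(String[] args) throws Exception {
        DataSource source = new DataSource("data/example.arff"); // hypothetical path
        Instances data = source.getDataSet();
        // size() is equivalent to numInstances()
        System.out.println("Instances: " + data.size());
        for (int i = 0; i < data.size(); i++) {
            Instance inst = data.get(i);
            System.out.println(inst);
        }
    }
}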
From source file:eu.cassandra.utils.Utils.java
License:Apache License
/**
 * Creates clusters of points of interest based on the active power
 * difference they have.
 *
 * @param pois
 *          The list of points of interest that will be clustered.
 * @param bias
 *          Added to the estimated number of clusters.
 * @return The newly created clusters with the points that comprise them.
 * @throws Exception
 */
public static ArrayList<ArrayList<PointOfInterest>> clusterPoints(ArrayList<PointOfInterest> pois, int bias)
        throws Exception {
    // Initialize the auxiliary variables
    ArrayList<ArrayList<PointOfInterest>> result = new ArrayList<ArrayList<PointOfInterest>>();

    // Estimate the number of clusters that will be created
    int numberOfClusters = (int) (Math.ceil((double) pois.size() / (double) Constants.MAX_POINTS_OF_INTEREST))
            + bias;

    log.info("Clusters: " + pois.size() + " / " + Constants.MAX_POINTS_OF_INTEREST + " + " + bias + " = "
            + numberOfClusters);

    // Create a new empty list of points for each cluster
    for (int i = 0; i < numberOfClusters; i++)
        result.add(new ArrayList<PointOfInterest>());

    // Initialize the auxiliary variables, namely the attributes of the data set
    Attribute id = new Attribute("id");
    Attribute pDiffRise = new Attribute("pDiff");

    ArrayList<Attribute> attr = new ArrayList<Attribute>();
    attr.add(id);
    attr.add(pDiffRise);

    Instances instances = new Instances("Points of Interest", attr, 0);

    // Each event is translated to an instance with the above attributes
    for (int i = 0; i < pois.size(); i++) {
        Instance inst = new DenseInstance(2);
        inst.setValue(id, i);
        inst.setValue(pDiffRise, Math.abs(pois.get(i).getPDiff()));
        instances.add(inst);
    }
    // System.out.println(instances.toString());

    Instances newInst = null;

    log.debug("Instances: " + instances.toSummaryString());

    // Create the AddCluster filter of Weka and set up the k-means clusterer
    AddCluster addcluster = new AddCluster();
    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(numberOfClusters);

    // This is the important parameter to set
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setNumClusters(numberOfClusters);
    kmeans.buildClusterer(instances);

    addcluster.setClusterer(kmeans);
    addcluster.setInputFormat(instances);
    addcluster.setIgnoredAttributeIndices("1");

    // Cluster the data set
    newInst = Filter.useFilter(instances, addcluster);
    // System.out.println(newInst.toString());

    // Parse through the data set to see where each point is placed in the clusters
    for (int i = 0; i < newInst.size(); i++) {
        String cluster = newInst.get(i).stringValue(newInst.attribute(2));
        cluster = cluster.replace("cluster", "");
        log.debug("Point of Interest: " + i + " Cluster: " + cluster);
        result.get(Integer.parseInt(cluster) - 1).add(pois.get(i));
    }

    // Sort each cluster's points by their minutes
    for (int i = result.size() - 1; i >= 0; i--) {
        if (result.get(i).size() == 0)
            result.remove(i);
        else
            Collections.sort(result.get(i), Constants.comp);
    }

    // Sort all clusters by their active power
    Collections.sort(result, Constants.comp5);

    return result;
}
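A hedged usage sketch for the method above, assuming an already-populated pois list (PointOfInterest construction is not shown in this listing):

// Hypothetical caller: cluster an existing list of points with no bias and
// report how many points landed in each cluster.
ArrayList<ArrayList<PointOfInterest>> clusters = Utils.clusterPoints(pois, 0);
for (int c = 0; c < clusters.size(); c++) {
    System.out.println("Cluster " + (c + 1) + ": " + clusters.get(c).size() + " points");
}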
From source file:eu.linda.analytics.formats.ForecastingRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFunctions.nicePrintMessage("Generate Forecasting RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/myRepository/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an",
            Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties (NS already ends with "#"; the original appended
    // a second "#" to the first two URIs, which is fixed here)
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource linda_user = model.createResource(analytics_NS + "User");

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + (analytics.getVersion() + 1));

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "linda analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    if (helpfulFunctions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFunctions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // (Note: the loop starts at i = 1, so the first instance is skipped.)
    for (int i = 1; i < triplets.size(); i++) {
        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(1));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file:eu.linda.analytics.formats.GeneralRDFGenerator.java
@Override
public Model generateRDFModel(Analytics analytics, AbstractList dataToExport) {

    helpfulFuncions.nicePrintMessage("Generate General RDFModel for weka algorithms ");

    Date date = new Date();
    DateFormat formatter = new SimpleDateFormat("ddMMyyyy");
    String today = formatter.format(date);

    String base = Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/statements?context=:_";
    String datasetContextToString = "analytics" + analytics.getId() + "V" + (analytics.getVersion() + 1)
            + "Date" + today;

    Instances triplets = (Instances) dataToExport;
    int tripletsAttibutesNum = triplets.numAttributes();

    // Create the model and define some prefixes (for nice serialization in RDF/XML and TTL)
    Model model = ModelFactory.createDefaultModel();

    // openrdf + analytic_process ID_version_date
    String NS = base + datasetContextToString + "#";
    String analytics_base = Configuration.lindaworkbenchURI
            + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology";
    String analytics_NS = analytics_base + "#";

    model.setNsPrefix("ds", NS);
    model.setNsPrefix("rdf", RDF.getURI());
    model.setNsPrefix("xsd", XSD.getURI());
    model.setNsPrefix("foaf", FOAF.getURI());
    model.setNsPrefix("rdfs", RDFS.getURI());
    model.setNsPrefix("prov", "http://www.w3.org/ns/prov#");
    model.setNsPrefix("sio", "http://semanticscience.org/ontology/sio#");
    model.setNsPrefix("an",
            Configuration.lindaworkbenchURI + "openrdf-sesame/repositories/linda/rdf-graphs/analyticsontology#");

    // Define local properties
    Property analyzedField = model.createProperty(NS + "analyzedField");
    Property predictedValue = model.createProperty(NS + "predictedValue");
    Property wasDerivedFrom = model.createProperty("http://www.w3.org/ns/prov#wasDerivedFrom");
    Property wasGeneratedBy = model.createProperty("http://www.w3.org/ns/prov#wasGeneratedBy");
    Property actedOnBehalfOf = model.createProperty("http://www.w3.org/ns/prov#actedOnBehalfOf");
    Property wasAssociatedWith = model.createProperty("http://www.w3.org/ns/prov#wasAssociatedWith");
    Property hasTrainDataset = model.createProperty(NS + "hasTrainDataset");
    Property hasEvaluationDataset = model.createProperty(NS + "hasEvaluationDataset");
    Property algorithmProperty = model.createProperty(NS + "algorithm");
    Property dataSizeOfAnalyzedDataProperty = model.createProperty(NS + "dataSizeOfAnalyzedDatainBytes");
    Property timeToGetDataProperty = model.createProperty(NS + "timeToGetDataInSecs");
    Property timeToRunAnalyticsProcessProperty = model.createProperty(NS + "timeToRunAnalyticsProcessInSecs");
    Property timeToCreateRDFOutPutProperty = model.createProperty(NS + "timeToCreateRDFOutPutInSecs");
    Property performanceProperty = model.createProperty(NS + "hasPerformance");
    Property atTime = model.createProperty("http://www.w3.org/ns/prov#atTime");

    Resource entity = model.createResource("http://www.w3.org/ns/prov#Entity");
    Resource activity = model.createResource("http://www.w3.org/ns/prov#Activity");
    Resource agent = model.createResource("http://www.w3.org/ns/prov#Agent");
    Resource onlineAccount = model.createResource(FOAF.OnlineAccount);

    Resource linda_user = model.createResource(analytics_NS + "User");
    Resource software_statement = model.createResource(analytics_NS + "Software/LinDa_analytics_software");
    Resource software = model.createResource(analytics_NS + "Software");
    Resource performance = model.createResource(analytics_NS + "performance");
    Resource performance_statement = model
            .createResource(analytics_NS + "performance/" + analytics.getId() + "/" + analytics.getVersion());

    Resource analytic_process = model.createResource(analytics_NS + "analytic_process");
    Resource analytic_process_statement = model.createResource(
            analytics_NS + "analytic_process/" + analytics.getId() + "/" + analytics.getVersion());

    analytic_process_statement.addProperty(RDF.type, analytic_process);
    analytic_process_statement.addProperty(OWL.versionInfo, "1.0.0");
    analytic_process_statement.addLiteral(analyzedField, triplets.attribute(tripletsAttibutesNum - 1).name());
    analytic_process_statement.addProperty(RDFS.subClassOf, activity);
    analytic_process_statement.addProperty(wasAssociatedWith, software_statement);
    analytic_process_statement.addProperty(RDFS.label, "Linda Analytic process");
    analytic_process_statement.addProperty(RDFS.comment, analytics.getDescription());
    analytic_process_statement.addProperty(algorithmProperty, analytics.getAlgorithm_name());

    Calendar cal = GregorianCalendar.getInstance();
    Literal value = model.createTypedLiteral(cal);
    analytic_process_statement.addProperty(atTime, value);

    performance_statement.addProperty(RDF.type, performance);
    performance_statement.addProperty(dataSizeOfAnalyzedDataProperty, Float.toString(analytics.getData_size()));
    performance_statement.addProperty(timeToGetDataProperty, Float.toString(analytics.getTimeToGet_data()));
    performance_statement.addProperty(timeToRunAnalyticsProcessProperty,
            Float.toString(analytics.getTimeToRun_analytics()));
    performance_statement.addProperty(timeToCreateRDFOutPutProperty,
            Float.toString(analytics.getTimeToCreate_RDF()));
    analytic_process_statement.addProperty(performanceProperty, performance_statement);

    if (helpfulFuncions.isRDFInputFormat(analytics.getTrainQuery_id())) {
        Resource analytic_train_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getTrainQuery_id());
        analytic_process_statement.addProperty(hasTrainDataset, analytic_train_dataset_statement);
    }

    if (helpfulFuncions.isRDFInputFormat(analytics.getEvaluationQuery_id())) {
        Resource analytic_evaluation_dataset_statement = model.createResource(
                Configuration.lindaworkbenchURI + "sparql/?q_id=" + analytics.getEvaluationQuery_id());
        analytic_process_statement.addProperty(hasEvaluationDataset, analytic_evaluation_dataset_statement);
    }

    Resource linda_user_statement = model.createResource(analytics_NS + "User/" + analytics.getUser_name());
    linda_user_statement.addProperty(RDF.type, linda_user);
    linda_user_statement.addProperty(RDFS.subClassOf, agent);
    linda_user_statement.addProperty(RDFS.label, "linda user");

    software_statement.addProperty(RDF.type, software);
    software_statement.addProperty(RDFS.subClassOf, agent);
    software_statement.addProperty(RDFS.label, "analytics software");
    software_statement.addProperty(actedOnBehalfOf, linda_user_statement);

    linda_user_statement.addProperty(OWL.equivalentClass, FOAF.Person);
    linda_user_statement.addProperty(FOAF.holdsAccount, onlineAccount);
    linda_user_statement.addProperty(FOAF.accountName, analytics.getUser_name());
    onlineAccount.addProperty(FOAF.homepage, Configuration.lindaworkbenchURI);

    Resource analytic_result_node = model.createResource(analytics_NS + "analytics_result_node");
    Resource analytic_input_node = model.createResource(analytics_NS + "analytic_input_node");

    // For each triplet, create a resource representing the sentence, as well as the subject,
    // predicate, and object, and then add the triples to the model.
    // (Note: the loop starts at i = 1, so the first instance is skipped.)
    for (int i = 1; i < triplets.size(); i++) {
        //for (Instance triplet : triplets) {
        Resource analytic_input_node_statement = model.createResource(triplets.get(i).toString(0));
        analytic_input_node_statement.addProperty(RDF.type, analytic_input_node);

        Resource analytic_result_node_statement = model.createResource(NS + "/" + i);
        analytic_result_node_statement.addProperty(RDF.type, analytic_result_node);
        analytic_result_node_statement.addProperty(RDFS.subClassOf, entity);
        analytic_result_node_statement.addProperty(wasDerivedFrom, analytic_input_node_statement);
        analytic_result_node_statement.addProperty(wasGeneratedBy, analytic_process_statement);
        analytic_result_node_statement.addProperty(predictedValue,
                triplets.get(i).toString(tripletsAttibutesNum - 1));
    }

    return model;
}
From source file:gov.va.chir.tagline.TagLineScorer.java
License:Open Source License
public void applyModel(final Document document) throws Exception {
    // Calculate features at both document and line level
    extractor.calculateFeatureValues(document);

    final Instances instances = DatasetUtil.createDataset(tagLineModel.getHeader(), document);

    for (int i = 0; i < instances.size(); i++) {
        final double[] probs = fc.distributionForInstance(instances.get(i));

        // Find the class with the highest predicted probability
        int maxPos = -1;
        double maxProb = 0.0;

        for (int j = 0; j < probs.length; j++) {
            if (probs[j] > maxProb) {
                maxProb = probs[j];
                maxPos = j;
            }
        }

        if (maxPos < 0) {
            throw new IllegalStateException(
                    String.format("Predicted label array index must not be negative (%d)", maxPos));
        }

        // Set the predicted label and probability on the correct line
        final int lineId = (int) instances.get(i).value(lineIdAttr);
        document.getLine(lineId).setPredictedLabel(classAttr.value(maxPos));
        document.getLine(lineId).setPredictedProbability(maxProb);
    }
}
From source file:imba.classifier.FFNNTubes.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    // Set the target data
    setTarget(data);

    // Generate random initial weights
    generateRandomWeight();

    // Normalize the data
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();
    norm.setInputFormat(data);
    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);
        for (Instance i1 : filteredData) {
            filter.input(i1);
        }
        filter.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    // Train until the epoch limit is reached or the MSE drops below the threshold
    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));

            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }

        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;
    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;
    int numAttr = data.numAttributes(); // this includes the class, so attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // Apply the filters
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // Build the data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // Fill the data and info classifiers with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // Fill the data classifier from the dataset
    sumClass = new int[data.numClasses()];

    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }

                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);

                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // Compute the per-class relative frequencies as doubles
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // Check whether any value in each attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
public WekaEvaluator(String baseFolderPath, String projectName, Classifier classifier, String classifierName,
        String modelName) {

    /* OLD CODE
    // READ FILE
    try {
        BufferedReader reader = new BufferedReader(new FileReader(filePath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println(data.size());

        // split the instances into train and test sets
        Instances train = data;
        Instances test = null;

        // EVALUATION
        Evaluation eval = new Evaluation(train);
        //eval.evaluateModel(j48, test);

        // CROSS-VALIDATION
        eval.crossValidateModel(classifier, train, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
    OLD CODE */

    String filePath = baseFolderPath + projectName + "/predictors.csv";
    try {
        DataSource source = new DataSource(filePath);
        Instances instances = source.getDataSet();
        instances.setClassIndex(instances.numAttributes() - 1);
        System.out.println("Numero istanze: " + instances.size());
        evaluateModel(baseFolderPath, projectName, classifier, instances, modelName, classifierName);
    } catch (Exception ex) {
        Logger.getLogger(WekaEvaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
}
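A hedged construction sketch for the evaluator above. The folder layout and the choice of J48 are assumptions; what the listing does establish is that the constructor reads <baseFolderPath><projectName>/predictors.csv, sets the last attribute as the class, and calls evaluateModel():

// Hypothetical invocation; paths and classifier choice are assumptions.
import weka.classifiers.trees.J48;

public class WekaEvaluatorDemo {
    public static void main(String[] args) {
        // Expects /data/projects/myProject/predictors.csv to exist (assumed layout)
        new WekaEvaluator("/data/projects/", "myProject", new J48(), "J48", "defectModel");
    }
}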
From source file:lfsom.data.LFSData.java
License:Apache License
/**
 * Loads the data from a file (CSV by default; ARFF, JSON, Matlab, XRFF,
 * and C4.5 formats are detected by extension).
 *
 * @param fileName
 */
public LFSData(String fileName) {

    // Pick a Weka loader class according to the file extension (CSV is the default)
    Class claseCargador = CSVLoader.class;

    if (fileName.endsWith(ArffLoader.FILE_EXTENSION)) {
        claseCargador = ArffLoader.class;
    } else {
        if (fileName.endsWith(JSONLoader.FILE_EXTENSION)) {
            claseCargador = JSONLoader.class;
        } else {
            if (fileName.endsWith(MatlabLoader.FILE_EXTENSION)) {
                claseCargador = MatlabLoader.class;
            } else {
                if (fileName.endsWith(XRFFLoader.FILE_EXTENSION)) {
                    claseCargador = XRFFLoader.class;
                } else {
                    if (fileName.endsWith(C45Loader.FILE_EXTENSION)) {
                        claseCargador = C45Loader.class;
                    }
                }
            }
        }
    }

    try {
        AbstractFileLoader cargador = (AbstractFileLoader) claseCargador.getConstructor().newInstance();
        boolean cambio_col = false;
        cargador.setSource(new File(fileName));
        Instances data1 = cargador.getDataSet();

        double[][] matrix2 = new double[data1.size()][data1.numAttributes()];
        for (int i = 0; i < data1.size(); i++) {
            matrix2[i] = data1.get(i).toDoubleArray();
        }

        // Now check whether all the columns are usable (non-constant)
        Integer[] colVale;
        dim = 0;

        if (data1.size() > 0) {
            colVale = new Integer[matrix2[0].length];
            double[] stdevX = StatisticSample.stddeviation(matrix2);
            for (int k = 0; k < matrix2[0].length; k++) {
                if (Math.abs(stdevX[k]) >= 0.000000001) {
                    colVale[k] = dim;
                    dim++;
                } else {
                    colVale[k] = -1;
                    cambio_col = true;
                }
            }
        } else {
            dim = data1.numAttributes();
            colVale = new Integer[dim];
            for (int k = 0; k < dim; k++) {
                colVale[k] = k;
            }
        }

        double[][] matrixAssign = new double[matrix2.length][dim];

        if (cambio_col) {
            for (int k = 0; k < matrix2.length; k++) {
                for (int w = 0; w < matrix2[0].length; w++) {
                    if (colVale[w] != -1) {
                        matrixAssign[k][colVale[w]] = matrix2[k][w];
                    }
                }
            }
        } else {
            matrixAssign = matrix2;
        }
        // End of the check

        setLabels(new String[dim]);
        for (int i = 0; i < data1.numAttributes(); i++) {
            if (colVale[i] != -1) {
                getLabels()[colVale[i]] = data1.attribute(i).name();
            }
        }

        // Debug dump of the filtered matrix to a hard-coded path
        BufferedWriter br = new BufferedWriter(new FileWriter("d:/tmp/fich.csv"));
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < matrixAssign.length; i++) {
            String cad = String.valueOf(matrixAssign[i][0]);
            for (int k = 1; k < matrixAssign[i].length; k++)
                cad += "," + matrixAssign[i][k];
            sb.append(cad + "\n");
        }
        br.write(sb.toString());
        br.close();

        setMatrix(matrixAssign);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:lu.lippmann.cdb.datasetview.tabs.MDSTabView.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public void update0(final Instances dataSet) throws Exception {
    this.jxp.removeAll();

    if (this.distComboListener != null)
        distCombo.removeActionListener(this.distComboListener);
    this.distComboListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            if (!currentDist.equals(distCombo.getSelectedItem()))
                update(dataSet);
            currentDist = distCombo.getSelectedItem();
            final MDSDistancesEnum mde = MDSDistancesEnum.valueOf(currentDist.toString());
            boolean showDistanceParameters = (mde.equals(MDSDistancesEnum.MINKOWSKI));
            distanceParameters.setVisible(showDistanceParameters);
            distanceParametersLabel.setVisible(showDistanceParameters);
        }
    };
    this.distCombo.addActionListener(this.distComboListener);

    if (this.distanceParametersListener != null)
        distanceParameters.removeActionListener(this.distanceParametersListener);
    this.distanceParameters.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            if (!currentParameter.equals(distanceParameters.getText()))
                update(dataSet);
            currentParameter = distanceParameters.getText();
        }
    });
    this.distanceParameters.addActionListener(this.distanceParametersListener);

    if (this.shihListener != null)
        shihCheckbox.removeActionListener(this.shihListener);
    this.shihListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.shihCheckbox.addActionListener(this.shihListener);
    this.shihCheckbox.setEnabled(!WekaDataStatsUtil.areAllAttributesNominal(dataSet));

    if (this.ignoreListener != null)
        ignoreClassCheckbox.removeActionListener(this.ignoreListener);
    this.ignoreListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.ignoreClassCheckbox.addActionListener(this.ignoreListener);
    this.ignoreClassCheckbox.setEnabled(dataSet.classIndex() != -1);

    if (this.maxInstancesListener != null)
        maxInstances.removeKeyListener(this.maxInstancesListener);
    this.maxInstancesListener = new KeyAdapter() {
        @Override
        public void keyPressed(KeyEvent e) {
            final int cCode = e.getKeyCode();
            if (cCode == KeyEvent.VK_ENTER) {
                update(dataSet);
                e.consume();
            }
        }
    };
    this.maxInstances.addKeyListener(maxInstancesListener);

    if (this.normalizeListener != null)
        normalizeCheckbox.removeActionListener(this.normalizeListener);
    this.normalizeListener = new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            update(dataSet);
        }
    };
    this.normalizeCheckbox.addActionListener(this.normalizeListener);

    // TODO: use a proper layout ...
    final JXPanel northPanel = new JXPanel();
    northPanel.setLayout(new GridBagLayout());
    final GridBagConstraints gbc = new GridBagConstraints();
    gbc.gridx = 0;
    gbc.gridy = 0;
    gbc.gridwidth = 2;
    gbc.weightx = 1;
    gbc.fill = GridBagConstraints.BOTH;
    northPanel.add(this.distCombo, gbc);
    gbc.weightx = 0;
    gbc.gridwidth = 1;
    gbc.gridy = 1;
    northPanel.add(this.distanceParametersLabel, gbc);
    gbc.gridx = 1;
    northPanel.add(this.distanceParameters, gbc);
    this.jxp.add(northPanel, BorderLayout.NORTH);

    final MDSDistancesEnum mde = MDSDistancesEnum.valueOf(distCombo.getSelectedItem().toString());
    final String strOrder = distanceParameters.getText();
    if (mde.equals(MDSDistancesEnum.MINKOWSKI)) {
        mde.setParameters(new String[] { strOrder });
    }

    Instances usedDataSet = dataSet;
    if (shihCheckbox.isSelected()) {
        // Modify instances using the Shih (2010) algorithm
        final Shih2010 shih = new Shih2010(dataSet);
        usedDataSet = shih.getModifiedInstances();
    }

    this.kmeansButton = new JButton("K-means");
    this.maxKField = new JTextField("10");

    // Create the whole panel
    final JXPanel southPanel = new JXPanel();
    southPanel.add(shihCheckbox);
    southPanel.add(ignoreClassCheckbox);
    southPanel.add(normalizeCheckbox);
    southPanel.add(maxInstances);
    southPanel.add(new JLabel("Maximum K"));
    southPanel.add(maxKField);
    southPanel.add(kmeansButton);
    this.jxp.add(southPanel, BorderLayout.SOUTH);

    // Compute MDS
    final MDSResult mdsResult = ClassicMDS.doMDS(usedDataSet, mde, 2, Integer.valueOf(maxInstances.getText()),
            ignoreClassCheckbox.isSelected(), normalizeCheckbox.isSelected());

    final JXPanel mdsView = MDSViewBuilder.buildMDSViewFromDataSet(dataSet, mdsResult,
            Integer.valueOf(maxInstances.getText()), new Listener<Instances>() {
                @Override
                public void onAction(final Instances parameter) {
                    pushDataChange(new DataChange(parameter, TabView.DataChangeTypeEnum.Selection));
                }
            });
    this.jxp.add(mdsView, BorderLayout.CENTER);

    this.kmeansButton.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                // List of coordinates (x,y) of collapsed instances
                final Instances coordsInstances = mdsResult.buildInstancesFromMatrix();

                // FIXME dangerous: K-means on ordered collapsedInstance coordinates
                final KmeansImproved km = new KmeansImproved(coordsInstances,
                        Integer.valueOf(maxKField.getText()));
                final double[] ass = km.getClusteredInstances();
                int usedK = km.getUsedKmeans().getNumClusters();

                final StringBuilder labels = new StringBuilder();
                for (int i = 0; i < usedK; i++) {
                    labels.append("cluster").append((i + 1));
                    if (i < usedK - 1)
                        labels.append(",");
                }

                // Build the modified dataset
                String attributeName = "cluster_proj";
                while (dataSet.attribute(attributeName) != null)
                    attributeName += "_proj";

                final Add addFilter = new Add();
                addFilter.setAttributeIndex("last");
                addFilter.setAttributeName(attributeName);
                addFilter.setNominalLabels(labels.toString());
                addFilter.setInputFormat(dataSet);

                final Instances modDataset = Filter.useFilter(dataSet, addFilter);
                final int nbInstances = modDataset.numInstances();
                final int nbAttributes = modDataset.numAttributes();

                if (mdsResult.getCInstances().isCollapsed()) {
                    final KmeansResult kmr = mdsResult.getCInstances().getCentroidMap();
                    final List<Instances> clusters = kmr.getClusters();
                    int nbClusters = clusters.size();

                    // Build a map between any instance and its cluster's centroid
                    final Map<ComparableInstance, Integer> mapCentroid = new HashMap<ComparableInstance, Integer>();
                    for (int i = 0; i < nbClusters; i++) {
                        final Instances cluster = clusters.get(i);
                        final int clusterSize = cluster.size();
                        for (int k = 0; k < clusterSize; k++) {
                            mapCentroid.put(new ComparableInstance(cluster.instance(k)), i);
                        }
                    }

                    // Use the previous map to add the additional feature to every element
                    for (int i = 0; i < nbInstances; i++) {
                        final int centroidIndex = mapCentroid.get(new ComparableInstance(dataSet.instance(i)));
                        final String value = "cluster" + (int) (ass[centroidIndex] + 1);
                        modDataset.instance(i).setValue(nbAttributes - 1, value);
                    }
                } else {
                    for (int i = 0; i < nbInstances; i++) {
                        final String value = "cluster" + (int) (ass[i] + 1);
                        modDataset.instance(i).setValue(nbAttributes - 1, value);
                    }
                }

                pushDataChange(new DataChange(modDataset, TabView.DataChangeTypeEnum.Update));
            } catch (Exception e1) {
                e1.printStackTrace();
            }
        }
    });

    this.jxp.repaint();
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
public static void main(String[] args) throws Exception {
    final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(
            //new File("./samples/csv/uci/zoo.csv"));
            new File("./samples/csv/test50bis.csv"));
    //new File("./samples/arff/UCI/cmc.arff"));
    //new File("./samples/csv/direct-marketing-bank-reduced.csv"));
    //new File("./samples/csv/bank.csv"));
    //new File("./samples/csv/preswissroll.csv"));
    //new File("./samples/csv/preswissroll-mod4.csv"));

    ds.setClassIndex(-1);

    final int N = ds.size();
    final int M = ds.numAttributes();

    // Build the pairwise cosine-distance matrix (attribute 0 is skipped)
    SimpleMatrix dist = new SimpleMatrix(N, N);
    for (int i = 0; i < N; i++) {
        for (int j = i + 1; j < N; j++) {
            Instance xi = ds.instance(i);
            Instance xj = ds.instance(j);
            double d = 0, s = 0, a = 0, b = 0;
            for (int k = 1; k < M; k++) {
                s += xi.value(k) * xj.value(k);
                a += xi.value(k) * xi.value(k);
                b += xj.value(k) * xj.value(k); // fixed: the original accumulated xi twice here
            }
            d = 1 - s / (Math.sqrt(a) * Math.sqrt(b));
            dist.set(i, j, d);
            dist.set(j, i, d);
        }
    }

    final MDSResult res = ClassicMDS.doMDSV1(ds, dist);

    JXPanel p2 = MDSViewBuilder.buildMDSViewFromDataSet(ds, res, 5000, null);
    p2.setPreferredSize(new Dimension(800, 600));

    final JXFrame f = new JXFrame();
    f.setPreferredSize(new Dimension(1024, 768));
    final Container c = f.getContentPane();
    c.add(p2);
    f.pack();
    f.setVisible(true);
    f.setDefaultCloseOperation(JXFrame.EXIT_ON_CLOSE);

    System.out.println("Kruskal stress : =" + getKruskalStressFromMDSResult(res));
}