List of usage examples for weka.core.Instances.setClassIndex
public void setClassIndex(int classIndex)
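Before the project-specific examples, here is a minimal standalone sketch of the call itself. The file name iris.arff is an assumption; any ARFF file whose last attribute is the class would do.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetClassIndexExample {
    public static void main(String[] args) throws Exception {
        // load a dataset (hypothetical file; assumed to be an ARFF whose last attribute is the class)
        Instances data = DataSource.read("iris.arff");

        // many Weka learners require the class attribute to be declared explicitly
        if (data.classIndex() < 0) {
            data.setClassIndex(data.numAttributes() - 1); // use the last attribute as the class
        }
        System.out.println("Class attribute: " + data.classAttribute().name());
    }
}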
From source file:gr.auth.ee.lcs.utilities.InstancesUtility.java
License:Open Source License
public static Instances[] partitionInstances(final AbstractLearningClassifierSystem lcs, final Instances trainSet)
        throws Exception {
    // Open .arff
    final Instances set = trainSet;
    if (set.classIndex() < 0) {
        set.setClassIndex(set.numAttributes() - 1);
    }
    //set.randomize(new Random());

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    // the partitions vector holds the indices
    String stringsArray[] = new String[trainSet.numInstances()];
    int indicesArray[] = new int[trainSet.numInstances()];

    // convert each instance's labelset into a string and store it in the stringsArray array
    for (int i = 0; i < set.numInstances(); i++) {
        stringsArray[i] = "";
        indicesArray[i] = i;
        for (int j = set.numAttributes() - numberOfLabels; j < set.numAttributes(); j++) {
            stringsArray[i] += (int) set.instance(i).value(j);
        }
    }

    // contains the indicesVector(s)
    Vector<Vector> mothershipVector = new Vector<Vector>();

    String baseString = "";
    for (int i = 0; i < set.numInstances(); i++) {
        baseString = stringsArray[i];
        if (baseString.equals(""))
            continue;
        Vector<Integer> indicesVector = new Vector<Integer>();
        for (int j = 0; j < set.numInstances(); j++) {
            if (baseString.equals(stringsArray[j])) {
                stringsArray[j] = "";
                indicesVector.add(j);
            }
        }
        mothershipVector.add(indicesVector);
    }

    Instances[] partitions = new Instances[mothershipVector.size()];
    for (int i = 0; i < mothershipVector.size(); i++) {
        partitions[i] = new Instances(set, mothershipVector.elementAt(i).size());
        for (int j = 0; j < mothershipVector.elementAt(i).size(); j++) {
            Instance instanceToAdd = set.instance((Integer) mothershipVector.elementAt(i).elementAt(j));
            partitions[i].add(instanceToAdd);
        }
    }
    /*
     * up to here, the partitions array has been formed. it contains the split dataset by label combinations.
     * it holds both the attributes and the labels, but for clustering the input should only be the attributes,
     * so we need to delete the labels. this is taken care of by initializePopulation()
     */
    return partitions;
}
From source file:gr.demokritos.iit.cpgislanddetection.analysis.VectorSequenceDetector.java
License:Apache License
public VectorSequenceDetector(List<BaseSequence> sequences, List<String> labels)
        throws FileNotFoundException, IOException, Exception {
    // for every sequence:
    //   get its vector from the analyzer,
    //   attach the label,
    //   and update the classifier

    // load data
    ArffLoader loader = new ArffLoader();
    loader.setFile(new File("/Desktop/filesForWeka/2o_peirama/dataForWeka.arff"));
    Instances structure = loader.getStructure();

    // setting class attribute
    structure.setClassIndex(structure.numAttributes() - 1);

    // train NaiveBayes incrementally
    NaiveBayesUpdateable nb = new NaiveBayesUpdateable();
    nb.buildClassifier(structure);
    Instance current;
    while ((current = loader.getNextInstance(structure)) != null)
        nb.updateClassifier(current);
}
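The constructor above only trains the model incrementally; here is a hedged sketch of how the trained classifier might then be applied with the same class-index setup. The class name, the labelInstances helper, and its arguments are illustrative, not part of the original source.

import java.io.File;
import weka.classifiers.bayes.NaiveBayesUpdateable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;

public class LabelWithTrainedModel {
    // Sketch only: nb is assumed to be a NaiveBayesUpdateable trained as in the constructor above.
    static void labelInstances(NaiveBayesUpdateable nb, File arff) throws Exception {
        ArffLoader loader = new ArffLoader();
        loader.setFile(arff);
        Instances structure = loader.getStructure();
        structure.setClassIndex(structure.numAttributes() - 1); // must match the training setup

        Instance current;
        while ((current = loader.getNextInstance(structure)) != null) {
            double label = nb.classifyInstance(current); // index into the class attribute's values
            System.out.println(structure.classAttribute().value((int) label));
        }
    }
}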
From source file:gr.demokritos.iit.cpgislanddetection.CpGIslandDetection.java
License:Apache License
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, ParseException, Exception {
    // String sFileNameArgs = args[0];
    // String[] fileNames = null;
    // Read file
    //IGenomicSequenceFileReader reader = new SequenceListFileReader();
    // String seq = "GCTCTTGACTTTCAGACTTCCTGAAAACAACGTTCTGGTAAGGACAAGGGTT";
    //
    // CpGIslandIdentification iClass = new CpGIslandIdentification();
    // boolean b = iClass.identify(seq);
    // System.out.println("This sequence is a CpG island: " + b);
    // SequenceListFileReader s = new SequenceListFileReader();
    // ArrayList<BaseSequence> alRes = new ArrayList<>();
    //
    // alRes = s.getSequencesFromFile("C:\\Users\\Xenia\\Desktop\\files\\posSamples.txt");
    // for (int i = 0; i < alRes.size(); i++)
    //     System.out.println("alRes = " + i + alRes.get(i));
    // VectorAnalyzer vA = new VectorAnalyzer();
    // List<Vector<Integer>> listVector = new ArrayList<>();
    // Vector<Vector<Integer>> list =
    // listVector = vA.analyze(alRes);
    // for (int i = 0; i < listVector.size(); i++)
    //     System.out.println(i + " " + listVector.get(i));
    //IGenomicSequenceFileReader reader = new FASTAFileReader();

    // If no input file has been given
    /*
    if (args.length == 0) { // Use default
        fileNames[0] = "C:\\Users\\Xenia\\Desktop\\files\\posSamples.txt";
        fileNames[1] = "C:\\Users\\Xenia\\Desktop\\files\\negSamples.txt";
        fileNames[2] = "C:\\Users\\Xenia\\Desktop\\files\\newsamples.txt";
    } else { // else use the provided one
        fileNames = sFileNameArgs.split(";");
    }
    */

    //-----------------VECTOR ANALYSIS STARTS HERE--------------------------------------
    // read sequences from txt files
    SequenceListFileReader reader = new SequenceListFileReader();
    ArrayList<BaseSequence> lSeqs1 = new ArrayList<>();
    ArrayList<BaseSequence> lSeqs2 = new ArrayList<>();
    lSeqs1 = reader.getSequencesFromFile("C:\\Users\\Xenia\\Desktop\\files\\posSamples.txt");
    lSeqs2 = reader.getSequencesFromFile("C:\\Users\\Xenia\\Desktop\\files\\negSamples.txt");

    // create vectors for every sequence
    List<Vector<Integer>> listVectorForPositiveSamples = new ArrayList<>();
    List<Vector<Integer>> listVectorForNegativeSamples = new ArrayList<>();
    VectorAnalyzer v = new VectorAnalyzer();
    listVectorForPositiveSamples = v.analyze(lSeqs1);
    listVectorForNegativeSamples = v.analyze(lSeqs2);

    // create ARFF files for positive and negative samples
    FileCreatorARFF fc = new FileCreatorARFF();
    Instances positiveInstances = fc.createARFF(listVectorForPositiveSamples, "yes");
    Instances negativeInstances = fc.createARFF(listVectorForNegativeSamples, "no");
    //System.out.println(positiveInstances);

    // build and train classifier
    // setting class attribute
    positiveInstances.setClassIndex(positiveInstances.numAttributes() - 1);
    negativeInstances.setClassIndex(negativeInstances.numAttributes() - 1);

    // train NaiveBayes
    NaiveBayesUpdateable nb = new NaiveBayesUpdateable();
    nb.buildClassifier(positiveInstances);
    nb.buildClassifier(negativeInstances);
    Instance current;
    for (int i = 0; i < positiveInstances.numInstances(); i++) {
        current = positiveInstances.instance(i);
        nb.updateClassifier(current);
    }

    // Test the model
    Evaluation eTest = new Evaluation(positiveInstances);
    Instances isTestingSet = fc.createARFF(listVectorForNegativeSamples, "?");
    isTestingSet.setClassIndex(isTestingSet.numAttributes() - 1);
    eTest.evaluateModel(nb, isTestingSet);
    //------------------VECTOR ANALYSIS ENDS HERE---------------------------------------

    //----------------------------HMM CLASSIFIER STARTS HERE----------------------------------
    // Init classifier
    /*
    ISequenceClassifier<List<ObservationDiscrete<HmmSequence.Packet>>> classifier = new HmmClassifier();
    */
    // WARNING: Remember to change when you have normal data!!!
    // Obfuscation in negative training file?
    // final boolean bObfuscateNeg = true;
    // FASTAObfuscatorReader r = new FASTAObfuscatorReader();

    // for each file do the same work: train
    // for (int i = 0; i < 3; i++) {
    // Read the sequences
    // If obfuscation is on and we are dealing with the negative training file
    /*
    if ((i == 2) && (bObfuscateNeg)) {
        //FASTAObfuscatorReader r = new FASTAObfuscatorReader();
        lSeqs = r.getSequencesFromFile(fileNames[i]);
        fileNames[1] = "Not" + fileNames[1]; // Update to indicate different class
    } else {
        // else read normally
        lSeqs = reader.getSequencesFromFile(fileNames[i]);
    }
    System.out.println("lSeqs size=" + lSeqs.size());
    */

    // Create HMM sequences
    /*
    ISequenceAnalyst<List<ObservationDiscrete<HmmSequence.Packet>>> analyst = new HmmAnalyzer();
    List<List<ObservationDiscrete<HmmSequence.Packet>>> lHmmSeqs = analyst.analyze(lSeqs);

    // Train classifier with the observations
    classifier.train(lHmmSeqs, new File(fileNames[i]).getName());
    }

    // Classify the test file
    // First: Read the sequences
    lSeqs = r.getSequencesFromFile(fileNames[2]);
    //System.out.println("file name= " + fileNames[2]);

    // Then: Create HMM sequences
    ISequenceAnalyst<List<ObservationDiscrete<HmmSequence.Packet>>> analyst = new HmmAnalyzer();
    List<List<ObservationDiscrete<HmmSequence.Packet>>> lHmmSeqs = analyst.analyze(lSeqs);
    */
    //-------------------------------HMM CLASSIFIER ENDS HERE-----------------------------------------

    /*
    //----------------------------HMM EVALUATION STARTS-----------------------------------------------
    //System.out.println("size of lHmmSeqs=" + lHmmSeqs.size());
    String str = null;
    String[] savedResults = new String[lHmmSeqs.size()];

    // create a 2x2 array to store successes and failures for each class
    int[][] matrix = new int[2][2];
    int successForCpG = 0, failForCpG = 0, successForNotCpG = 0, failForNotCpG = 0;

    // Init identifier
    // CpGIslandIdentification identifier = new CpGIslandIdentification();
    CpGIslandIdentification identifier = new CpGIslandIdentificationByList("CpG_hg18.fa");

    for (int i = 0; i < lHmmSeqs.size(); i++) {
        // DEBUG
        System.err.print(".");
        if (i % 10 == 0)
            System.err.println();
        ////////
        str = classifier.classify(lHmmSeqs.get(i));
        // System.out.println("i=" + i);
        System.out.println("Determined class:" + str);
        // savedResults[i] = str;
        // call the function that checks whether the sequence satisfies the CpG criteria
        if (identifier.identify(lSeqs.get(i).getSymbolSequence()) && str.equals(fileNames[0])) {
            // success for CpG class
            successForCpG++;
            System.out.println("successForCpG" + successForCpG);
        } else if (identifier.identify(lSeqs.get(i).getSymbolSequence()) && str.equals(fileNames[1])) {
            // fail for CpG class
            failForCpG++;
            System.out.println("failForCpG" + failForCpG);
        } else if (identifier.identify(lSeqs.get(i).getSymbolSequence()) == false && str.equals(fileNames[1])) {
            //System.out.println(i);
            // success for Not CpG class
            successForNotCpG++;
            System.out.println("successForNotCpG" + successForNotCpG);
        } else if (identifier.identify(lSeqs.get(i).getSymbolSequence()) == false && str.equals(fileNames[0])) {
            // fail for Not CpG class
            failForNotCpG++;
            System.out.println("failForNotCpG" + failForNotCpG);
        }
    }

    // Evaluation: calculation of classification rate and accuracy
    double totalAccuracy = (successForNotCpG + successForCpG)
            / (successForCpG + failForCpG + failForNotCpG + successForNotCpG);

    // misclassification rate for CpG class
    double rate1 = (failForCpG + successForCpG) != 0 ? failForCpG / (failForCpG + successForCpG) : 0.0;
    // misclassification rate for Not CpG class
    double rate2 = (failForNotCpG + successForNotCpG) != 0 ? failForNotCpG / (failForNotCpG + successForNotCpG) : 0.0;
    System.out.println(totalAccuracy + " " + rate1 + " " + rate2);

    NGramGraphClassifier nGramGraphClassifier = new NGramGraphClassifier();
    List<List<DocumentNGramGraph>> representation;
    NGramGraphAnalyzer myAnalyst = new NGramGraphAnalyzer();
    representation = myAnalyst.analyze(lSeqs);
    for (int i = 0; i < representation.size(); i++)
        nGramGraphClassifier.classify(representation.get(i));
    */
}
From source file:gr.demokritos.iit.cpgislanddetection.io.FileCreatorARFF.java
public Instances createARFF(List<Vector<Integer>> listVector, String nameClass) throws ParseException {
    // Declare four numeric attributes
    Attribute Attribute1 = new Attribute("adenine");
    Attribute Attribute2 = new Attribute("thymine");
    Attribute Attribute3 = new Attribute("cytosine");
    Attribute Attribute4 = new Attribute("guanine");

    // Declare the class attribute along with its values
    FastVector fvClassVal = new FastVector(2);
    fvClassVal.addElement("yes");
    fvClassVal.addElement("no");
    Attribute ClassAttribute = new Attribute("theClass", fvClassVal);

    // Declare the feature vector
    FastVector fvWekaAttributes = new FastVector(5);
    fvWekaAttributes.addElement(Attribute1);
    fvWekaAttributes.addElement(Attribute2);
    fvWekaAttributes.addElement(Attribute3);
    fvWekaAttributes.addElement(Attribute4);
    fvWekaAttributes.addElement(ClassAttribute);

    // Create an empty training set
    int capacity = listVector.size() + 7;
    Instances isTrainingSet = new Instances("isCpG", fvWekaAttributes, capacity);

    // Set class index
    isTrainingSet.setClassIndex(4);

    // Create the instances from the file with vectors
    for (int i = 0; i < listVector.size(); i++) {
        Instance instance = new Instance(5);
        instance.setValue((Attribute) fvWekaAttributes.elementAt(0), listVector.get(i).get(0));
        instance.setValue((Attribute) fvWekaAttributes.elementAt(1), listVector.get(i).get(1));
        instance.setValue((Attribute) fvWekaAttributes.elementAt(2), listVector.get(i).get(2));
        instance.setValue((Attribute) fvWekaAttributes.elementAt(3), listVector.get(i).get(3));
        instance.setValue((Attribute) fvWekaAttributes.elementAt(4), nameClass);

        // add the instance to the training set
        isTrainingSet.add(instance);
    }
    System.out.println(isTrainingSet);
    return isTrainingSet;
}
From source file:gr.iti.mklab.visual.quantization.SimpleKMeansWithOutput.java
License:Open Source License
/**
 * Generates a clusterer. Has to initialize all fields of the clusterer that are not being set via
 * options.
 *
 * @param data
 *            set of instances serving as training data
 * @throws Exception
 *             if the clusterer has not been generated successfully
 */
@Override
public void buildClusterer(Instances data) throws Exception {
    // can clusterer handle the data?
    getCapabilities().testWithFail(data);

    m_Iterations = 0;
    m_ReplaceMissingFilter = new ReplaceMissingValues();
    Instances instances = new Instances(data);
    instances.setClassIndex(-1);
    if (!m_dontReplaceMissing) {
        m_ReplaceMissingFilter.setInputFormat(instances);
        instances = Filter.useFilter(instances, m_ReplaceMissingFilter);
    }

    m_FullMissingCounts = new int[instances.numAttributes()];
    if (m_displayStdDevs) {
        m_FullStdDevs = new double[instances.numAttributes()];
    }
    m_FullNominalCounts = new int[instances.numAttributes()][0];
    m_FullMeansOrMediansOrModes = moveCentroid(0, instances, false, false);
    for (int i = 0; i < instances.numAttributes(); i++) {
        m_FullMissingCounts[i] = instances.attributeStats(i).missingCount;
        if (instances.attribute(i).isNumeric()) {
            if (m_displayStdDevs) {
                m_FullStdDevs[i] = Math.sqrt(instances.variance(i));
            }
            if (m_FullMissingCounts[i] == instances.numInstances()) {
                m_FullMeansOrMediansOrModes[i] = Double.NaN; // mark missing as mean
            }
        } else {
            m_FullNominalCounts[i] = instances.attributeStats(i).nominalCounts;
            if (m_FullMissingCounts[i] > m_FullNominalCounts[i][Utils.maxIndex(m_FullNominalCounts[i])]) {
                m_FullMeansOrMediansOrModes[i] = -1; // mark missing as most common value
            }
        }
    }

    m_ClusterCentroids = new Instances(instances, m_NumClusters);
    int[] clusterAssignments = new int[instances.numInstances()];
    if (m_PreserveOrder)
        m_Assignments = clusterAssignments;
    m_DistanceFunction.setInstances(instances);

    Random RandomO = new Random(getSeed());
    int instIndex;
    HashMap initC = new HashMap();
    DecisionTableHashKey hk = null;
    Instances initInstances = null;
    if (m_PreserveOrder)
        initInstances = new Instances(instances);
    else
        initInstances = instances;

    if (m_initializeWithKMeansPlusPlus) {
        kMeansPlusPlusInit(initInstances);
    } else {
        for (int j = initInstances.numInstances() - 1; j >= 0; j--) {
            instIndex = RandomO.nextInt(j + 1);
            hk = new DecisionTableHashKey(initInstances.instance(instIndex), initInstances.numAttributes(), true);
            if (!initC.containsKey(hk)) {
                m_ClusterCentroids.add(initInstances.instance(instIndex));
                initC.put(hk, null);
            }
            initInstances.swap(j, instIndex);
            if (m_ClusterCentroids.numInstances() == m_NumClusters) {
                break;
            }
        }
    }
    m_NumClusters = m_ClusterCentroids.numInstances();

    // removing reference
    initInstances = null;

    int i;
    boolean converged = false;
    int emptyClusterCount;
    Instances[] tempI = new Instances[m_NumClusters];
    m_squaredErrors = new double[m_NumClusters];
    m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
    m_ClusterMissingCounts = new int[m_NumClusters][instances.numAttributes()];
    startExecutorPool();

    long start = System.currentTimeMillis();
    while (!converged) {
        emptyClusterCount = 0;
        m_Iterations++;
        converged = true;
        System.out.print(new Date() + ": " + "Iter " + m_Iterations + " ");
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < instances.numInstances(); i++) {
                Instance toCluster = instances.instance(i);
                int newC = clusterProcessedInstance(toCluster, true, true);
                if (newC != clusterAssignments[i]) {
                    converged = false;
                }
                clusterAssignments[i] = newC;
            }
        } else {
            converged = launchAssignToClusters(instances, clusterAssignments);
        }

        // update centroids
        m_ClusterCentroids = new Instances(instances, m_NumClusters);
        for (i = 0; i < m_NumClusters; i++) {
            tempI[i] = new Instances(instances, 0);
        }
        for (i = 0; i < instances.numInstances(); i++) {
            tempI[clusterAssignments[i]].add(instances.instance(i));
        }
        if (m_executionSlots <= 1 || instances.numInstances() < 2 * m_executionSlots) {
            for (i = 0; i < m_NumClusters; i++) {
                if (tempI[i].numInstances() == 0) {
                    // empty cluster
                    emptyClusterCount++;
                } else {
                    moveCentroid(i, tempI[i], true, true);
                }
            }
        } else {
            emptyClusterCount = launchMoveCentroids(tempI);
        }

        if (m_Iterations == m_MaxIterations)
            converged = true;

        if (emptyClusterCount > 0) {
            m_NumClusters -= emptyClusterCount;
            if (converged) {
                Instances[] t = new Instances[m_NumClusters];
                int index = 0;
                for (int k = 0; k < tempI.length; k++) {
                    if (tempI[k].numInstances() > 0) {
                        t[index++] = tempI[k];
                    }
                }
                tempI = t;
            } else {
                tempI = new Instances[m_NumClusters];
            }
        }

        if (!converged) {
            m_ClusterNominalCounts = new int[m_NumClusters][instances.numAttributes()][0];
        }
        System.out.println("Sum of within cluster distances: " + Utils.sum(m_squaredErrors));
        // reset errors to zero
        m_squaredErrors = new double[m_NumClusters];
    }
    long end = System.currentTimeMillis();
    System.out.println("\nClustering completed in " + (end - start) + " ms and converged in " + m_Iterations
            + " iterations");

    // calculate errors
    if (!m_FastDistanceCalc) {
        for (i = 0; i < instances.numInstances(); i++) {
            clusterProcessedInstance(instances.instance(i), true, false);
        }
    }

    if (m_displayStdDevs) {
        m_ClusterStdDevs = new Instances(instances, m_NumClusters);
    }
    m_ClusterSizes = new int[m_NumClusters];
    for (i = 0; i < m_NumClusters; i++) {
        if (m_displayStdDevs) {
            double[] vals2 = new double[instances.numAttributes()];
            for (int j = 0; j < instances.numAttributes(); j++) {
                if (instances.attribute(j).isNumeric()) {
                    vals2[j] = Math.sqrt(tempI[i].variance(j));
                } else {
                    vals2[j] = Utils.missingValue();
                }
            }
            m_ClusterStdDevs.add(new DenseInstance(1.0, vals2));
        }
        m_ClusterSizes[i] = tempI[i].numInstances();
    }
    m_executorPool.shutdown();
}
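The example above clears any class attribute with setClassIndex(-1) before clustering, since the clusterer treats every attribute as input. Below is a minimal caller-side sketch of the same idiom; the file name glass.arff is an assumption, and it uses the stock weka.clusterers.SimpleKMeans rather than the SimpleKMeansWithOutput variant shown here.

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterWithoutClass {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("glass.arff");
        data.setClassIndex(-1); // ensure no class attribute is set; clustering uses all attributes as input

        SimpleKMeans km = new SimpleKMeans();
        km.setNumClusters(3);
        km.buildClusterer(data);
        System.out.println(km);
    }
}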
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/**
 * Converts an input space point to a Weka instance.
 *
 * @param point
 * @return
 */
public static Instance convertPointToInstance(InputSpacePoint point, OutputSpacePoint outputPoint) {
    Instance inst = new Instance(point.numberDimensions() + outputPoint.numberDimensions());
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }
    for (Entry<String, Double> e : outputPoint.getOutputPoints().entrySet()) {
        if (e.getValue() == null) {
            inst.setMissing(index++);
        } else {
            Attribute att = new Attribute(e.getKey(), index++);
            inst.setValue(att, e.getValue());
        }
    }

    // assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    for (String k : outputPoint.getOutputPoints().keySet()) {
        att.addElement(new Attribute(k, index++));
    }
    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
public static Instance convertPointToInstance(InputSpacePoint point) {
    Instance inst = new Instance(point.numberDimensions() + 1);
    int index = 0;
    for (String k : point.getKeysAsCollection()) {
        Attribute att = new Attribute(k, index++);
        inst.setValue(att, point.getValue(k));
    }
    inst.setMissing(index);

    // assign instance to dataset
    FastVector att = new FastVector(point.numberDimensions() + 1);
    for (String s : point.getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    att.addElement(new Attribute("objective", index++));
    Instances dataset = new Instances("instances", att, point.numberDimensions() + 1);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    inst.setDataset(dataset);
    return inst;
}
From source file:gr.ntua.ece.cslab.panic.core.models.AbstractWekaModel.java
License:Apache License
/**
 * Creates a new dataset out of a OutputSpacePoint list.
 *
 * @param points
 * @return
 */
protected static Instances getInstances(List<OutputSpacePoint> points) {
    OutputSpacePoint first = points.get(0);
    FastVector att = new FastVector(first.getInputSpacePoint().numberDimensions() + first.numberDimensions());
    int index = 0;
    for (String s : first.getInputSpacePoint().getKeysAsCollection())
        att.addElement(new Attribute(s, index++));
    for (String s : first.getOutputPoints().keySet())
        att.addElement(new Attribute(s, index++));

    Instances instances = new Instances("instances", att,
            first.getInputSpacePoint().numberDimensions() + first.numberDimensions());
    for (OutputSpacePoint p : points) {
        Instance i = convertPointToInstance(p.getInputSpacePoint(), p);
        instances.add(i);
        //System.out.println(i);
    }
    instances.setClassIndex(first.getInputSpacePoint().numberDimensions());
    return instances;
}
From source file:gr.uoc.nlp.opinion.analysis.suggestion.AnalyzeSuggestions.java
private Instances retriveTrainSet() {
    System.out.println("Retrieving dataset from Database..");

    InstanceQuery query;
    try {
        // initialize database connection, weka api
        query = new InstanceQuery();

        // set database attributes, weka api
        query.setDatabaseURL(this.connection.getJdbcUrl());
        query.setUsername(this.connection.getUsername());
        query.setPassword(this.connection.getPassword());
        query.setQuery(this.queryTrainset());

        // retrieve trainset
        Instances data = query.retrieveInstances();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Done retrieving dataset from Database!");
        return data;
    } catch (Exception ex) {
        System.err.println("Abort!");
        Logger.getLogger(AnalyzeArguments.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
From source file:gr.uoc.nlp.opinion.analysis.suggestion.AnalyzeSuggestions.java
/**
 *
 * @param classifier
 * @param unclassified
 * @return
 */
public Instances classify(Classifier classifier, Instances unclassified) {
    unclassified.setClassIndex(unclassified.numAttributes() - 1);

    // new set which will contain the classified instances
    Instances classified = new Instances(unclassified);

    double clsLabel;
    try {
        for (int i = 0; i < unclassified.numInstances(); i++) {
            // classify each unclassified instance
            clsLabel = classifier.classifyInstance(unclassified.instance(i));

            // append the result to the final set
            classified.instance(i).setClassValue(clsLabel);
        }
    } catch (Exception ex) {
        Logger.getLogger(AnalyzeArguments.class.getName()).log(Level.SEVERE, null, ex);
    }
    return classified;
}