Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find usage examples for the add method of weka.core.Instances.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:svmal.SVMStrategymulti.java

/**
 * Copies the given patterns into a new data set whose class values are binarized.
 *
 * <p>The result starts as an empty copy of the first pattern's data set. For every
 * pattern, the value at the class index becomes 1 when it equals {@code positiveLabel}
 * and 0 otherwise; the weight and id of the pattern are carried over.
 *
 * @param patts         patterns to convert; element 0 is read unconditionally, so an
 *                      empty array fails with an index error
 * @param positiveLabel class value that is mapped to 1
 * @return the new data set holding one {@code Instance2} per pattern
 */
public static Instances PatternsToInstances2(Pattern[] patts, int positiveLabel) {
    final Instances binarized = new Instances(patts[0].dataset(), 0, 0);
    final int labelPos = binarized.classIndex();
    for (int i = 0; i < patts.length; i++) {
        final Pattern source = patts[i];
        final double[] values = source.toDoubleArray();
        // One-vs-rest relabelling of the class value.
        values[labelPos] = (values[labelPos] == positiveLabel) ? 1 : 0;

        final Instance2 copy = new Instance2(source.weight(), values);
        copy.setIndex(source.id());
        copy.setDataset(binarized);
        binarized.add(copy);
    }
    return binarized;
}

From source file:tclass.WekaBridge.java

License:Open Source License

/**
 * Builds a Weka data set containing one instance per event of a single event type.
 *
 * <p>The attributes come from {@code makeAttVector} for the event description at
 * {@code evPos}. Unless {@code ignoreClasses} is set, a nominal "class" attribute is
 * appended and used as the class index; each instance then receives the real class
 * label of the stream its event belongs to.
 *
 * @param csevi         streams with their events and classifications
 * @param cdvi          class descriptions supplying the class labels
 * @param edvi          event descriptions
 * @param evPos         position of the event type to convert
 * @param ignoreClasses if true, no class attribute is added and the class index is -1
 * @param ignoreTime    if true, event parameter 0 is skipped (presumably the time
 *                      stamp - confirm against the event description)
 * @return the populated data set
 * @throws Exception propagated from the called helpers
 */
public static Instances makeInstances(ClassStreamEventsVecI csevi, ClassDescVecI cdvi, EventDescVecI edvi,
        int evPos, boolean ignoreClasses, boolean ignoreTime) throws Exception {
    StreamEventsVecI streams = csevi.getStreamEventsVec();
    ClassificationVecI streamClasses = csevi.getClassVec();

    // Index of the first event parameter that is copied into an instance.
    final int firstParam = ignoreTime ? 1 : 0;
    final int numAtts = edvi.elAt(evPos).numParams() - firstParam;

    FastVector atts = makeAttVector(edvi.elAt(evPos), ignoreTime);
    if (!ignoreClasses) {
        final int numClasses = cdvi.size();
        FastVector classLabels = new FastVector(numClasses);
        for (int c = 0; c < numClasses; c++) {
            classLabels.addElement(cdvi.getClassLabel(c));
        }
        atts.addElement(new Attribute("class", classLabels));
    }

    final int numStreams = csevi.size();
    Instances retval = new Instances(edvi.elName(evPos), atts, numStreams);
    // The class attribute, when present, sits right after the numeric attributes.
    retval.setClassIndex(ignoreClasses ? -1 : numAtts);

    for (int s = 0; s < numStreams; s++) {
        // Events of the requested type in this stream.
        EventVecI events = streams.elAt(s).getEvents(evPos);
        final int numEvents = events.size();
        for (int e = 0; e < numEvents; e++) {
            Instance row = new DenseInstance(atts.size());
            row.setDataset(retval);
            EventI event = events.elAt(e);

            for (int param = firstParam; param < edvi.elAt(evPos).numParams(); param++) {
                row.setValue(param - firstParam, event.valOf(param));
            }
            if (!ignoreClasses) {
                row.setValue(numAtts, cdvi.getClassLabel(streamClasses.elAt(s).getRealClass()));
            }
            retval.add(row);
        }
    }
    return retval;
}

From source file:tclass.WekaBridge.java

License:Open Source License

/**
 * Builds a Weka data set with one instance per stream of attribute values.
 *
 * <p>Each instance holds the stream's attribute values followed by its real class
 * label; the class label is the last attribute and is set as the class index.
 *
 * @param csavvi streams of attribute values together with their classifications
 * @param name   relation name of the resulting data set
 * @return the populated data set
 * @throws Exception propagated from the called helpers
 */
public static Instances makeInstances(ClassStreamAttValVecI csavvi, String name) throws Exception {
    StreamAttValVecI streams = csavvi.getStreamAttValVec();
    AttDescVecI attDescs = streams.getDescription();
    ClassificationVecI streamClasses = csavvi.getClassVec();
    ClassDescVecI classDescs = streamClasses.getClassDescVec();

    FastVector header = makeAttVector(attDescs, classDescs);
    final int rowCount = streams.size();
    final int classPos = attDescs.size();

    Instances retval = new Instances(name, header, rowCount);
    retval.setClassIndex(classPos); // the class is the last attribute

    for (int row = 0; row < rowCount; row++) {
        // One extra slot for the class value.
        Instance current = new DenseInstance(classPos + 1);
        current.setDataset(retval);

        StreamAttValI stream = streams.elAt(row);
        for (int att = 0; att < classPos; att++) {
            current.setValue(att, stream.getAtt(att));
        }
        current.setValue(classPos, classDescs.getClassLabel(streamClasses.elAt(row).getRealClass()));
        retval.add(current);
    }
    return retval;
}

From source file:themeextractor.main.MauiModelBuilder.java

License:Open Source License

/**
 * Builds the model from the training data.
 *
 * <p>Configures a new {@code MauiFilter} from this object's settings, then feeds it one
 * instance per file name. Each instance has three string attributes: the file name, the
 * text read from {@code <inputDirectoryName>/<fileName>.txt} and the topics read from
 * {@code <inputDirectoryName>/<fileName>.key}. A file that cannot be read is reported on
 * {@code System.err} and stored as a missing value. After all documents have been
 * passed in, the batch is finished and the filter output is drained.
 *
 * @param fileNames base names (without extension) of the training documents
 * @throws Exception if {@code fileNames} is empty, or propagated from the filter
 */
public void buildModel(HashSet<String> fileNames) throws Exception {

    // Check whether there is actually any data
    if (fileNames.size() == 0) {
        throw new Exception("Couldn't find any data in " + inputDirectoryName);
    }

    System.err.println("-- Building the model... ");

    // Input format: three string attributes (a null value list makes a string attribute).
    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    // Build model
    mauiFilter = new MauiFilter();

    mauiFilter.setDebug(debugMode);
    mauiFilter.setMaxPhraseLength(maxPhraseLength);
    mauiFilter.setMinPhraseLength(minPhraseLength);
    mauiFilter.setMinNumOccur(minNumOccur);
    mauiFilter.setStemmer(stemmer);
    mauiFilter.setDocumentLanguage(documentLanguage);
    mauiFilter.setVocabularyName(vocabularyName);
    mauiFilter.setVocabularyFormat(vocabularyFormat);
    mauiFilter.setStopwords(stopwords);

    if (wikipedia != null) {
        mauiFilter.setWikipedia(wikipedia);
    } else if (wikipediaServer.equals("localhost") && wikipediaDatabase.equals("database")) {
        // NOTE(review): wikipedia is null on this path, so null is passed on. Presumably
        // "localhost"/"database" are the unconfigured defaults - confirm.
        mauiFilter.setWikipedia(wikipedia);
    } else {
        mauiFilter.setWikipedia(wikipediaServer, wikipediaDatabase, cacheWikipediaData, wikipediaDataDirectory);
    }

    if (classifier != null) {
        mauiFilter.setClassifier(classifier);
    }

    mauiFilter.setInputFormat(data);

    // set features configurations
    mauiFilter.setBasicFeatures(useBasicFeatures);
    mauiFilter.setKeyphrasenessFeature(useKeyphrasenessFeature);
    mauiFilter.setFrequencyFeatures(useFrequencyFeatures);
    mauiFilter.setPositionsFeatures(usePositionsFeatures);
    mauiFilter.setLengthFeature(useLengthFeature);
    mauiFilter.setThesaurusFeatures(useNodeDegreeFeature);
    mauiFilter.setBasicWikipediaFeatures(useBasicWikipediaFeatures);
    mauiFilter.setAllWikipediaFeatures(useAllWikipediaFeatures);
    // NOTE(review): repeats the setThesaurusFeatures call made a few lines above.
    mauiFilter.setThesaurusFeatures(useNodeDegreeFeature);

    // NOTE(review): unlike the guarded call before setInputFormat, this one also passes
    // a null classifier through - confirm that is intended.
    mauiFilter.setClassifier(classifier);

    mauiFilter.setContextSize(contextSize);
    mauiFilter.setMinKeyphraseness(minKeyphraseness);
    mauiFilter.setMinSenseProbability(minSenseProbability);

    // A thesaurus is only loaded for vocabularies other than "none" and "wikipedia".
    if (!vocabularyName.equals("none") && !vocabularyName.equals("wikipedia")) {
        loadThesaurus(stemmer, stopwords, vocabularyDirectory);
        mauiFilter.setVocabulary(vocabulary);
    }

    System.err.println("-- Reading the input documents... ");

    for (String fileName : fileNames) {

        // Each slot holds the index of a string value registered with the attribute.
        double[] newInst = new double[3];

        newInst[0] = (double) data.attribute(0).addStringValue(fileName);

        File documentTextFile = new File(inputDirectoryName + "/" + fileName + ".txt");
        File documentTopicsFile = new File(inputDirectoryName + "/" + fileName + ".key");

        try {

            String documentText;
            // "default" means: read with the encoding-less overload.
            if (!documentEncoding.equals("default")) {
                documentText = FileUtils.readFileToString(documentTextFile, documentEncoding);
            } else {
                documentText = FileUtils.readFileToString(documentTextFile);
            }

            // Adding the text of the document to the instance
            newInst[1] = (double) data.attribute(1).addStringValue(documentText);

        } catch (Exception e) {

            // Best effort: report the problem and continue with a missing document text.
            System.err.println("Problem with reading " + documentTextFile);
            e.printStackTrace();
            newInst[1] = Instance.missingValue();
        }

        try {

            String documentTopics;
            if (!documentEncoding.equals("default")) {
                documentTopics = FileUtils.readFileToString(documentTopicsFile, documentEncoding);
            } else {
                documentTopics = FileUtils.readFileToString(documentTopicsFile);
            }

            // Adding the topics to the instance
            newInst[2] = (double) data.attribute(2).addStringValue(documentTopics);

        } catch (Exception e) {

            // Best effort: report the problem and continue with missing topics.
            System.err.println("Problem with reading " + documentTopicsFile);
            e.printStackTrace();
            newInst[2] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        // data is replaced by an empty structure at the end of every iteration, so
        // index 0 is the instance that was just added.
        mauiFilter.input(data.instance(0));
        data = data.stringFreeStructure();
    }
    mauiFilter.batchFinished();

    // Drain the filter's output queue; the returned instances are discarded.
    while ((mauiFilter.output()) != null) {
    }
    ;
}

From source file:themeextractor.main.MauiTopicExtractor.java

License:Open Source License

/**
 * Extracts keyphrases for the given documents with the configured {@code mauiFilter}.
 *
 * <p>For every file name the method reads {@code <inputDirectoryName>/<fileName>.txt}
 * and, if readable, {@code <inputDirectoryName>/<fileName>.key}, passes them to the
 * filter as a single instance and keeps the outputs ranked within the first
 * {@code topicsPerDocument} places. When the {@code .key} file does not exist yet, the
 * extracted topics are written to it (one per line, with normalized form and
 * probability appended if {@code additionalInfo} is set). If {@code printGraph} is set,
 * {@code computeGraph} is called with the Wikipedia articles found for the topics.
 * Finally, average correct count, precision, recall and F-measure over all documents
 * with at least one extracted topic are printed to {@code System.err}.
 *
 * @param fileNames base names (without extension) of the documents to process
 * @throws Exception if {@code fileNames} is empty, if the topics file cannot be
 *                   written, or propagated from the filter
 */
public void extractKeyphrases(HashSet<String> fileNames) throws Exception {

    // Check whether there is actually any data
    if (fileNames.size() == 0) {
        throw new Exception("Couldn't find any data in " + inputDirectoryName);
    }

    mauiFilter.setVocabularyName(vocabularyName);
    mauiFilter.setVocabularyFormat(vocabularyFormat);
    mauiFilter.setDocumentLanguage(documentLanguage);
    mauiFilter.setStemmer(stemmer);
    mauiFilter.setStopwords(stopwords);
    if (wikipedia != null) {
        mauiFilter.setWikipedia(wikipedia);
    } else if (wikipediaServer.equals("localhost") && wikipediaDatabase.equals("database")) {
        // NOTE(review): wikipedia is null on this path, so null is passed on. Presumably
        // "localhost"/"database" are the unconfigured defaults - confirm.
        mauiFilter.setWikipedia(wikipedia);
    } else {
        mauiFilter.setWikipedia(wikipediaServer, wikipediaDatabase, cacheWikipediaData, wikipediaDataDirectory);
    }
    if (!vocabularyName.equals("none") && !vocabularyName.equals("wikipedia")) {
        loadThesaurus(stemmer, stopwords, vocabularyDirectory);
        mauiFilter.setVocabulary(vocabulary);
    }

    // Input format: three string attributes (a null value list makes a string attribute).
    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    System.err.println("-- Extracting keyphrases... ");

    Vector<Double> correctStatistics = new Vector<Double>();
    Vector<Double> precisionStatistics = new Vector<Double>();
    Vector<Double> recallStatistics = new Vector<Double>();

    for (String fileName : fileNames) {

        // Each slot holds the index of a string value registered with the attribute.
        double[] newInst = new double[3];

        newInst[0] = (double) data.attribute(0).addStringValue(fileName);

        File documentTextFile = new File(inputDirectoryName + "/" + fileName + ".txt");
        File documentTopicsFile = new File(inputDirectoryName + "/" + fileName + ".key");

        try {

            String documentText;
            // "default" means: read with the encoding-less overload.
            if (!documentEncoding.equals("default")) {
                documentText = FileUtils.readFileToString(documentTextFile, documentEncoding);
            } else {
                documentText = FileUtils.readFileToString(documentTextFile);
            }

            // Adding the text of the document to the instance
            newInst[1] = (double) data.attribute(1).addStringValue(documentText);

        } catch (Exception e) {
            // Best effort: report the problem and continue with a missing document text.
            System.err.println("Problem with reading " + documentTextFile);
            e.printStackTrace();
            newInst[1] = Instance.missingValue();
        }

        try {

            String documentTopics;
            if (!documentEncoding.equals("default")) {
                documentTopics = FileUtils.readFileToString(documentTopicsFile, documentEncoding);
            } else {
                documentTopics = FileUtils.readFileToString(documentTopicsFile);
            }

            // Adding the topics to the instance
            newInst[2] = (double) data.attribute(2).addStringValue(documentTopics);

        } catch (Exception e) {
            // An unreadable topics file is the normal case for unlabeled documents, so
            // it is only reported in debug mode.
            if (debugMode) {
                System.err.println("No existing topics for " + documentTextFile);
            }
            newInst[2] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));

        // data is replaced by an empty structure right below, so index 0 is the
        // instance that was just added.
        mauiFilter.input(data.instance(0));

        data = data.stringFreeStructure();
        if (debugMode) {
            System.err.println("-- Processing document: " + fileName);
        }
        Instance[] topRankedInstances = new Instance[topicsPerDocument];
        Instance inst;

        // Iterating over all extracted keyphrases (inst)
        while ((inst = mauiFilter.output()) != null) {

            // The rank attribute is converted to a zero-based array position.
            int index = (int) inst.value(mauiFilter.getRankIndex()) - 1;

            // The lower bound keeps an unexpected rank below 1 from indexing before
            // the start of the array.
            if (index >= 0 && index < topicsPerDocument) {
                topRankedInstances[index] = inst;
            }
        }

        if (debugMode) {
            System.err.println("-- Keyphrases and feature values:");
        }
        FileOutputStream out = null;
        PrintWriter printer = null;

        try {
            // Topics are only written when no topics file exists yet.
            if (!documentTopicsFile.exists()) {
                out = new FileOutputStream(documentTopicsFile);
                if (!documentEncoding.equals("default")) {
                    printer = new PrintWriter(new OutputStreamWriter(out, documentEncoding));
                } else {
                    printer = new PrintWriter(out);
                }
            }

            double numExtracted = 0, numCorrect = 0;
            wikipedia = mauiFilter.getWikipedia();

            HashMap<Article, Integer> topics = null;

            if (printGraph) {
                topics = new HashMap<Article, Integer>();
            }

            int p = 0;
            String root = "";
            for (int i = 0; i < topicsPerDocument; i++) {
                if (topRankedInstances[i] != null) {
                    // The last attribute tells whether the topic is correct; it is
                    // missing when no reference topics were available.
                    if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
                        numExtracted += 1.0;
                    }
                    if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
                        numCorrect += 1.0;
                    }
                    if (printer != null) {
                        String topic = topRankedInstances[i].stringValue(mauiFilter.getOutputFormIndex());
                        printer.print(topic);

                        if (printGraph) {

                            Article article = wikipedia.getArticleByTitle(topic);
                            if (article == null) {
                                article = wikipedia.getMostLikelyArticle(topic, new CaseFolder());
                            }
                            if (article != null) {
                                // The first article that is found becomes the graph root.
                                if (root.isEmpty()) {
                                    root = article.getTitle();
                                }
                                topics.put(article, Integer.valueOf(p));
                            } else {
                                if (debugMode) {
                                    System.err.println(
                                            "Couldn't find article for " + topic + " in " + documentTopicsFile);
                                }
                            }
                            p++;
                        }
                        if (additionalInfo) {
                            printer.print("\t");
                            printer.print(topRankedInstances[i].stringValue(mauiFilter.getNormalizedFormIndex()));
                            printer.print("\t");
                            printer.print(Utils.doubleToString(
                                    topRankedInstances[i].value(mauiFilter.getProbabilityIndex()), 4));
                        }
                        printer.println();
                    }
                    if (debugMode) {
                        System.err.println(topRankedInstances[i]);
                    }
                }
            }

            if (printGraph) {
                String graphFile = documentTopicsFile.getAbsolutePath().replace(".key", ".gv");
                computeGraph(topics, root, graphFile);
            }
            if (numExtracted > 0) {
                if (debugMode) {
                    System.err.println("-- " + numCorrect + " correct");
                }
                double totalCorrect = mauiFilter.getTotalCorrect();
                correctStatistics.addElement(Double.valueOf(numCorrect));
                precisionStatistics.addElement(Double.valueOf(numCorrect / numExtracted));
                // NOTE(review): not guarded against totalCorrect == 0.
                recallStatistics.addElement(Double.valueOf(numCorrect / totalCorrect));

            }
        } finally {
            // Release the topics file even when processing this document failed.
            // Closing the writer flushes it and closes the wrapped stream; the stream
            // is closed as well in case wrapping it failed.
            if (printer != null) {
                printer.close();
            }
            if (out != null) {
                out.close();
            }
        }
    }

    if (correctStatistics.size() != 0) {

        double[] st = new double[correctStatistics.size()];
        for (int i = 0; i < correctStatistics.size(); i++) {
            st[i] = correctStatistics.elementAt(i).doubleValue();
        }
        double avg = Utils.mean(st);
        double stdDev = Math.sqrt(Utils.variance(st));

        if (correctStatistics.size() == 1) {
            System.err.println("\n-- Evaluation results based on 1 document:");

        } else {
            System.err.println("\n-- Evaluation results based on " + correctStatistics.size() + " documents:");
        }
        System.err.println("Avg. number of correct keyphrases per document: " + Utils.doubleToString(avg, 2)
                + " +/- " + Utils.doubleToString(stdDev, 2));

        st = new double[precisionStatistics.size()];
        for (int i = 0; i < precisionStatistics.size(); i++) {
            st[i] = precisionStatistics.elementAt(i).doubleValue();
        }
        double avgPrecision = Utils.mean(st);
        double stdDevPrecision = Math.sqrt(Utils.variance(st));

        System.err.println("Precision: " + Utils.doubleToString(avgPrecision * 100, 2) + " +/- "
                + Utils.doubleToString(stdDevPrecision * 100, 2));

        st = new double[recallStatistics.size()];
        for (int i = 0; i < recallStatistics.size(); i++) {
            st[i] = recallStatistics.elementAt(i).doubleValue();
        }
        double avgRecall = Utils.mean(st);
        double stdDevRecall = Math.sqrt(Utils.variance(st));

        System.err.println("Recall: " + Utils.doubleToString(avgRecall * 100, 2) + " +/- "
                + Utils.doubleToString(stdDevRecall * 100, 2));

        // Harmonic mean of the averaged precision and recall.
        // NOTE(review): evaluates to NaN when both averages are 0.
        double fMeasure = 2 * avgRecall * avgPrecision / (avgRecall + avgPrecision);
        System.err.println("F-Measure: " + Utils.doubleToString(fMeasure * 100, 2));

        System.err.println("");
    }
    mauiFilter.batchFinished();
}

From source file:tml.vectorspace.factorisation.MultiDimensionalScalingNR.java

License:Apache License

/**
 * Lays the given instances out as points with an X and a Y attribute, using an
 * iterative Newton-Raphson update of one point at a time.
 *
 * <p>The layout starts from a copy of {@code initialX} if it is set, otherwise from
 * random coordinates. Each iteration visits all points in shuffled order and moves
 * point k by {@code -H^-1 * g}, where g and H are the gradient and Hessian built below.
 * The error is the sum over all pairs with non-zero dissimilarity of
 * {@code (d - d_hat)^2 / d_hat^2}. An iteration that does not lower the error is
 * undone; the loop stops after {@code maxIterations} or once an improvement is no
 * larger than {@code tolerence}.
 *
 * <p>Assigns {@code error}, {@code d} and {@code d_hat}, which are declared outside
 * this method.
 *
 * @param instances the vectors whose pairwise dissimilarities should be preserved
 * @return the layout, one instance per input instance
 */
public Instances scale(Instances instances) {
    // approximation error
    error = 0.0;
    double error_previous;

    // number of points
    int n = instances.numInstances();

    // distance between points in the p-dimensional layout
    d = new Matrix(n, n);
    Matrix d_previous;

    // dissimilarity between vectors
    d_hat = new Matrix(n, n);

    // points instances
    // NOTE(review): exactly two attributes are created regardless of p, and only the
    // X and Y values are written back in the update step - presumably p is 2; confirm.
    FastVector attributes = new FastVector(p);
    attributes.addElement(new Attribute("X"));
    attributes.addElement(new Attribute("Y"));
    Instances x = new Instances("MDS", attributes, instances.numInstances());
    Instances x_previous;

    // initialise points sequence
    ArrayList<Integer> kseq = new ArrayList<Integer>();
    for (int k = 0; k < n; k++) {
        kseq.add(k);
    }

    // initialise x
    if (initialX != null) {
        x = new Instances(initialX);

    } else {
        Random rand = new Random();
        for (int k = 0; k < n; k++) {
            Instance x_inst = new Instance(p);
            // NOTE(review): rand.nextInt(1) always returns 0, so the coordinates are
            // just rand.nextDouble(), i.e. in [0, 1). Confirm whether a wider or
            // centred range was intended.
            x_inst.setValue(X, rand.nextDouble() - rand.nextInt(1));
            x_inst.setValue(Y, rand.nextDouble() - rand.nextInt(1));
            x.add(x_inst);
        }
    }

    // calculate d
    // Only pairs with i < j are computed; both matrices are filled symmetrically and
    // their diagonals keep the initial value.
    for (int j = 0; j < n; j++) {
        for (int i = 0; i < j; i++) {
            double distance = this.distance(x.instance(i), x.instance(j));
            d.set(i, j, distance);
            d.set(j, i, distance);

            double dissimilarity = this.dissimilarity(instances.instance(i), instances.instance(j));
            d_hat.set(i, j, dissimilarity);
            d_hat.set(j, i, dissimilarity);

            // Pairs with zero dissimilarity are left out to avoid dividing by zero.
            if (d_hat.get(i, j) != 0) {
                error += Math.pow(d.get(i, j) - d_hat.get(i, j), 2) / Math.pow(d_hat.get(i, j), 2);
            }
        }
    }

    // record previous results
    error_previous = error;
    d_previous = d.copy();
    x_previous = new Instances(x);

    // start of Newton-Raphson method
    logger.info("Starting Newton-Raphson MDS.");
    for (int iter = 0; iter < maxIterations; iter++) {
        // randomise points sequence to ensure faster convergence
        Collections.shuffle(kseq);
        for (int k : kseq) {
            // d is not refreshed inside this loop, so every point update of one
            // iteration uses the distances from the end of the previous iteration.
            Matrix gradient = new Matrix(p, 1);
            Matrix hessian = new Matrix(p, p);

            // calculate gradient vector
            for (int a = 0; a < p; a++) {
                double sum = 0;
                for (int l = 0; l < n; l++) {
                    if (k != l) {
                        // Pairs at zero distance or zero dissimilarity are skipped to
                        // avoid dividing by zero.
                        if (d.get(k, l) != 0 && d_hat.get(k, l) != 0) {
                            sum += ((d.get(k, l) - d_hat.get(k, l))
                                    / (d.get(k, l) * Math.pow(d_hat.get(k, l), 2)))
                                    * (x.instance(k).value(a) - x.instance(l).value(a));
                        }
                    }
                }
                gradient.set(a, 0, 2 * sum);
            }

            // calculate hessian matrix
            for (int a = 0; a < p; a++) {
                for (int b = 0; b < p; b++) {
                    double sum = 0.0;
                    if (a != b) {
                        // off-diagonal entry
                        for (int l = 0; l < n; l++) {
                            if (k != l) {
                                if (d.get(k, l) != 0 && d_hat.get(k, l) != 0) {
                                    sum += ((x.instance(k).value(a) - x.instance(l).value(a))
                                            * (x.instance(k).value(b) - x.instance(l).value(b)))
                                            / (Math.pow(d.get(k, l), 3) * d_hat.get(k, l));
                                }
                            }
                        }
                        sum = 2 * sum;
                    } else {
                        // diagonal entry
                        for (int l = 0; l < n; l++) {
                            if (k != l) {
                                if (d_hat.get(k, l) != 0 && d.get(k, l) != 0) {
                                    sum += (1.0 / Math.pow(d_hat.get(k, l), 2)) - (Math.pow(d.get(k, l), 2)
                                            - Math.pow((x.instance(k).value(a) - x.instance(l).value(a)), 2))
                                            / (Math.pow(d.get(k, l), 3) * d_hat.get(k, l));
                                }
                            }
                        }
                        sum = 2 * sum;
                    }
                    hessian.set(a, b, sum);
                }
            }

            // update x
            // Newton step: x_k <- x_k - H^-1 * g.
            // NOTE(review): there is no guard for a Hessian that cannot be inverted.
            Matrix x_k = new Matrix(x.instance(k).toDoubleArray(), p);
            Matrix x_k_tilda = x_k.minus(hessian.inverse().times(gradient));
            x.instance(k).setValue(X, x_k_tilda.get(X, 0));
            x.instance(k).setValue(Y, x_k_tilda.get(Y, 0));
        }

        // calculate d and error
        error = 0;
        for (int j = 0; j < n; j++) {
            for (int i = 0; i < j; i++) {
                double distance = this.distance(x.instance(i), x.instance(j));
                d.set(i, j, distance);
                d.set(j, i, distance);

                if (d_hat.get(i, j) != 0) {
                    error += Math.pow(d.get(i, j) - d_hat.get(i, j), 2) / Math.pow(d_hat.get(i, j), 2);
                }
            }
        }

        if (error < error_previous) {
            logger.debug(iter + ".\t error " + error);
            // Converged: the improvement is within the tolerance.
            if (error_previous - error <= tolerence) {
                break;
            }

            // Accept this iteration as the new baseline.
            error_previous = error;
            d_previous = d.copy();
            x_previous = new Instances(x);
        } else // no improvement: discard this iteration and restore the last accepted state
        {
            // NOTE(review): error itself is not restored here, so after a rejected
            // final iteration it holds the rejected value rather than error_previous.
            x = new Instances(x_previous);
            d = d_previous.copy();
        }
    }

    logger.info("Finished Newton-Raphson MDS.");
    return x;
}

From source file:tml.vectorspace.factorisation.PrincipalCoordinateAnalysis.java

License:Apache License

/**
 * Computes a two-attribute (X, Y) layout of the instances by principal coordinate
 * analysis.
 *
 * <p>The pairwise distance matrix is squared element-wise, multiplied by -1/2 and
 * double-centred; the result is eigen-decomposed. Each output point takes the entries
 * of the eigenvectors in columns {@code X} and {@code Y}, divided by the sign-preserving
 * square root of the absolute value of the matching eigenvalue.
 *
 * @param instances the vectors to project
 * @return one instance with attributes "X" and "Y" per input instance
 */
public Instances scale(Instances instances) {
    // number of points
    final int n = instances.numInstances();

    // output points
    FastVector axes = new FastVector(p);
    axes.addElement(new Attribute("X"));
    axes.addElement(new Attribute("Y"));
    Instances coords = new Instances("PCO", axes, instances.numInstances());

    // symmetric distance matrix; the diagonal keeps its initial value
    Matrix dist = new Matrix(n, n);
    for (int col = 0; col < n; col++) {
        for (int row = 0; row < col; row++) {
            final double pairDistance = this.distance(instances.instance(row), instances.instance(col));
            dist.set(row, col, pairDistance);
            dist.set(col, row, pairDistance);
        }
    }

    // A = -1/2 * (element-wise squared distances)
    Matrix halvedSquares = dist.arrayTimes(dist).times(-0.5);
    // centring matrix: identity minus a matrix whose entries are all 1/n
    Matrix allOnes = new Matrix(n, n, 1);
    Matrix centring = Matrix.identity(n, n).minus(allOnes.times(1.0 / n));
    // double-centred matrix
    Matrix centred = centring.times(halvedSquares).times(centring);

    // eigenvalue decomposition
    EigenvalueDecomposition decomposition = centred.eig();
    Matrix eigenvalues = decomposition.getD();
    Matrix eigenvectors = decomposition.getV();

    // NOTE(review): which eigenpairs the indices X and Y select depends on the order
    // in which eig() returns them - confirm these are the intended axes.
    for (int i = 0; i < n; i++) {
        final double lambdaX = eigenvalues.get(X, X);
        final double lambdaY = eigenvalues.get(Y, Y);
        // copySign keeps the divisor's sign equal to the eigenvalue's sign.
        final double divisorX = Math.copySign(Math.sqrt(Math.abs(lambdaX)), lambdaX);
        final double divisorY = Math.copySign(Math.sqrt(Math.abs(lambdaY)), lambdaY);

        Instance point = new Instance(p);
        point.setValue(X, eigenvectors.get(i, X) / divisorX);
        point.setValue(Y, eigenvectors.get(i, Y) / divisorY);
        coords.add(point);
    }

    return coords;
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Clusters the data with XMeans and returns the instances whose class matches the
 * class assigned to their cluster.
 *
 * <p>Side effects on {@code data}: its class index is set to the last attribute and
 * its first instance is removed. Failures while clustering or evaluating are printed
 * and otherwise ignored, in which case the returned set holds whatever was collected
 * up to that point (possibly nothing).
 *
 * @param data the instances to cluster; modified as described above
 * @return a new data set containing the selected instances
 * @throws NullPointerException if {@code data} is null
 */
public static Instances clusterInstances(Instances data) {
    // Validate the argument before anything is allocated.
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    XMeans xmeans = new XMeans();
    Remove filter = new Remove();
    Instances dataClusterer = null;

    // Get the attributes from the data for creating the sampled_data object
    // NOTE(review): enumerateAttributes() presumably skips the class attribute when a
    // class index is already set on data, which would leave sampled_data one
    // attribute short - confirm callers pass data without a class index.
    ArrayList<Attribute> attrList = new ArrayList<Attribute>();
    Enumeration<?> attributes = data.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        attrList.add((Attribute) attributes.nextElement());
    }

    Instances sampled_data = new Instances(data.relationName(), attrList, 0);
    data.setClassIndex(data.numAttributes() - 1);
    sampled_data.setClassIndex(data.numAttributes() - 1);
    // The Remove filter takes 1-based attribute indices; it strips the class attribute.
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    data.remove(0);//In Wavelet Stream of MOA always the first element comes without class

    try {
        filter.setInputFormat(data);
        dataClusterer = Filter.useFilter(data, filter);
        // At least noOfClassesInPool - 1 clusters, but never fewer than the pool size
        // when the pool has two classes or less.
        int minClusters = (noOfClassesInPool > 2) ? noOfClassesInPool - 1 : noOfClassesInPool;
        xmeans.setMinNumClusters(minClusters);
        xmeans.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        xmeans.setUseKDTree(true);
        xmeans.buildClusterer(dataClusterer);
        System.out.println("Xmeans\n:" + xmeans);
    } catch (Exception e) {
        // Best effort: report and fall through; the evaluation below will then fail
        // and be reported as well.
        e.printStackTrace();
    }

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xmeans);
    try {
        eval.evaluateClusterer(data);
        int[] classesToClustersMap = eval.getClassesToClusters();
        for (int i = 0; i < data.size(); i++) {
            // data and dataClusterer are index-aligned: the latter is data without
            // the class attribute.
            int clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
            //Check if the class value of instance and class value of cluster matches
            if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
                sampled_data.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return sampled_data;
}

From source file:trainableSegmentation.FeatureStack.java

License:GNU General Public License

/**
 * Create the instances for the whole stack
 * /*from  w  w  w  . ja  v  a  2 s.c om*/
 * @param classes list of classes names
 * 
 * @return whole stack set of instances
 */
public Instances createInstances(ArrayList<String> classes) {
    if (Thread.currentThread().isInterrupted())
        return null;

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 1; i <= wholeStack.getSize(); i++) {
        String attString = wholeStack.getSliceLabel(i);
        attributes.add(new Attribute(attString));
    }

    if (useNeighborhood())
        for (int i = 0; i < 8; i++) {
            IJ.log("Adding extra attribute original_neighbor_" + (i + 1) + "...");
            attributes.add(new Attribute(new String("original_neighbor_" + (i + 1))));
        }

    attributes.add(new Attribute("class", classes));

    Instances data = new Instances("segment", attributes, width * height);

    for (int y = 0; y < wholeStack.getHeight(); y++) {
        if (Thread.currentThread().isInterrupted())
            return null;
        IJ.showProgress(y, wholeStack.getHeight());
        for (int x = 0; x < wholeStack.getWidth(); x++) {
            data.add(createInstance(x, y, 0));
        }
    }
    // Set the index of the class attribute
    data.setClassIndex(attributes.size() - 1);
    IJ.showProgress(1.0);
    return data;
}

From source file:trainableSegmentation.Trainable_Segmentation.java

License:GNU General Public License

/**
 * Create training instances out of the user markings
 * @return set of instances/*from  w ww  .  j a va  2  s .co  m*/
 */
public Instances createTrainingInstances() {
    //IJ.log("create training instances: num of features = " + featureStack.getSize());

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 1; i <= featureStack.getSize(); i++) {
        String attString = featureStack.getSliceLabel(i);
        attributes.add(new Attribute(attString));
    }

    final ArrayList<String> classes = new ArrayList<String>();

    int numOfInstances = 0;
    for (int i = 0; i < numOfClasses; i++) {
        // Do not add empty lists
        if (examples[i].size() > 0)
            classes.add(classLabels[i]);
        numOfInstances += examples[i].size();
    }

    attributes.add(new Attribute("class", classes));

    final Instances trainingData = new Instances("segment", attributes, numOfInstances);

    IJ.log("\nTraining input:");

    // For all classes
    for (int l = 0; l < numOfClasses; l++) {
        int nl = 0;
        // Read all lists of examples
        for (int j = 0; j < examples[l].size(); j++) {
            Roi r = examples[l].get(j);

            // For polygon rois we get the list of points
            if (r instanceof PolygonRoi && r.getType() != Roi.FREEROI) {
                if (r.getStrokeWidth() == 1) {
                    int[] x = r.getPolygon().xpoints;
                    int[] y = r.getPolygon().ypoints;
                    final int n = r.getPolygon().npoints;

                    for (int i = 0; i < n; i++) {
                        double[] values = new double[featureStack.getSize() + 1];
                        for (int z = 1; z <= featureStack.getSize(); z++)
                            values[z - 1] = featureStack.getProcessor(z).getPixelValue(x[i], y[i]);
                        values[featureStack.getSize()] = (double) l;
                        trainingData.add(new DenseInstance(1.0, values));
                        // increase number of instances for this class
                        nl++;
                    }
                } else // For thicker lines, include also neighbors
                {
                    final int width = (int) Math.round(r.getStrokeWidth());
                    FloatPolygon p = r.getFloatPolygon();
                    int n = p.npoints;

                    double x1, y1;
                    double x2 = p.xpoints[0] - (p.xpoints[1] - p.xpoints[0]);
                    double y2 = p.ypoints[0] - (p.ypoints[1] - p.ypoints[0]);
                    for (int i = 0; i < n; i++) {
                        x1 = x2;
                        y1 = y2;
                        x2 = p.xpoints[i];
                        y2 = p.ypoints[i];

                        double dx = x2 - x1;
                        double dy = y1 - y2;
                        double length = (float) Math.sqrt(dx * dx + dy * dy);
                        dx /= length;
                        dy /= length;
                        double x = x2 - dy * width / 2.0;
                        double y = y2 - dx * width / 2.0;

                        int n2 = width;
                        do {
                            if (x >= 0 && x < featureStack.getWidth() && y >= 0
                                    && y < featureStack.getHeight()) {
                                double[] values = new double[featureStack.getSize() + 1];
                                for (int z = 1; z <= featureStack.getSize(); z++)
                                    values[z - 1] = featureStack.getProcessor(z).getInterpolatedValue(x, y);
                                values[featureStack.getSize()] = (double) l;
                                trainingData.add(new DenseInstance(1.0, values));
                                // increase number of instances for this class
                                nl++;
                            }
                            x += dy;
                            y += dx;
                        } while (--n2 > 0);
                    }

                }
            } else // for the rest of rois we get ALL points inside the roi
            {
                final ShapeRoi shapeRoi = new ShapeRoi(r);
                final Rectangle rect = shapeRoi.getBounds();

                final int lastX = rect.x + rect.width;
                final int lastY = rect.y + rect.height;

                for (int x = rect.x; x < lastX; x++)
                    for (int y = rect.y; y < lastY; y++)
                        if (shapeRoi.contains(x, y)) {
                            double[] values = new double[featureStack.getSize() + 1];
                            for (int z = 1; z <= featureStack.getSize(); z++)
                                values[z - 1] = featureStack.getProcessor(z).getPixelValue(x, y);
                            values[featureStack.getSize()] = (double) l;
                            trainingData.add(new DenseInstance(1.0, values));
                            // increase number of instances for this class
                            nl++;
                        }
            }

        }

        IJ.log("# of pixels selected as " + classLabels[l] + ": " + nl);
    }

    return trainingData;
}