List of usage examples for the weka.core.Instances constructor
public Instances(String name, ArrayList<Attribute> attInfo, int capacity)
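Before the examples scraped from real projects, a minimal self-contained sketch of the constructor itself may help. It assumes Weka 3.7 or later (where this ArrayList-based constructor and DenseInstance exist); the class name, relation name, and attribute names are purely illustrative:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class InstancesConstructorSketch {
    public static void main(String[] args) {
        // Define the attributes (columns) of the dataset.
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        attInfo.add(new Attribute("length")); // numeric attribute
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        attInfo.add(new Attribute("class", labels)); // nominal attribute

        // Create an empty dataset named "demo" with initial capacity 10.
        Instances data = new Instances("demo", attInfo, 10);
        data.setClassIndex(data.numAttributes() - 1);

        // Add one instance: length = 4.2, class = "yes".
        double[] vals = new double[] { 4.2, labels.indexOf("yes") };
        data.add(new DenseInstance(1.0, vals));
        System.out.println(data);
    }
}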
From source file: Bilbo.java
License: Open Source License
/**
 * Returns a training set for a particular iteration.
 *
 * @param iteration the number of the iteration for the requested training set.
 * @return the training set for the supplied iteration number
 * @throws Exception if something goes wrong when generating a training set.
 */
@Override
protected synchronized Instances getTrainingSet(Instances p_data, int iteration) throws Exception {
    int bagSize = (int) (p_data.numInstances() * (m_BagSizePercent / 100.0));
    Instances bagData = null;
    Random r = new Random(m_Seed + iteration);

    // create the in-bag dataset
    if (m_CalcOutOfBag && p_data.classIndex() != -1) {
        m_inBag[iteration] = new boolean[p_data.numInstances()];
        bagData = p_data.resampleWithWeights(r, m_inBag[iteration], getRepresentCopiesUsingWeights());
    } else {
        bagData = p_data.resampleWithWeights(r, getRepresentCopiesUsingWeights());
        if (bagSize < p_data.numInstances()) {
            bagData.randomize(r);
            Instances newBagData = new Instances(bagData, 0, bagSize);
            bagData = newBagData;
        }
    }
    return bagData;
}
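The in-bag/out-of-bag bookkeeping above can be distilled into a standalone sketch. This is an illustration only, assuming a Weka version that has the three-argument resampleWithWeights overload the method above calls; the class and variable names are placeholders:

import java.util.Random;
import weka.core.Instances;

public class BaggingSketch {
    static Instances bootstrapSample(Instances data, long seed, boolean[] inBag) {
        Random r = new Random(seed);
        // draw a bootstrap sample; inBag[i] records whether instance i was drawn
        Instances bag = data.resampleWithWeights(r, inBag, false);
        // instances with inBag[i] == false form the out-of-bag set, usable as a
        // free validation set for this bagging iteration
        return bag;
    }
}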
From source file: DocClassifier.java
public Instances createInstances(File[] files) {
    Instances instances = new Instances("Inst" + files.hashCode(), attrList, files.length);
    // the last attribute in attrList is the class attribute
    instances.setClass((Attribute) attrList.lastElement());
    for (File file : files) {
        Instance inst = createInstance(file);
        inst.setDataset(instances);
        instances.add(inst);
    }
    return instances;
}
From source file: PrincipalComponents.java
License: Open Source License
/**
 * Set up the header for the PC->original space dataset
 *
 * @return the output format
 * @throws Exception if something goes wrong
 */
private Instances setOutputFormatOriginal() throws Exception {
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();

    for (int i = 0; i < m_numAttribs; i++) {
        String att = m_trainInstances.attribute(i).name();
        attributes.add(new Attribute(att));
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    return outputFormat;
}
From source file: PrincipalComponents.java
License: Open Source License
/**
 * Set the format for the transformed data
 *
 * @return a set of empty Instances (header only) in the new format
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuffer attName = new StringBuffer();

        // build array of coefficients
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++) {
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        }
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;

        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_maxAttrsInName > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_inds[j] = j;
            }
        }

        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0) {
                attName.append("+");
            }
            attName.append(Utils.doubleToString(coeff_value, 5, 3)
                    + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs) {
            attName.append("...");
        }

        attributes.add(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();

    return outputFormat;
}
From source file: TextDirectoryLoader.java
License: Open Source License
/**
 * Determines and returns (if possible) the structure (internally the
 * header) of the data set as an empty set of instances.
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if an error occurs
 */
public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
        throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
        String directoryPath = getDirectory().getAbsolutePath();
        FastVector atts = new FastVector();
        FastVector classes = new FastVector();

        File dir = new File(directoryPath);
        String[] subdirs = dir.list();

        for (int i = 0; i < subdirs.length; i++) {
            File subdir = new File(directoryPath + File.separator + subdirs[i]);
            if (subdir.isDirectory())
                classes.addElement(subdirs[i]);
        }

        atts.addElement(new Attribute("text", (FastVector) null));
        if (m_OutputFilename)
            atts.addElement(new Attribute("filename", (FastVector) null));
        // make sure that the name of the class attribute is unlikely to
        // clash with any attribute created via the StringToWordVector filter
        atts.addElement(new Attribute("@@class@@", classes));

        String relName = directoryPath.replaceAll("/", "_");
        relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
        m_structure = new Instances(relName, atts, 0);
        m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
}
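For context, a typical way to drive a loader like this one is sketched below. It assumes the stock weka.core.converters.TextDirectoryLoader API (which this class mirrors, with setDirectory, getStructure, and getDataSet); the corpus path is a placeholder:

import java.io.File;
import weka.core.Instances;
import weka.core.converters.TextDirectoryLoader;

public class LoaderSketch {
    public static void main(String[] args) throws Exception {
        TextDirectoryLoader loader = new TextDirectoryLoader();
        loader.setDirectory(new File("/path/to/corpus")); // one sub-directory per class
        Instances header = loader.getStructure(); // empty header, as built above
        Instances data = loader.getDataSet();     // full text data set
        System.out.println(header);
        System.out.println(data.numInstances() + " documents loaded");
    }
}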
From source file: ArrayLoader.java
License: Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure, then this method should do so before
 * processing the rest of the data set.
 *
 * @return the structure of the data set as an empty set of Instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
        getStructure();
    }

    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    m_cumulativeInstances = new FastVector();
    FastVector current;
    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);
        m_cumulativeInstances.addElement(current);
    }

    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    // if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    // }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }

    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory

    return dataSet;
}
From source file: ArrayLoader.java
License: Open Source License
/**
 * Assumes the first line of the file contains the attribute names.
 * Assumes all attributes are real (reading the full data set with
 * getDataSet will establish the true structure).
 */
private void readHeader(String[] column) throws IOException {
    FastVector attribNames = new FastVector();

    // Assume first row of data are the column titles
    for (int i = 0; i < column.length; i++) {
        attribNames.addElement(new Attribute(column[i]));
    }

    m_structure = new Instances("DataArray", attribNames, 0);
}
From source file: FlexDMThread.java
License: Open Source License
public void run() {
    try {
        // Get the data from the source
        FlexDM.getMainData.acquire();
        Instances data = dataset.getSource().getDataSet();
        FlexDM.getMainData.release();

        // Set class attribute if undefined
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1);
        }

        // Process hyperparameters for classifier
        String temp = "";
        for (int i = 0; i < classifier.getNumParams(); i++) {
            temp += classifier.getParameter(i).getName();
            temp += " ";
            if (classifier.getParameter(i).getValue() != null) {
                temp += classifier.getParameter(i).getValue();
                temp += " ";
            }
        }
        String[] options = weka.core.Utils.splitOptions(temp);

        // Print to console - experiment is starting
        if (temp.equals("")) { // no parameters
            temp = "results_no_parameters";
            try {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with no parameters");
            } catch (Exception e) {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with no parameters");
            }
        } else { // parameters
            try {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with parameters " + temp);
            } catch (Exception e) {
                System.out.println("STARTING CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with parameters " + temp);
            }
        }

        // Create classifier, setting parameters
        weka.classifiers.Classifier x = createObject(classifier.getName());
        x.setOptions(options);
        x.buildClassifier(data);

        // Process the test selection
        String[] tempTest = dataset.getTest().split("\\s");

        // Create evaluation object for training and testing classifiers
        Evaluation eval = new Evaluation(data);
        StringBuffer predictions = new StringBuffer();

        // Train and evaluate classifier
        if (tempTest[0].equals("testset")) { // specified test file
            // Build classifier
            x.buildClassifier(data);

            // Open test file, load data
            // DataSource testFile = new DataSource(dataset.getTest().substring(7).trim());
            // Instances testSet = testFile.getDataSet();
            FlexDM.getTestData.acquire();
            Instances testSet = dataset.getTestFile().getDataSet();
            FlexDM.getTestData.release();

            // Set class attribute if undefined
            if (testSet.classIndex() == -1) {
                testSet.setClassIndex(testSet.numAttributes() - 1);
            }

            // Evaluate model
            Object[] array = { predictions, new Range(), new Boolean(true) };
            eval.evaluateModel(x, testSet, array);
        } else if (tempTest[0].equals("xval")) { // Cross validation
            // Build classifier
            x.buildClassifier(data);

            // Cross validate
            eval.crossValidateModel(x, data, Integer.parseInt(tempTest[1]), new Random(1), predictions,
                    new Range(), true);
        } else if (tempTest[0].equals("leavexval")) { // Leave-one-out cross validation
            // Build classifier
            x.buildClassifier(data);

            // Cross validate (leave-one-out uses as many folds as instances)
            eval.crossValidateModel(x, data, data.numInstances(), new Random(1), predictions, new Range(), true);
        } else if (tempTest[0].equals("percent")) { // Percentage split of single data set
            // Set training and test sizes from percentage
            int trainSize = (int) Math.round(data.numInstances() * Double.parseDouble(tempTest[1]));
            int testSize = data.numInstances() - trainSize;

            // Load specified data
            Instances train = new Instances(data, 0, trainSize);
            Instances testSet = new Instances(data, trainSize, testSize);

            // Build classifier
            x.buildClassifier(train);

            // Train and evaluate model
            Object[] array = { predictions, new Range(), new Boolean(true) };
            eval.evaluateModel(x, testSet, array);
        } else { // Evaluate on training data
            // Test and evaluate model
            Object[] array = { predictions, new Range(), new Boolean(true) };
            eval.evaluateModel(x, data, array);
        }

        // Create datafile for results
        String filename = dataset.getDir() + "/" + classifier.getDirName() + "/" + temp + ".txt";
        PrintWriter writer = new PrintWriter(filename, "UTF-8");

        // Print classifier, dataset, parameters info to file
        try {
            writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName()
                    + "\n PARAMETERS: " + temp);
        } catch (Exception e) {
            writer.println("CLASSIFIER: " + classifier.getName() + "\n DATASET: " + dataset.getName()
                    + "\n PARAMETERS: " + temp);
        }

        // Add evaluation string to file
        writer.println(eval.toSummaryString());

        // Process result options
        if (checkResults("stats")) { // Classifier statistics
            writer.println(eval.toClassDetailsString());
        }
        if (checkResults("model")) { // The model
            writer.println(x.toString());
        }
        if (checkResults("matrix")) { // Confusion matrix
            writer.println(eval.toMatrixString());
        }
        if (checkResults("entropy")) { // Entropy statistics
            // Set options required to get the entropy stats
            String[] opt = new String[4];
            opt[0] = "-t";
            opt[1] = dataset.getName();
            opt[2] = "-k";
            opt[3] = "-v";

            // Evaluate model
            String entropy = Evaluation.evaluateModel(x, opt);

            // Grab the relevant info from the results, print to file
            entropy = entropy.substring(entropy.indexOf("=== Stratified cross-validation ===") + 35,
                    entropy.indexOf("=== Confusion Matrix ==="));
            writer.println("=== Entropy Statistics ===");
            writer.println(entropy);
        }
        if (checkResults("predictions")) { // The model's predictions
            writer.println("=== Predictions ===\n");
            if (!dataset.getTest().contains("xval")) {
                // print header of predictions table if required
                writer.println(" inst# actual predicted error distribution ()");
            }
            writer.println(predictions.toString()); // print predictions to file
        }

        writer.close();

        // Summary file is semaphore controlled to ensure quality
        try {
            // get a permit; grab the summary file, write the classifier's details to it
            FlexDM.writeFile.acquire();
            PrintWriter p = new PrintWriter(new FileWriter(summary, true));
            if (temp.equals("results_no_parameters")) { // change output based on parameters
                temp = temp.substring(8);
            }

            // write percent correct, classifier name, dataset name to summary file
            p.write(dataset.getName() + ", " + classifier.getName() + ", " + temp + ", " + eval.correct() + ", "
                    + eval.incorrect() + ", " + eval.unclassified() + ", " + eval.pctCorrect() + ", "
                    + eval.pctIncorrect() + ", " + eval.pctUnclassified() + ", " + eval.kappa() + ", "
                    + eval.meanAbsoluteError() + ", " + eval.rootMeanSquaredError() + ", "
                    + eval.relativeAbsoluteError() + ", " + eval.rootRelativeSquaredError() + ", "
                    + eval.SFPriorEntropy() + ", " + eval.SFSchemeEntropy() + ", " + eval.SFEntropyGain() + ", "
                    + eval.SFMeanPriorEntropy() + ", " + eval.SFMeanSchemeEntropy() + ", "
                    + eval.SFMeanEntropyGain() + ", " + eval.KBInformation() + ", " + eval.KBMeanInformation()
                    + ", " + eval.KBRelativeInformation() + ", " + eval.weightedTruePositiveRate() + ", "
                    + eval.weightedFalsePositiveRate() + ", " + eval.weightedTrueNegativeRate() + ", "
                    + eval.weightedFalseNegativeRate() + ", " + eval.weightedPrecision() + ", "
                    + eval.weightedRecall() + ", " + eval.weightedFMeasure() + ", "
                    + eval.weightedAreaUnderROC() + "\n");
            p.close();

            // release semaphore
            FlexDM.writeFile.release();
        } catch (InterruptedException e) { // bad things happened
            System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName()
                    + " on dataset " + dataset.getName());
        }

        // output we have successfully finished processing classifier
        if (temp.equals("no_parameters")) { // no parameters
            try {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with no parameters");
            } catch (Exception e) {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with no parameters");
            }
        } else { // with parameters
            try {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName().substring(dataset.getName().lastIndexOf("\\") + 1)
                        + " with parameters " + temp);
            } catch (Exception e) {
                System.out.println("FINISHED CLASSIFIER " + cNum + " - " + classifier.getName() + " on dataset "
                        + dataset.getName() + " with parameters " + temp);
            }
        }

        try {
            // get a permit; grab the log file, write the classifier's details to it
            FlexDM.writeLog.acquire();
            PrintWriter p = new PrintWriter(new FileWriter(log, true));
            Date date = new Date();
            Format formatter = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
            if (temp.equals("results_no_parameters")) { // change output based on parameters
                temp = temp.substring(8);
            }

            // write details to log file
            p.write(dataset.getName() + ", " + dataset.getTest() + ", \"" + dataset.getResult_string() + "\", "
                    + classifier.getName() + ", " + temp + ", " + formatter.format(date) + "\n");
            p.close();

            // release semaphore
            FlexDM.writeLog.release();
        } catch (InterruptedException e) { // bad things happened
            System.err.println("FATAL ERROR OCCURRED: Classifier: " + cNum + " - " + classifier.getName()
                    + " on dataset " + dataset.getName());
        }

        s.release();
    } catch (Exception e) { // an error occurred
        System.err.println("FATAL ERROR OCCURRED: " + e.toString() + "\nClassifier: " + cNum + " - "
                + classifier.getName() + " on dataset " + dataset.getName());
        s.release();
    }
}
From source file: Pair.java
License: Open Source License
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { // weight all equally
        weightSource = weightTarget = 1.0 /* / numInstances */;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight /* / numSourceInstances */;
        weightTarget = 1.0 / totalWeight /* / numTargetInstances */;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations /* m_NumIterations */; it++) {
            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; // [same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            // double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {
            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);
        }
    }
}
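The merge-then-reweight idiom at the top of doCV can be isolated into a small sketch. This is an assumption-laden illustration, not Pair's actual API; source, target, and the weight values are placeholders:

import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;

public class MergeWeightSketch {
    // Combine source- and target-domain data, giving each domain its own instance weight.
    static Instances merge(Instances source, Instances target, double wSource, double wTarget) {
        int numSource = source.numInstances();
        Instances merged = new Instances(source, 0, numSource); // copy of the source data
        Enumeration<?> en = target.enumerateInstances();
        while (en.hasMoreElements()) {
            merged.add((Instance) en.nextElement()); // append target instances
        }
        for (int j = 0; j < merged.numInstances(); j++) {
            merged.instance(j).setWeight(j < numSource ? wSource : wTarget);
        }
        return merged;
    }
}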
From source file: Pair.java
License: Open Source License
/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * @param data the training data to be used for generating the
 *             boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {
    Random randomInstance = new Random(0);
    double epsilon, reweight, beta = 0;
    Evaluation evaluation;
    Instances sample;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    int numSourceInstances = m_SourceInstances.numInstances();

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {
        // Build the classifier
        sample = null;
        if (!resample || m_NumIterationsPerformed == 0) {
            sample = data;
        } else {
            double sum = data.sumOfWeights();
            double[] weights = new double[data.numInstances()];
            for (int i = 0; i < weights.length; i++) {
                weights[i] = data.instance(i).weight() / sum;
            }
            sample = data.resampleWithWeights(randomInstance, weights);

            if (doSampleSize) {
                int effectiveInstances = (int) (sourceFraction * weights.length + numTargetInstances);
                if (effectiveInstances > numSourceInstances + numTargetInstances)
                    effectiveInstances = numSourceInstances + numTargetInstances;
                // System.out.println(effectiveInstances);
                sample.randomize(randomInstance);
                Instances q = new Instances(sample, 0, effectiveInstances);
                sample = q;
            }
        }

        try {
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("E: " + e);
        }

        if (doBagging)
            beta = 0.4 / .6; // always same beta
        else
            beta = setWeights(data, m_Classifiers[m_NumIterationsPerformed], -1, numSourceInstances, true);

        // Stop if error too small or error too big and ignore this model
        if (beta < 0) { // setWeights indicates a problem with negative beta
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log(1 / beta);
    }

    betaSum = 0;
    for (int i = 0; i < m_NumIterationsPerformed; i++)
        betaSum += m_Betas[i];
}