Example usage for weka.core WekaException WekaException

List of usage examples for weka.core WekaException WekaException

Introduction

In this page you can find the example usage for weka.core WekaException WekaException.

Prototype

public WekaException(Throwable cause) 

Source Link

Document

Constructor with cause argument

Usage

From source file:joelib2.algo.datamining.yale.ExampleSetHelper.java

License:Open Source License

/**
 * Converts a set of molecules into a Weka {@link Instances} data set, one
 * instance per molecule and one attribute per requested descriptor.
 *
 * @param molecules      the molecules to convert
 * @param attributes     names of the descriptors to use as attributes
 * @param attributeTypes Weka attribute type for each entry in
 *                       {@code attributes} (Attribute.NUMERIC or
 *                       Attribute.NOMINAL)
 * @return the generated instances, or {@code null} if a molecule carries a
 *         nominal value that was not seen when the attribute was built
 * @throws WekaException if the two input arrays differ in length, a nominal
 *                       descriptor value spans multiple lines, or a numeric
 *                       descriptor is not a native value
 */
public static Instances createMolInstances(MoleculeVector molecules, String[] attributes, int[] attributeTypes)
        throws WekaException {
    // load descriptor binning
    DescriptorBinning binning = DescriptorBinning.getDescBinning(molecules);

    int length = molecules.getSize();

    if (attributes.length != attributeTypes.length) {
        throw new WekaException("Different number of attributes and attribute types.");
    }

    Enumeration enumeration = binning.getDescriptors();
    FastVector attributesV = new FastVector(binning.numberOfDescriptors());
    Molecule mol;
    BasicPairData pairData;

    // Build the Weka attribute declarations.
    for (int i = 0; i < attributes.length; i++) {
        if (attributeTypes[i] == Attribute.NUMERIC) {
            // numeric attribute: name taken from the binning's descriptor list
            attributesV.addElement(new Attribute((String) enumeration.nextElement(), attributesV.size()));
        } else if (attributeTypes[i] == Attribute.NOMINAL) {
            // nominal attribute: collect the set of distinct values over all
            // molecules (Hashtable used as a set; the values are ignored)
            Hashtable hashed = new Hashtable();

            for (int j = 0; j < length; j++) {
                mol = molecules.getMol(j);

                // get unparsed data
                pairData = (BasicPairData) mol.getData(attributes[i], false);

                if (pairData != null) {
                    if (pairData.getKeyValue() instanceof String) {
                        hashed.put(pairData.getKeyValue(), "");
                    } else {
                        hashed.put(pairData.toString(), "");
                    }
                }
            }

            // store list of nominal values in the Weka data structure
            FastVector attributeValues = new FastVector(hashed.size());
            String tmp;

            for (Enumeration e = hashed.keys(); e.hasMoreElements();) {
                tmp = (String) e.nextElement();
                attributeValues.addElement(tmp);
            }

            attributesV.addElement(new Attribute(attributes[i], attributeValues, attributesV.size()));
        }
    }

    int size = attributesV.size();
    Attribute attribute;

    // create molecule instances
    Instances instances = new Instances("MoleculeInstances", attributesV, attributesV.size());

    // iterate over all molecules and generate one instance for each
    double[] instance;

    for (int i = 0; i < length; i++) {
        mol = molecules.getMol(i);
        instance = new double[size];

        for (int j = 0; j < size; j++) {
            attribute = (Attribute) attributesV.elementAt(j);

            // get parsed data
            pairData = (BasicPairData) mol.getData(attribute.name(), true);

            // add nominal or numeric or missing value
            if (pairData == null) {
                instance[attribute.index()] = Instance.missingValue();
            } else {
                if (attribute.isNominal()) {
                    // nominal: reject multi-line values, then map the value to
                    // its index in the attribute's declared value list
                    String tmpS = pairData.toString().trim();

                    if (tmpS.indexOf("\n") != -1) {
                        throw new WekaException("Descriptor " + attribute.name()
                                + " contains multiple lines and is not a valid nominal value.");
                    } else {
                        // NOTE(review): the value list above was built from
                        // getKeyValue() for String data, but the lookup here
                        // uses toString() — confirm both yield the same text
                        // for String descriptors.
                        instance[attribute.index()] = attribute.indexOfValue(pairData.toString());

                        if (instance[attribute.index()] == -1) {
                            // invalid nominal value
                            logger.error("Invalid nominal value.");

                            return null;
                        }
                    }
                } else {
                    // numeric: only native (double-valued) descriptors allowed
                    if (pairData instanceof NativeValue) {
                        double tmpD = ((NativeValue) pairData).getDoubleNV();

                        if (Double.isNaN(tmpD)) {
                            instance[attribute.index()] = Instance.missingValue();
                        } else {
                            instance[attribute.index()] = tmpD;
                        }
                    } else {
                        throw new WekaException("Descriptor " + attribute.name() + " is not a native value.");
                    }
                }
            }
        }

        // add created molecule instance to molecule instances
        instances.add(new Instance(1, instance));
    }

    return instances;
}

From source file:meka.filters.unsupervised.attribute.MekaClassAttributes.java

License:Open Source License

/**
 * Determines the output format based on the input format and returns 
 * this. In case the output format cannot be returned immediately, i.e.,
 * hasImmediateOutputFormat() returns false, then this method will be called
 * from batchFinished() after the call of preprocess(Instances), in which,
 * e.g., statistics for the actual processing step can be gathered.
 *
 * @param inputFormat     the input format to base the output format on
 * @return                the output format
 * @throws Exception      in case the determination goes wrong
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    int i;
    int[] indices;
    StringBuilder order;
    Instances output;

    m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);
    order = new StringBuilder();
    indices = m_AttributeIndices.getSelection();
    if (indices.length == 0)
        throw new WekaException("No attributes defined as class attributes!");
    for (i = 0; i < indices.length; i++) {
        if (i > 0)
            order.append(",");
        order.append("" + (indices[i] + 1));
    }
    for (i = 0; i < inputFormat.numAttributes(); i++) {
        if (m_AttributeIndices.isInRange(i))
            continue;
        order.append(",");
        order.append("" + (i + 1));
    }
    m_Reorder.setAttributeIndices(order.toString());
    m_Reorder.setInputFormat(inputFormat);

    output = m_Reorder.getOutputFormat();
    output.setClassIndex(indices.length);
    output.setRelationName("-C " + indices.length);

    return output;
}

From source file:net.paudan.evosvm.LibLINEAR.java

License:Open Source License

/**
 * Computes the distribution for a given instance.
 *
 * @param instance the instance for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

    // Run the instance through the same preprocessing filters that the
    // model was configured with before building the feature vector.
    if (!getDoNotReplaceMissingValues()) {
        m_ReplaceMissingValues.input(instance);
        m_ReplaceMissingValues.batchFinished();
        instance = m_ReplaceMissingValues.output();
    }

    if (getConvertNominalToBinary() && m_NominalToBinary != null) {
        m_NominalToBinary.input(instance);
        m_NominalToBinary.batchFinished();
        instance = m_NominalToBinary.output();
    }

    if (m_Filter != null) {
        m_Filter.input(instance);
        m_Filter.batchFinished();
        instance = m_Filter.output();
    }

    FeatureNode[] featureVector = instanceToArray(instance);
    double[] distribution = new double[instance.numClasses()];

    if (!m_ProbabilityEstimates) {
        // Hard classification: all mass on the predicted class.
        int predicted = (int) Linear.predict(m_Model, featureVector);
        assert (instance.classAttribute().isNominal());
        distribution[predicted] = 1;
        return distribution;
    }

    // Probability output is only defined for the logistic-regression solvers.
    if (m_SolverType != SolverType.L2R_LR && m_SolverType != SolverType.L2R_LR_DUAL
            && m_SolverType != SolverType.L1R_LR) {
        throw new WekaException("probability estimation is currently only "
                + "supported for L2-regularized logistic regression");
    }

    int[] labelOrder = m_Model.getLabels();
    double[] rawEstimates = new double[instance.numClasses()];

    Linear.predictProbability(m_Model, featureVector, rawEstimates);

    // Map liblinear's label ordering back to Weka's canonical attribute order.
    for (int pos = 0; pos < labelOrder.length; pos++) {
        distribution[labelOrder[pos]] = rawEstimates[pos];
    }

    return distribution;
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Builds a classifier of the given type using the previously built (or loaded) training data.
 *
 * @param classifier a configured classifier, that is ready to be built.
 * @throws Exception if there is no training data
 */
public void buildClassifier(Classifier classifier) throws Exception {
    System.out.println("Disambiguator: Building classifier...");

    weightTrainingInstances();

    // Guard: training data must be loaded or generated before building.
    // (Message typo fixed: "builing" -> "building".)
    if (trainingData == null) {
        throw new WekaException(
                "You must load training data or train on a set of articles before building classifier.");
    }

    this.classifier = classifier;
    classifier.buildClassifier(trainingData);
}

From source file:org.wikipedia.miner.annotation.Disambiguator.java

License:Open Source License

/**
 * Tests the disambiguator on a set of Wikipedia articles, to see how well it makes the same 
 * decisions as the original article editors did. You need to train the disambiguator and build 
 * a classifier before using this.
 * 
 * @param testSet the set of articles to use for testing. You should make sure these are reasonably tidy, and roughly representative (in size, link distribution, etc) as the documents you intend to process automatically.
 * @param snippetLength the portion of each article that should be considered for testing (see ArticleCleaner).  
 * @param rc a cache in which relatedness measures will be saved so they aren't repeatedly calculated. Make this null if using extremely large testing sets, so that caches will be reset from document to document, and won't grow too large.
 * @return Result a result (including recall, precision, f-measure) of how well the classifier did.   
 * @throws SQLException if there is a problem with the WikipediaMiner database.
 * @throws Exception if there is a problem with the classifier
 */
public Result<Integer> test(ArticleSet testSet, int snippetLength, RelatednessCache rc)
        throws SQLException, Exception {

    // A classifier must have been built or loaded before testing.
    if (classifier == null)
        throw new WekaException("You must build (or load) classifier first.");

    Result<Integer> r = new Result<Integer>();

    ProgressNotifier pn = new ProgressNotifier(testSet.getArticleIds().size(), "Testing");
    for (int id : testSet.getArticleIds()) {
        // Declared per iteration: previously the variable lived outside the
        // loop, so a failed lookup left the prior iteration's article in
        // place and it was (incorrectly) tested a second time.
        Article art = null;

        try {
            art = new Article(wikipedia.getDatabase(), id);
        } catch (Exception e) {
            // Best-effort: skip invalid ids with a warning, keep testing.
            System.err.println("Warning: " + id + " is not a valid article");
        }

        if (art != null)
            r.addIntermediateResult(test(art, snippetLength, rc));

        pn.update();
    }

    return r;
}

From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java

License:Open Source License

/**
 * Builds a classifier of the given type using the previously built (or loaded) training data.
 *
 * @param classifier a configured classifier, that is ready to be built
 * @throws Exception if there is no training data
 */
public void buildClassifier(Classifier classifier) throws Exception {
    System.out.println("LinkDetector: Building classifier...");

    weightTrainingInstances();

    // Guard: training data must be loaded or generated before building.
    // (Message typo fixed: "builing" -> "building".)
    if (trainingData == null) {
        throw new WekaException(
                "You must load training data or train on a set of articles before building classifier.");
    }

    this.classifier = classifier;
    classifier.buildClassifier(trainingData);
}

From source file:org.wikipedia.miner.annotation.weighting.LinkDetector.java

License:Open Source License

/**
 * Tests the link detector on a set of Wikipedia articles, to see how well it makes the same 
 * decisions as the original article editors did. You need to train the link detector and build 
 * a classifier before using this.
 * 
 * @param testSet the set of articles to use for testing. You should make sure these are reasonably tidy, and roughly representative (in size, link distribution, etc) as the documents you intend to process automatically.
 * @param snippetLength the portion of each article that should be considered for testing (see ArticleCleaner). 
 * @param td a topic detector (along with a fully trained and built disambiguator) 
 * @param rc a cache in which relatedness measures will be saved so they aren't repeatedly calculated. Make this null if using extremely large testing sets, so that caches will be reset from document to document, and won't grow too large.
 * @return Result a result (including recall, precision, f-measure) of how well the classifier did.   
 * @throws Exception if there is a problem with the classifier
 */
public Result<Integer> test(ArticleSet testSet, int snippetLength, TopicDetector td, RelatednessCache rc)
        throws Exception {

    // A classifier must have been built or loaded before testing.
    if (classifier == null)
        throw new WekaException("You must build (or load) classifier first.");

    Result<Integer> result = new Result<Integer>();

    ProgressNotifier progress = new ProgressNotifier(testSet.getArticleIds().size(), "Testing");
    for (int id : testSet.getArticleIds()) {

        // Resolve the id; invalid articles are skipped with a warning.
        Article article = null;
        try {
            article = new Article(wikipedia.getDatabase(), id);
        } catch (Exception e) {
            System.err.println("Warning: " + id + " is not a valid article");
        }

        if (article != null)
            result.addIntermediateResult(test(article, snippetLength, td, rc));

        progress.update();
    }

    return result;
}

From source file:xlong.urlclassify.others.LibLINEAR.java

License:Open Source License

/**
 * Computes the distribution for a given instance.
 *
 * @param instance       the instance for which distribution is computed
 * @return          the distribution
 * @throws Exception       if the distribution can't be computed successfully
 */
public double[] distributionForInstance(Instance instance) throws Exception {

    // Apply the same preprocessing chain configured for the model before
    // converting the instance into a liblinear feature vector.
    if (!getDoNotReplaceMissingValues()) {
        m_ReplaceMissingValues.input(instance);
        m_ReplaceMissingValues.batchFinished();
        instance = m_ReplaceMissingValues.output();
    }

    if (getConvertNominalToBinary() && m_NominalToBinary != null) {
        m_NominalToBinary.input(instance);
        m_NominalToBinary.batchFinished();
        instance = m_NominalToBinary.output();
    }

    if (m_Filter != null) {
        m_Filter.input(instance);
        m_Filter.batchFinished();
        instance = m_Filter.output();
    }

    FeatureNode[] x = instanceToArray(instance);
    double[] result = new double[instance.numClasses()];
    if (m_ProbabilityEstimates) {
        // Probability output is only defined for the logistic-regression solvers.
        if (m_SolverType != SolverType.L2R_LR && m_SolverType != SolverType.L2R_LR_DUAL
                && m_SolverType != SolverType.L1R_LR) {
            throw new WekaException("probability estimation is currently only "
                    + "supported for L2-regularized logistic regression");
        }

        int[] labels = m_Model.getLabels();
        double[] prob_estimates = new double[instance.numClasses()];

        Linear.predictProbability(m_Model, x, prob_estimates);

        // Return order of probabilities to canonical weka attribute order
        for (int k = 0; k < labels.length; k++) {
            result[labels[k]] = prob_estimates[k];
        }
    } else {
        // Hard prediction: all mass on the predicted class.
        // NOTE(review): recent liblinear versions declare Linear.predict as
        // returning double (a sibling copy of this method casts to int);
        // this assignment assumes an int-returning version — confirm against
        // the bundled liblinear jar.
        int prediction = Linear.predict(m_Model, x);
        assert (instance.classAttribute().isNominal());
        result[prediction] = 1;
    }

    return result;
}