Example usage for weka.core Attribute indexOfValue

List of usage examples for weka.core Attribute indexOfValue

Introduction

On this page you can find example usages of weka.core.Attribute.indexOfValue.

Prototype

public final int indexOfValue(String value) 

Source Link

Document

Returns the index of a given attribute value, or -1 if the value cannot be found (or the attribute is neither nominal nor string).

Usage

From source file:adams.gui.visualization.instances.InstancesTableModel.java

License:Open Source License

/**
 * Sets the value in the cell at columnIndex and rowIndex to aValue. The
 * operation is ignored if the string representation of the new value equals
 * the string representation of the current cell value.
 * <p>
 * A {@code null} value marks the cell as missing. Date and numeric values
 * that cannot be parsed, nominal labels that the attribute does not define
 * and relational values that cannot be added are silently dropped, i.e., the
 * cell keeps its old value.
 *
 * @param aValue the new value
 * @param rowIndex the row index
 * @param columnIndex the column index
 * @param notify whether to notify the listeners
 * @throws IllegalArgumentException if the cell's attribute type is not supported
 */
public void setValueAt(Object aValue, int rowIndex, int columnIndex, boolean notify) {
    int type;
    int index;
    int labelIndex;
    String tmp;
    Instance inst;
    Attribute att;
    Object oldValue;
    int offset;

    // leading table columns that do not map to attributes: one fixed column
    // plus the optional weights column
    offset = 1;
    if (m_ShowWeightsColumn)
        offset++;

    // nothing to do if the displayed value does not change
    oldValue = getValueAt(rowIndex, columnIndex);
    if (("" + oldValue).equals("" + aValue))
        return;

    if (!m_IgnoreChanges)
        addUndoPoint();

    type = getType(rowIndex, columnIndex);
    index = columnIndex - offset;
    inst = m_Data.instance(rowIndex);
    att = inst.attribute(index);

    // missing?
    if (aValue == null) {
        inst.setValue(index, Utils.missingValue());
    } else {
        tmp = aValue.toString();

        switch (type) {
        case Attribute.DATE:
            try {
                // parse once; an unparseable date leaves the cell untouched
                inst.setValue(index, att.parseDate(tmp));
            } catch (Exception e) {
                // ignore
            }
            break;

        case Attribute.NOMINAL:
            // look the label up once; unknown labels leave the cell untouched
            labelIndex = att.indexOfValue(tmp);
            if (labelIndex > -1)
                inst.setValue(index, labelIndex);
            break;

        case Attribute.STRING:
            inst.setValue(index, tmp);
            break;

        case Attribute.NUMERIC:
            try {
                inst.setValue(index, Double.parseDouble(tmp));
            } catch (Exception e) {
                // ignore
            }
            break;

        case Attribute.RELATIONAL:
            try {
                inst.setValue(index, att.addRelation((Instances) aValue));
            } catch (Exception e) {
                // ignore
            }
            break;

        default:
            throw new IllegalArgumentException("Unsupported Attribute type: " + type + "!");
        }
    }

    // reached only if the new value differed from the old one (see early return)
    if (notify)
        notifyListener(new TableModelEvent(this, rowIndex, columnIndex));
}

From source file:adams.ml.data.WekaConverter.java

License:Open Source License

/**
 * Turns an ADAMS dataset row into a Weka Instance.
 *
 * @param data   the dataset to use as template
 * @param row      the row to convert/*from  w w  w .ja v a  2  s.c  o m*/
 * @return      the generated instance
 * @throws Exception   if conversion fails
 */
public static Instance toInstance(Instances data, Row row) throws Exception {
    Instance result;
    double[] values;
    int i;
    Cell cell;
    Attribute att;

    values = new double[data.numAttributes()];
    for (i = 0; i < data.numAttributes(); i++) {
        values[i] = Utils.missingValue();

        if (!row.hasCell(i))
            continue;
        cell = row.getCell(i);
        if (cell.isMissing())
            continue;

        att = data.attribute(i);
        switch (att.type()) {
        case Attribute.NUMERIC:
            values[i] = cell.toDouble();
            break;
        case Attribute.DATE:
            values[i] = cell.toAnyDateType().getTime();
            break;
        case Attribute.NOMINAL:
            values[i] = att.indexOfValue(cell.getContent());
            break;
        case Attribute.STRING:
            values[i] = att.addStringValue(cell.getContent());
            break;
        default:
            throw new Exception("Unhandled Weka attribute type: " + Attribute.typeToString(att));
        }
    }

    result = new DenseInstance(1.0, values);
    result.setDataset(data);

    return result;
}

From source file:ca.uottawa.balie.FeatureRepresentationNominal.java

License:Open Source License

/**
 * Filters raw feature strings against the nominal attributes they belong to.
 * The last attribute of {@code pi_Attributes} is not processed, so the
 * result has one element less than there are attributes.
 *
 * @param pi_Inst       the raw feature values, one per processed attribute
 * @param pi_Attributes the attributes; element i is matched against pi_Inst[i]
 * @param pi_MinFreq    not used by this method
 * @return for each position the raw value if the attribute defines it,
 *         otherwise {@code null}
 */
public Object[] GetWekaInstance(String[] pi_Inst, FastVector pi_Attributes, int pi_MinFreq) {
    final int featureCount = pi_Attributes.size() - 1;
    Object[] values = new Object[featureCount];

    for (int pos = 0; pos < featureCount; pos++) {
        Attribute attribute = (Attribute) pi_Attributes.elementAt(pos);
        String raw = pi_Inst[pos];
        boolean known = raw != null && attribute.indexOfValue(raw) != -1;
        values[pos] = known ? raw : null;
    }

    return values;
}

From source file:com.actelion.research.orbit.imageAnalysis.tasks.ObjectTrainWorker.java

License:Open Source License

/**
 * Trains the object classifier from the class shapes of all image frames.
 * <p>
 * Steps: collect object features per class shape, build a Weka training set
 * whose last attribute is the nominal class ("1.0", "2.0", ...), train an SMO
 * classifier, store it in {@code modelToBuild} and evaluate it on the
 * training set. If the accuracy is below {@code OrbitUtils.ACCURACY_WARNING}
 * a warning is logged and, in GUI mode, shown in a dialog.
 * <p>
 * The method returns early (after logging an error) if the training shapes
 * come from different mip layers, if the mip layer differs from the one of
 * {@code modelToBuild}, or if no training data could be collected. It also
 * returns early when the task was cancelled.
 */
@Override
protected void doWork() {
    // one-shot flag: skip this run and re-arm
    if (dontRun) {
        dontRun = false;
        return;
    }
    trainSet = null;
    if (modelToBuild != null && modelToBuild.getClassifier() != null)
        modelToBuild.getClassifier().setBuild(false);
    List<double[]> trainData = new ArrayList<double[]>();
    int mipLayer = -1; // used for checking if all iFrames (with trainData) have the same mapLayer (otherwise the model cannot be trained)
    for (ImageFrame iFrame : iFrames) {
        int sampleSize = Math.min(3, iFrame.recognitionFrame.bimg.getImage().getSampleModel().getNumBands()); // was always 1 before! (max 3 because alpha should be ignored)
        for (int i = 0; i < iFrame.recognitionFrame.getClassShapes().size(); i++) {
            checkPaused();
            List<Shape> shapes = iFrame.recognitionFrame.getClassShapes().get(i).getShapeList();
            if (shapes != null && shapes.size() > 0) {
                if (mipLayer < 0) {
                    mipLayer = iFrame.getMipLayer();
                    logger.trace("iFrame candidate mipLayer {} from iFrame with width {}", mipLayer,
                            iFrame.recognitionFrame.bimg.getWidth());
                } else {
                    if (mipLayer != iFrame.getMipLayer()) {
                        logger.error(
                                "Cell classifier cannot be trained on different image layers. Please use only training data of the same image layer.");
                        return;
                    }
                }
                // NOTE(review): modelToBuild is null-checked above but dereferenced
                // unconditionally here and further below - confirm it can never be null.
                if (mipLayer != modelToBuild.getMipLayer()) {
                    // only same layer as segmentation allowed. Otherwise the cell features must be scaled, too (which is not yet the case).
                    logger.error("Cell classifier must be trained on same layer as segmentation");
                    return;
                }
            }
            trainData.addAll(new ObjectFeatureBuilderTiled(null).buildFeatures(shapes, i + 1,
                    iFrame.recognitionFrame, iFrame.recognitionFrame.getClassImage(), sampleSize, 0, 0)); // classes 1.0, 2.0, ...
        }
    }
    logger.trace("train levelNum: {}", mipLayer);
    if (trainData.size() == 0) {
        logger.error("trainset is empty, classifier cannot be trained.");
        trainSet = null;
        return;
    }
    if (isCancelled()) {
        cleanUp();
        return;
    }
    timeEst = 1000 * 10L;
    setProgress(10);

    logger.debug("trainData contains " + trainData.size() + " samples");

    Attribute classAttr = null;
    // create the first time a new trainSet. All further trainings will append new instances.
    if (trainSet == null) {
        // build traindata header
        double[] firstRowAll = trainData.get(0);
        double[] firstRow = Arrays.copyOfRange(firstRowAll, 0,
                firstRowAll.length - ObjectFeatureBuilderTiled.SkipTailForClassification);
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length);
        // all but the last column are numeric features, the last one is the class
        for (int a = 0; a < firstRow.length - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d);
            attrInfo.add(attr);
        }
        List<String> classValues = new ArrayList<String>(
                iFrames.get(0).recognitionFrame.getClassShapes().size());
        for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size());
        trainSet.setClassIndex(firstRow.length - 1);
    } else
        classAttr = trainSet.attribute("class");

    // add instances
    for (double[] valsAll : trainData) {
        // skip some non relevant attributes like centerX/Y
        double[] vals = Arrays.copyOfRange(valsAll, 0,
                valsAll.length - ObjectFeatureBuilderTiled.SkipTailForClassification);
        vals[vals.length - 1] = valsAll[valsAll.length - 1]; // class value

        // translate the class value (e.g. 2.0) into the index of its label ("2.0")
        double classV = classAttr.indexOfValue(Double.toString(vals[vals.length - 1]));
        vals[vals.length - 1] = classV;
        Instance inst = new DenseInstance(1.0d, vals);
        trainSet.add(inst);
    }
    // trainSet = trainSet.resample(rand);
    logger.debug("trainSet contains " + trainSet.numInstances() + " instances");

    if (logger.isTraceEnabled())
        logger.trace(trainSet.toString());

    // building classifier
    if (isCancelled()) {
        cleanUp();
        return;
    }
    checkPaused();
    timeEst = 1000 * 5L;
    setProgress(20);
    logger.info("Start training classifier... ");
    classifier = new ClassifierWrapper(new weka.classifiers.functions.SMO());
    try {
        classifier.buildClassifier(trainSet);
        classifier.setBuild(true);
        modelToBuild.setClassifier(classifier);
        modelToBuild.setStructure(trainSet.stringFreeStructure());
        modelToBuild.setCellClassification(true);
        modelToBuild.setMipLayer(mipLayer);
        setProgress(85);

        // evaluation
        StringBuilder cnamesInfo = new StringBuilder(
                "Evaluation for object classification model with classes: ");
        for (int i = 0; i < modelToBuild.getClassShapes().size(); i++) {
            cnamesInfo.append(modelToBuild.getClassShapes().get(i).getName());
            if (i < modelToBuild.getClassShapes().size() - 1)
                cnamesInfo.append(", ");
        }
        logger.info(cnamesInfo.toString());
        Evaluation evaluation = new Evaluation(trainSet);
        evaluation.evaluateModel(classifier.getClassifier(), trainSet);
        logger.info(evaluation.toSummaryString());
        if (evaluation.pctCorrect() < OrbitUtils.ACCURACY_WARNING) {
            final String w = "Warning: The model classifies the training objects only with an accuracy of "
                    + evaluation.pctCorrect()
                    + "%.\nThat means that the marked objects are not diverse enough.\nYou might want to remove some marked objects and mark some more representative ones.\nHowever, you can still use this model if you want (check the object classification).";
            logger.warn(w);
            if (withGUI && !ScaleoutMode.SCALEOUTMODE.get()) {
                // Swing components must only be touched on the event dispatch
                // thread, so hand the dialog over instead of showing it from here.
                javax.swing.SwingUtilities.invokeLater(new Runnable() {
                    @Override
                    public void run() {
                        JOptionPane.showMessageDialog(null, w, "Warning: Low accuracy",
                                JOptionPane.WARNING_MESSAGE);
                    }
                });
            }
        }

    } catch (Exception e) {
        // training failed: drop the classifier, the error is only logged
        classifier = null;
        logger.error("error training classifier: ", e);
    }
    logger.info("training done.");
    timeEst = 0L;
    setProgress(100);

}

From source file:com.actelion.research.orbit.imageAnalysis.tasks.TrainWorker.java

License:Open Source License

/**
 * Trains the classifier from the class shapes of all image frames.
 * <p>
 * Steps: collect the features of all class shapes, limit them to at most
 * {@code MAXINST} randomly chosen samples, build a Weka training set whose
 * last attribute is the nominal class ("1.0", "2.0", ...), resample it,
 * train an SMO classifier and store it in {@code modelToBuild}. Finally the
 * model is evaluated on the training set; if the accuracy is below
 * {@code OrbitUtils.ACCURACY_WARNING} a warning is logged and, in GUI mode,
 * shown in a dialog on the event dispatch thread.
 * <p>
 * The method returns early (after logging) if the training shapes come from
 * different mip layers, if no training data could be collected or if the
 * task was cancelled. Exceptions during training/evaluation are logged and
 * reset {@code classifier} to {@code null}.
 *
 * @throws OrbitImageServletException declared by this method; presumably
 *         raised while extracting the features - TODO confirm
 */
private void trainClassifier() throws OrbitImageServletException {
    logger.debug("start trainClassifier");
    if (modelToBuild != null && modelToBuild.getClassifier() != null)
        modelToBuild.getClassifier().setBuild(false);
    trainSet = null;
    List<double[]> trainData = new ArrayList<double[]>();
    int mipLayer = -1; // used for checking if all iFrames (with trainData) have the same mapLayer (otherwise the model cannot be trained)
    for (ImageFrame iFrame : iFrames) {
        if (logger.isTraceEnabled())
            logger.trace(
                    iFrame.getTitle() + ": #ClassShapes: " + iFrame.recognitionFrame.getClassShapes().size());
        for (int i = 0; i < iFrame.recognitionFrame.getClassShapes().size(); i++) {
            //  checkPaused();
            if (iFrame.recognitionFrame.getClassShapes().get(i).getShapeList().size() > 0) { // set and check mip level only for iFrames with shapes (training data)
                if (mipLayer < 0) {
                    // first frame with training data defines the layer all others must match
                    mipLayer = iFrame.getMipLayer();
                    logger.trace("iFrame candidate mipLayer {} from iFrame with width {}", mipLayer,
                            iFrame.recognitionFrame.bimg.getWidth());
                } else {
                    if (mipLayer != iFrame.getMipLayer()) {
                        logger.error(
                                "Model cannot be trained on different image layers. Please use only training data of the same image layer.");
                        return;
                    }
                }
            }
            List<Shape> shapes = iFrame.recognitionFrame.getClassShapes().get(i).getShapeList();
            trainData.addAll(getFeatures(shapes, i + 1, iFrame.recognitionFrame.bimg)); // classes 1.0, 2.0, ...
        }
    }
    logger.trace("train levelNum: {}", mipLayer);

    if (trainData.size() == 0) {
        logger.error("trainset is empty, classifier cannot be trained.");
        trainSet = null;
        return;
    }
    if (isCancelled()) {
        logger.debug("canceled");
        cleanUp();
        return;
    }
    timeEst = 1000 * 10L;
    setProgress(10);

    logger.debug("trainData contains " + trainData.size() + " samples");

    // limit training instances
    if (trainData.size() > MAXINST) {
        // shuffle first so that the kept MAXINST samples are a random subset
        Collections.shuffle(trainData, rand);
        trainData = trainData.subList(0, MAXINST);
        logger.debug("trainSet shirked to " + trainData.size() + " instances");
    }

    Attribute classAttr = null;
    // create the first time a new trainSet. All further trainings will append new instances.
    // NOTE(review): trainSet is reset to null at the top of this method, so the
    // else branch below looks unreachable - confirm.
    if (trainSet == null) {
        // build traindata header
        double[] firstRow = trainData.get(0);
        ArrayList<Attribute> attrInfo = new ArrayList<Attribute>(firstRow.length);
        // all but the last column are numeric features, the last one is the class
        for (int a = 0; a < firstRow.length - 1; a++) {
            Attribute attr = new Attribute("a" + a);
            // if (a<firstRow.length-2) attr.setWeight(0.1d); else attr.setWeight(1.0d);
            attrInfo.add(attr);
        }
        // class labels are derived from the number of class shapes of the first frame
        List<String> classValues = new ArrayList<String>(
                iFrames.get(0).recognitionFrame.getClassShapes().size());
        for (int i = 0; i < iFrames.get(0).recognitionFrame.getClassShapes().size(); i++) {
            classValues.add((i + 1) + ".0"); // "1.0", "2.0", ...
        }
        classAttr = new Attribute("class", classValues);
        attrInfo.add(classAttr);

        trainSet = new Instances("trainSet pattern classes", attrInfo, trainData.size());
        trainSet.setClassIndex(firstRow.length - 1);
    } else
        classAttr = trainSet.attribute("class");

    // add instances
    for (double[] vals : trainData) {
        // translate the class value (e.g. 2.0) into the index of its label ("2.0");
        // the array from trainData is modified in place
        double classV = classAttr.indexOfValue(Double.toString(vals[vals.length - 1]));
        vals[vals.length - 1] = classV;
        //Instance inst = new Instance(1.0d, vals);
        Instance inst = new DenseInstance(1.0d, vals);
        trainSet.add(inst);
    }
    trainSet = trainSet.resample(rand);
    logger.debug("trainSet contains " + trainSet.numInstances() + " instances");

    // building classifier
    if (isCancelled()) {
        cleanUp();
        return;
    }
    checkPaused();
    timeEst = 1000 * 5L;
    setProgress(20);
    logger.info("Start training classifier... ");
    Classifier c;
    /*
    // experiments with deep learning... do not use in production.
    if (AparUtils.DEEPORBIT) {
    FeatureDescription fd = modelToBuild!=null? modelToBuild.getFeatureDescription(): new FeatureDescription();
    TissueFeatures tissueFeaturre = AparUtils.createTissueFeatures(fd, null);
    int numOutNeurons = modelToBuild.getClassShapes().size();
    int numInNeurons = tissueFeaturre.prepareDoubleArray().length-1;
    logger.debug("numNeuronsIn:"+numInNeurons+"  numNeuronsOut:"+numOutNeurons);
    MultiLayerPerceptron neuralNet = new MultiLayerPerceptron(numInNeurons,100, numOutNeurons);
    for (int a=0; a<numOutNeurons; a++) {
      neuralNet.getOutputNeurons()[a].setLabel("class"+a);
    }
    neuralNet.connectInputsToOutputs();
            
    MomentumBackpropagation mb = new MomentumBackpropagation();
    mb.setLearningRate(0.2d);
    mb.setMomentum(0.7d);
    //mb.setMaxIterations(20);
    mb.setMaxError(0.12);
    neuralNet.setLearningRule(mb);
    c = new WekaNeurophClassifier(neuralNet);
            
    } else {
    c = new weka.classifiers.functions.SMO();
    }
    */
    c = new weka.classifiers.functions.SMO();

    //weka.classifiers.functions.LibSVM c = new weka.classifiers.functions.LibSVM();
    //Classifier c = new weka.classifiers.trees.J48();
    classifier = new ClassifierWrapper(c);
    //classifier = new weka.classifiers.bayes.BayesNet();
    //classifier = new weka.classifiers.functions.MultilayerPerceptron();
    //((weka.classifiers.functions.SMO)classifier).setKernel(new weka.classifiers.functions.supportVector.RBFKernel());
    try {

        classifier.buildClassifier(trainSet);
        classifier.setBuild(true);
        modelToBuild.setClassifier(classifier);
        modelToBuild.setStructure(trainSet.stringFreeStructure());
        modelToBuild.setCellClassification(false);
        modelToBuild.setMipLayer(mipLayer);
        logger.debug("training done");

        // evaluation
        StringBuilder cnamesInfo = new StringBuilder("Evaluation for model with classes: ");
        for (int i = 0; i < modelToBuild.getClassShapes().size(); i++) {
            cnamesInfo.append(modelToBuild.getClassShapes().get(i).getName());
            if (i < modelToBuild.getClassShapes().size() - 1)
                cnamesInfo.append(", ");
        }
        logger.info(cnamesInfo.toString());
        // the model is evaluated on the very data it was trained on
        Evaluation evaluation = new Evaluation(trainSet);
        evaluation.evaluateModel(classifier.getClassifier(), trainSet);
        logger.info(evaluation.toSummaryString());
        if (evaluation.pctCorrect() < OrbitUtils.ACCURACY_WARNING) {
            final String w = "Warning: The model classifies the training shapes only with an accuracy of "
                    + evaluation.pctCorrect()
                    + "%.\nThat means that the drawn class shapes are not diverse enough.\nYou might want to remove some class shapes and mark some more representative regions.\nHowever, you can still use this model if you want (check the classification).";
            logger.warn(w);
            if (withGUI && !ScaleoutMode.SCALEOUTMODE.get()) {
                // show the dialog on the event dispatch thread without blocking this method
                SwingUtilities.invokeLater(new Runnable() {
                    @Override
                    public void run() {
                        JOptionPane.showMessageDialog(null, w, "Warning: Low accuracy",
                                JOptionPane.WARNING_MESSAGE);
                    }
                });
            }
        }

    } catch (Exception e) {
        // training failed: drop the classifier, the error is only logged
        classifier = null;
        logger.error("error training classifier", e);
    }
    //   logger.trace(classifier.toString());

}

From source file:edu.cuny.qc.speech.AuToBI.util.ClassifierUtils.java

License:Open Source License

/**
 * Converts a single data point into a weka Instance, using the attributes of
 * the given (possibly empty) Instances object as the schema.
 * <p>
 * An attribute the data point does not carry, or whose value is "?", becomes
 * a missing value. Nominal values are stored as the index of their label,
 * string values are registered with the attribute, numeric values are parsed;
 * an unparseable number or an unsupported attribute type is reported through
 * {@code AuToBIUtils.error} and the slot keeps its initial 0.0.
 *
 * @param instances  the weka Instances object containing attributes
 * @param data_point the data point to convert
 * @return a weka instance with assigned attributes, attached to {@code instances}
 */
protected static Instance assignWekaAttributes(Instances instances, Word data_point) {
    final int attributeCount = instances.numAttributes();
    double[] values = new double[attributeCount];

    for (int pos = 0; pos < attributeCount; ++pos) {
        Attribute attribute = instances.attribute(pos);
        String name = attribute.name();

        // attribute not present on the data point -> missing
        if (!data_point.hasAttribute(name)) {
            values[pos] = Utils.missingValue();
            continue;
        }

        // "?" is the textual marker for a missing value
        String text = data_point.getAttribute(name).toString();
        if (text.equals("?")) {
            values[pos] = Utils.missingValue();
            continue;
        }

        switch (attribute.type()) {
        case Attribute.NOMINAL:
            values[pos] = (double) attribute.indexOfValue(text);
            break;
        case Attribute.NUMERIC:
            // Check if value is really a number.
            try {
                values[pos] = Double.parseDouble(text);
            } catch (NumberFormatException e) {
                AuToBIUtils.error("Number expected for feature: " + name);
            }
            break;
        case Attribute.STRING:
            values[pos] = attribute.addStringValue(text);
            break;
        default:
            AuToBIUtils.error("Unknown attribute type");
        }
    }

    Instance inst = new DenseInstance(1, values);
    inst.setDataset(instances);
    return inst;
}

From source file:en_deep.mlprocess.manipulation.featmodif.ReplaceMissing.java

License:Open Source License

/**
 * Derives the output format from the input format and registers it.
 * <p>
 * Attributes outside the configured column range, and attributes that
 * already know the replacement value, are copied unchanged. Nominal
 * attributes in range get the replacement value appended to their labels;
 * all remaining attributes in range are copied and the replacement value is
 * added as a string value.
 */
private void setOutputFormat() {

    final Instances input = getInputFormat();
    final int numAtts = input.numAttributes();
    FastVector newAtts = new FastVector();

    for (int col = 0; col < numAtts; col++) {
        Attribute source = input.attribute(col);
        Attribute target;

        boolean keepAsIs = !m_Columns.isInRange(col) || source.indexOfValue(m_ReplVal) >= 0;

        if (keepAsIs) {
            target = (Attribute) source.copy();
        } else if (source.isNominal()) {
            // rebuild the attribute with the replacement value as additional label
            ArrayList<String> labels = new ArrayList<String>();
            for (Enumeration<String> en = source.enumerateValues(); en.hasMoreElements();) {
                labels.add(en.nextElement());
            }
            labels.add(m_ReplVal);
            target = new Attribute(source.name(), labels);
        } else { // string attributes
            target = (Attribute) source.copy();
            target.addStringValue(m_ReplVal);
        }

        newAtts.addElement(target);
    }

    // every input attribute is a candidate source for string copying
    BitSet attrSrc = new BitSet();
    attrSrc.set(0, numAtts);

    Instances outputFormat = new Instances(input.relationName(), newAtts, 0);
    outputFormat.setClassIndex(input.classIndex());

    setOutputFormat(outputFormat);

    m_StringToCopy = new AttributeLocator(input, Attribute.STRING, MathUtils.findTrue(attrSrc));
}

From source file:gov.va.chir.tagline.TagLineEvaluator.java

License:Open Source License

/**
 * Looks up the label index of every ID in the given document ID attribute.
 *
 * @param attrDocId the attribute whose labels are the document IDs
 * @param ids       the IDs to look up; converted with {@code String.valueOf}
 * @return the label indices in the iteration order of {@code ids}; an entry
 *         is whatever {@code indexOfValue} returns for that ID
 */
private int[] getAttributeIndexValues(final Attribute attrDocId, final Set<Object> ids) {
    final int[] result = new int[ids.size()];
    int next = 0;

    for (final Object docId : ids) {
        result[next++] = attrDocId.indexOfValue(String.valueOf(docId));
    }

    return result;
}

From source file:joelib2.algo.datamining.yale.ExampleSetHelper.java

License:Open Source License

/**
 * Builds a Weka data set with one instance per molecule and one attribute
 * per entry of {@code attributes}.
 * <p>
 * Only {@code Attribute.NUMERIC} and {@code Attribute.NOMINAL} types are
 * turned into attributes; entries with any other type are skipped. The label
 * list of a nominal attribute is collected from the unparsed data of all
 * molecules. Data that a molecule does not provide, and numeric values that
 * are NaN, become missing values.
 *
 * @param molecules      the molecules to convert
 * @param attributes     the names of the data entries to use as attributes
 * @param attributeTypes the Weka attribute type for each entry of {@code attributes}
 * @return the generated instances, or {@code null} if a molecule holds a
 *         nominal value that is not among the collected labels
 * @throws WekaException if the two arrays differ in length, a nominal value
 *         spans multiple lines or a numeric entry is not a native value
 */
public static Instances createMolInstances(MoleculeVector molecules, String[] attributes, int[] attributeTypes)
        throws WekaException {
    // load descriptor binning
    DescriptorBinning binning = DescriptorBinning.getDescBinning(molecules);

    int length = molecules.getSize();

    if (attributes.length != attributeTypes.length) {
        throw new WekaException("Different number of attributes and attribute types.");
    }

    FastVector attributesV = new FastVector(binning.numberOfDescriptors());
    Molecule mol;
    BasicPairData pairData;

    for (int i = 0; i < attributes.length; i++) {
        if (attributeTypes[i] == Attribute.NUMERIC) {
            // numeric: named after the requested entry, exactly like the
            // nominal case, so that name and type always belong together
            attributesV.addElement(new Attribute(attributes[i], attributesV.size()));
        } else if (attributeTypes[i] == Attribute.NOMINAL) {
            // nominal
            // create a list with all nominal values (Hashtable keys act as a set)
            Hashtable hashed = new Hashtable();

            for (int j = 0; j < length; j++) {
                mol = molecules.getMol(j);

                // get unparsed data
                pairData = (BasicPairData) mol.getData(attributes[i], false);

                if (pairData != null) {
                    if (pairData.getKeyValue() instanceof String) {
                        hashed.put(pairData.getKeyValue(), "");
                    } else {
                        hashed.put(pairData.toString(), "");
                    }
                }
            }

            // store list of nominal values in the Weka data structure
            FastVector attributeValues = new FastVector(hashed.size());

            for (Enumeration e = hashed.keys(); e.hasMoreElements();) {
                attributeValues.addElement((String) e.nextElement());
            }

            attributesV.addElement(new Attribute(attributes[i], attributeValues, attributesV.size()));
        }
    }

    int size = attributesV.size();
    Attribute attribute;

    // create molecule instances; initial capacity is the number of molecules
    Instances instances = new Instances("MoleculeInstances", attributesV, length);

    // iterate over all instances (to generate them)
    double[] instance;

    for (int i = 0; i < length; i++) {
        mol = molecules.getMol(i);
        instance = new double[size];

        for (int j = 0; j < size; j++) {
            attribute = (Attribute) attributesV.elementAt(j);

            // get parsed data
            pairData = (BasicPairData) mol.getData(attribute.name(), true);

            // add nominal or numeric or missing value
            if (pairData == null) {
                instance[attribute.index()] = Instance.missingValue();
            } else if (attribute.isNominal()) {
                // nominal
                String tmpS = pairData.toString().trim();

                if (tmpS.indexOf("\n") != -1) {
                    throw new WekaException("Descriptor " + attribute.name()
                            + " contains multiple lines and is not a valid nominal value.");
                }

                // NOTE(review): the lookup uses the untrimmed text while the
                // label list was built from the unparsed data - confirm both
                // representations always match.
                int labelIndex = attribute.indexOfValue(pairData.toString());

                if (labelIndex == -1) {
                    // invalid nominal value
                    logger.error("Invalid nominal value.");

                    return null;
                }

                instance[attribute.index()] = labelIndex;
            } else {
                // numeric
                if (pairData instanceof NativeValue) {
                    double tmpD = ((NativeValue) pairData).getDoubleNV();

                    if (Double.isNaN(tmpD)) {
                        instance[attribute.index()] = Instance.missingValue();
                    } else {
                        instance[attribute.index()] = tmpD;
                    }
                } else {
                    throw new WekaException("Descriptor " + attribute.name() + " is not a native value.");
                }
            }
        }

        // add created molecule instance to molecule instances
        instances.add(new Instance(1, instance));
    }

    return instances;
}

From source file:joelib2.algo.datamining.yale.ExampleSetHelper.java

License:Open Source License

/**
 * Converts a descriptor matrix into a Weka data set.
 * <p>
 * The matrix is indexed as {@code matrix[descriptor][molecule]}. Only
 * {@code Attribute.NUMERIC} and {@code Attribute.NOMINAL} descriptors are
 * turned into attributes; descriptors of any other type are skipped. The
 * labels of a nominal attribute are the string forms of the distinct values
 * in its matrix row. NaN entries become missing values. An empty matrix
 * yields a data set without instances.
 *
 * @param matrix         the values, one row per descriptor and one column per molecule
 * @param descriptors    the descriptor names, used as attribute names
 * @param attributeTypes the Weka attribute type for each descriptor
 * @return the generated instances, or {@code null} if a nominal value is not
 *         among the labels of its attribute
 */
public static Instances matrix2instances(double[][] matrix, String[] descriptors, int[] attributeTypes) {
    FastVector attributesV = new FastVector(descriptors.length);
    // guard against an empty matrix instead of failing on matrix[0]
    int molecules = (matrix.length == 0) ? 0 : matrix[0].length;

    // matrix row that feeds the attribute at each position; differs from the
    // attribute position as soon as a descriptor of unsupported type is skipped
    int[] sourceRow = new int[descriptors.length];

    for (int i = 0; i < descriptors.length; i++) {
        if (attributeTypes[i] == Attribute.NUMERIC) {
            // numeric
            sourceRow[attributesV.size()] = i;
            attributesV.addElement(new Attribute(descriptors[i], attributesV.size()));
        } else if (attributeTypes[i] == Attribute.NOMINAL) {
            // nominal
            // create a list with all nominal values (Hashtable keys act as a set)
            Hashtable hashed = new Hashtable();

            for (int j = 0; j < molecules; j++) {
                hashed.put(Double.valueOf(matrix[i][j]), "");
            }

            // store list of nominal values in the Weka data structure
            FastVector attributeValues = new FastVector(hashed.size());

            for (Enumeration e = hashed.keys(); e.hasMoreElements();) {
                attributeValues.addElement(((Double) e.nextElement()).toString());
            }

            sourceRow[attributesV.size()] = i;
            attributesV.addElement(new Attribute(descriptors[i], attributeValues, attributesV.size()));
        }
    }

    int descriptorSize = attributesV.size();
    Attribute attribute = null;

    // create molecule instances; initial capacity is the number of molecules
    Instances instances = new Instances("MatrixInstances", attributesV, molecules);

    // iterate over all instances (to generate them)
    double[] instance;

    for (int i = 0; i < molecules; i++) {
        instance = new double[descriptorSize];

        for (int j = 0; j < descriptorSize; j++) {
            attribute = (Attribute) attributesV.elementAt(j);

            int row = sourceRow[j];
            double value = matrix[row][i];

            if (Double.isNaN(value)) {
                instance[attribute.index()] = Instance.missingValue();
            } else if (attributeTypes[row] == Attribute.NUMERIC) {
                // numeric
                instance[attribute.index()] = value;
            } else if (attributeTypes[row] == Attribute.NOMINAL) {
                // nominal: same string form that was used to build the labels
                int labelIndex = attribute.indexOfValue(Double.toString(value));

                if (labelIndex == -1) {
                    // invalid nominal value
                    logger.error("Invalid nominal value.");

                    return null;
                }

                instance[attribute.index()] = labelIndex;
            }
        }

        // add created molecule instance to molecule instances
        Instance inst = new Instance(1, instance);
        instances.add(inst);
    }

    return instances;
}