List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
From source file:predictors.HelixPredictor.java
License:Open Source License
/** * Analyzes a given segment and returns the TMH probability. * /*from w w w. j a va 2 s . c o m*/ * @param pssm * @param start * @param end * @return */ public double getSegmentProbability(Pssm pssm, int start, int end) { double tmhProbability = -1; try { Instance window = this.buildInstance(pssm, start, end); window.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1)); window.setDataset(this.dataset); tmhProbability = this.classifier.distributionForInstance(window)[Mappings.indexTmh]; ; } catch (Exception e) { ErrorUtils.printError(HelixPredictor.class, "Prediction failed for segment (" + start + "-" + end + ")", e); return -1.0; } return tmhProbability; }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/** * Predicts the N-terminal topology for a given protein. * /*from w w w . j a v a2 s . co m*/ * @param protein * @param cutoff */ public void predict(Protein protein, double cutoff) { if (protein == null) { return; } if (protein.getPssm() == null) { return; } if (protein.getPrediction() == null) { return; } if (!protein.isPredTmp()) { return; } Pssm pssm = protein.getPssm(); char[] prediction = protein.getPrediction(); try { ArrayList<Segment> solSegments = findSegments(prediction); Instance instance = this.buildInstance(pssm, prediction, solSegments, 0); instance.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1)); instance.setDataset(this.dataset); double[] probabilities = this.classifier.distributionForInstance(instance); char top = Character.UNASSIGNED; protein.setTopologyRaw((int) (1000 * probabilities[TopologyPredictor.indexInside])); if (!protein.hasPredSigP() && probabilities[TopologyPredictor.indexInside] >= cutoff) { top = Mappings.intToTop(Mappings.indexInside); } else { top = Mappings.intToTop(Mappings.indexOutside); } for (int i = 0; i < prediction.length; ++i) { char type = prediction[i]; if (Mappings.ssToInt(type) == Mappings.indexNotTmh) { prediction[i] = top; } else if (Mappings.ssToInt(type) == Mappings.indexTmh) { if (top == Mappings.intToTop(Mappings.indexInside)) { top = Mappings.intToTop(Mappings.indexOutside); } else { top = Mappings.intToTop(Mappings.indexInside); } while (i < prediction.length && type == prediction[i]) { ++i; } --i; } } } catch (Exception e) { ErrorUtils.printError(TopologyPredictor.class, "Prediction failed for " + protein.getHeader(), e); return; } }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/** * Analyzes a given window and saves it in the database. * /*from ww w. j a va 2s . c o m*/ * @param pssm * @param structure * @param structureIndex * @param startPos */ private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) { ArrayList<Segment> solSegments = findSegments(structure); Instance segment = this.buildInstance(pssm, structure, solSegments, startPos); segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex); segment.setDataset(this.dataset); this.dataset.add(segment); }
From source file:preprocess.StringToWordVector.java
License:Open Source License
/**
 * Converts a single instance to the sparse word-vector output format,
 * without document-length normalization.
 *
 * @param instance the instance to convert
 * @param v        output vector the converted sparse instance is appended to
 * @return the number of attributes copied unchanged (index of the first word attribute)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Sorted map of output-attribute index -> value for the sparse instance.
    TreeMap contained = new TreeMap();

    // Pass 1: copy all attributes outside the selected range straight through.
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Nominal/numeric: only non-zero values need storing in a sparse instance.
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // String attribute: register the value in the output format's
                    // value range first, then store its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a SparseInstance
                        // doesn't get printed — pad the range with a dummy value.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    // Pass 2: tokenize the selected (text) attributes and accumulate word
    // counts (or presence flags) against the dictionary's output indices.
    for (int j = 0; j < instance.numAttributes(); j++) {
        //if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                // Words absent from the dictionary are silently dropped.
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate if here rather than two lines down to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        // Presence only: any occurrence maps to 1.
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Optional TF transform: value -> log(value + 1), word attributes only
    // (indices >= firstCopy; copied attributes are left untouched).
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Optional IDF transform: value -> value * log(numDocs / docFreq(word)).
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Flatten the sorted map into the parallel value/index arrays a
    // SparseInstance requires, preserving the source instance's weight.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices,
            outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
From source file:processes.ClusterProcess.java
/**
 * Converts a user's taste data into a Weka instance bound to the
 * {@code userPoints} dataset.
 *
 * NOTE(review): the {@code dataSet} parameter is never read — the field
 * {@code userPoints} is used instead; confirm with callers whether that is intended.
 *
 * @param user    user whose taste string is converted
 * @param dataSet unused (see note above)
 * @return a new instance populated from the user's comma-separated taste values
 */
private Instance toInstance(User user, Instances dataSet) {
    Instance instance = new Instance(userPoints.numAttributes());
    instance.setDataset(userPoints);
    // Prefix "0" supplies the value for the first attribute before the taste values.
    String csv = "0" + user.getTasteString(LastFMDataHandler.getInitialTagCount());
    String[] fields = csv.split(",");
    int position = 0;
    for (String field : fields) {
        instance.setValue(position, Integer.parseInt(field));
        position++;
    }
    return instance;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Wraps a feature map in a weight-1 Weka instance attached to this
 * classifier's dataset.
 *
 * @param features attribute-name to value map, flattened via {@code transform}
 * @return the new instance, ready for classification
 */
protected Instance makeInstance(Map<String, Double> features) {
    double[] flattened = transform(features);
    Instance instance = new Instance(1, flattened);
    instance.setDataset(instances);
    return instance;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Copies an instance from one dataset's attribute layout to another,
 * matching attributes by name.
 *
 * @param input instance expressed in {@code src}'s attribute order
 * @param src   dataset describing {@code input}'s attributes
 * @param dst   target dataset whose layout the result uses
 * @return a new instance attached to {@code dst}
 * @throws IllegalArgumentException if {@code src} lacks an attribute that
 *         {@code dst} requires (previously surfaced as a bare NPE)
 */
public static Instance convert(Instance input, Instances src, Instances dst) {
    Instance result = new Instance(dst.numAttributes());
    result.setDataset(dst);
    for (int i = 0; i < dst.numAttributes(); ++i) {
        String name = dst.attribute(i).name();
        Attribute srcAttr = src.attribute(name);
        // FIX: attribute(name) returns null when the name is absent from src;
        // fail with a descriptive message instead of a NullPointerException.
        if (srcAttr == null) {
            throw new IllegalArgumentException(
                    "Source dataset has no attribute named '" + name + "'");
        }
        if (srcAttr.isNumeric()) {
            double val = input.value(srcAttr);
            result.setValue(i, val);
        } else {
            String val = input.stringValue(srcAttr);
            result.setValue(i, val);
        }
    }
    return result;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Loads training examples from an ARFF file into {@code instances},
 * converting attribute layouts where necessary, then retrains the model
 * and refreshes the table view.
 *
 * @param f ARFF file to load
 * @throws IOException if the file cannot be read
 */
@Override
public void loadExamples(File f) throws IOException {
    ArffLoader l = new ArffLoader();
    l.setFile(f);
    Instances structure = l.getStructure();
    Instance i;
    // Stream instances one at a time rather than loading the whole file.
    while ((i = l.getNextInstance(structure)) != null) {
        if (!instances.checkInstance(i)) {
            // Layout mismatch: remap attributes by name into our dataset's layout.
            i = convert(i, structure, instances);
        } else {
            i.setDataset(instances);
        }
        // Re-check after conversion; conversion may still fail to produce a fit.
        if (instances.checkInstance(i)) {
            // Class value 0 marks a rejected example; down/up-weight per config.
            if (i.classValue() == 0) {
                i.setWeight(getRejectedWeight());
            }
            instances.add(i);
        } else {
            System.err.println("Ignoring incompatible instance");
        }
    }
    updateModel();
    tableModel.fireTableDataChanged();
}
From source file:qa.experiment.ProcessFeatureVector.java
public String trainAndPredict(String[] processNames, String question) throws Exception { FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames); Instances trainingSet = new Instances("Rel", fvWekaAttribute, bowFeature.size() + 1); trainingSet.setClassIndex(bowFeature.size()); int cnt = 0;/*from www . ja v a 2 s .com*/ for (int i = 0; i < arrProcessFeature.size(); i++) { String[] names = arrProcessFeature.get(i).getProcessName().split("\\|"); int sim = isNameFuzzyMatch(processNames, names); if (sim != -1) { // System.out.println("match " + arrProcessFeature.get(i).getProcessName()); ArrayList<String> featureVector = arrProcessFeature.get(i).getFeatureVectors(); for (int j = 0; j < featureVector.size(); j++) { Instance trainInstance = new Instance(bowFeature.size() + 1); String[] attrValues = featureVector.get(j).split("\t"); // System.out.println(trainInstance.numAttributes()); // System.out.println(fvWekaAttribute.size()); for (int k = 0; k < bowFeature.size(); k++) { trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k), Integer.parseInt(attrValues[k])); } trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(bowFeature.size()), processNames[sim]); trainingSet.add(trainInstance); //System.out.println(cnt); cnt++; } } } Classifier cl = new NaiveBayes(); cl.buildClassifier(trainingSet); Instance inst = new Instance(bowFeature.size() + 1); //String[] tokenArr = tokens.toArray(new String[tokens.size()]); for (int j = 0; j < bowFeature.size(); j++) { List<String> tokens = slem.tokenize(question); String[] tokArr = tokens.toArray(new String[tokens.size()]); int freq = getFrequency(bowFeature.get(j), tokArr); inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq); } inst.setDataset(trainingSet); int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst)); return processNames[idxMax]; }
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
/** * Update the existing classifier with new instance. For online models, it directly updates. For offline learning models, it re-generates the model with updated training set * * @param instance/*from w ww . j av a2 s. c o m*/ */ public void updateClassifier(TrainingInstance instance) throws NadeefClassifierException { // transform training instance into real instance Instance wekaInstance = new Instance(numberOfAttributes); wekaInstance.setDataset(instances); // add values from old tuple for (Cell cell : instance.getDirtyTuple().getCells()) { if (isPermitted(cell.getColumn())) { if (isPermitted(cell.getColumn())) { if (cell.getValue() instanceof String) { wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString()); } else { double doubleValue = Double.parseDouble(cell.getValue().toString()); wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue); } } } } // add new value, check its type from dirty value if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) { wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue()); } else { double doubleValue = Double.parseDouble(instance.getUpdatedValue()); } // add similarity wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore()); // add class label wekaInstance.setValue(numberOfAttributes - 1, instance.getLabel().toString()); updateClassifier(wekaInstance); }