Example usage for weka.core Instance setDataset

List of usage examples for weka.core Instance setDataset

Introduction

In this page you can find the example usage for weka.core Instance setDataset.

Prototype

public void setDataset(Instances instances);

Source Link

Document

Sets the reference to the dataset.

Usage

From source file:predictors.HelixPredictor.java

License:Open Source License

/**
 * Analyzes a given segment and returns the TMH probability.
 * /*from   w  w w.  j a va  2 s . c  o  m*/
 * @param pssm
 * @param start
 * @param end
 * @return
 */
public double getSegmentProbability(Pssm pssm, int start, int end) {
    double tmhProbability = -1;

    try {
        Instance window = this.buildInstance(pssm, start, end);

        window.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1));
        window.setDataset(this.dataset);

        tmhProbability = this.classifier.distributionForInstance(window)[Mappings.indexTmh];
        ;
    } catch (Exception e) {
        ErrorUtils.printError(HelixPredictor.class, "Prediction failed for segment (" + start + "-" + end + ")",
                e);

        return -1.0;
    }

    return tmhProbability;
}

From source file:predictors.TopologyPredictor.java

License:Open Source License

/**
 * Predicts the N-terminal topology for a given protein and rewrites the
 * per-residue prediction array with inside/outside topology labels.
 *
 * @param protein the protein to predict for; must carry a PSSM and an
 *                existing residue-level prediction, and be a predicted TMP
 * @param cutoff  minimum inside-probability required to start the topology
 *                on the inside (unless a signal peptide was predicted)
 */
public void predict(Protein protein, double cutoff) {
    // Guard clauses: nothing to do without a protein, its PSSM, or a
    // prior prediction.
    if (protein == null) {
        return;
    }
    if (protein.getPssm() == null) {
        return;
    }
    if (protein.getPrediction() == null) {
        return;
    }

    // Only predicted transmembrane proteins get a topology.
    if (!protein.isPredTmp()) {
        return;
    }

    Pssm pssm = protein.getPssm();
    char[] prediction = protein.getPrediction();

    try {
        ArrayList<Segment> solSegments = findSegments(prediction);
        Instance instance = this.buildInstance(pssm, prediction, solSegments, 0);

        // NOTE(review): isMissing() is a side-effect-free query and its result
        // is discarded — possibly setMissing() was intended; confirm.
        instance.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1));
        // The instance must reference a dataset before classification.
        instance.setDataset(this.dataset);

        double[] probabilities = this.classifier.distributionForInstance(instance);

        char top = Character.UNASSIGNED;

        // Store the raw inside-probability scaled by 1000.
        protein.setTopologyRaw((int) (1000 * probabilities[TopologyPredictor.indexInside]));

        // Decide the N-terminal side: inside only when no signal peptide was
        // predicted and the inside probability clears the cutoff.
        if (!protein.hasPredSigP() && probabilities[TopologyPredictor.indexInside] >= cutoff) {
            top = Mappings.intToTop(Mappings.indexInside);
        } else {
            top = Mappings.intToTop(Mappings.indexOutside);
        }

        // Walk the prediction: non-TMH residues take the current side; each
        // TMH segment flips the side (membrane crossing) and is skipped.
        for (int i = 0; i < prediction.length; ++i) {
            char type = prediction[i];

            if (Mappings.ssToInt(type) == Mappings.indexNotTmh) {
                prediction[i] = top;
            } else if (Mappings.ssToInt(type) == Mappings.indexTmh) {
                // Flip inside <-> outside at the helix.
                if (top == Mappings.intToTop(Mappings.indexInside)) {
                    top = Mappings.intToTop(Mappings.indexOutside);
                } else {
                    top = Mappings.intToTop(Mappings.indexInside);
                }

                // Skip to the end of the current TMH segment; the final
                // decrement compensates for the loop's own increment.
                while (i < prediction.length && type == prediction[i]) {
                    ++i;
                }

                --i;
            }
        }
    } catch (Exception e) {
        ErrorUtils.printError(TopologyPredictor.class, "Prediction failed for " + protein.getHeader(), e);

        return;
    }
}

From source file:predictors.TopologyPredictor.java

License:Open Source License

/**
 * Analyzes a given window and saves it in the database.
 * /*from   ww w. j a  va 2s  .  c o  m*/
 * @param pssm
 * @param structure
 * @param structureIndex
 * @param startPos
 */
private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) {
    ArrayList<Segment> solSegments = findSegments(structure);
    Instance segment = this.buildInstance(pssm, structure, solSegments, startPos);

    segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex);

    segment.setDataset(this.dataset);

    this.dataset.add(segment);
}

From source file:preprocess.StringToWordVector.java

License:Open Source License

/**
 * Converts a single instance to the bag-of-words output format, without
 * applying document-length normalization.
 *
 * @param instance the instance to convert
 * @param v the vector the converted (sparse) instance is appended to
 * @return the number of attributes copied unconverted, i.e. the index at
 *         which the dictionary-based word attributes begin
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {

    // Maps output attribute index -> value, sorted by index so the sparse
    // instance can be built in order at the end.
    TreeMap contained = new TreeMap();

    // Copy all non-converted attributes from input to output.
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Add simple nominal and numeric attributes directly;
                // zeros are omitted since the result is a sparse instance.
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {

                    // If this is a string attribute, we have to first add
                    // this value to the range of possible values, then add
                    // its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a
                        // SparseInstance doesn't get printed.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    // Tokenize every selected attribute and count dictionary words.
    for (int j = 0; j < instance.numAttributes(); j++) {
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {

            m_Tokenizer.tokenize(instance.stringValue(j));

            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                // Only words present in the dictionary become attributes.
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) { // Separate if here rather than two lines down to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        // Presence/absence only: any occurrence maps to 1.
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // TF transform: log-scale the term frequencies. Only word attributes
    // (index >= firstCopy) are affected; copied attributes stay untouched.
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // IDF transform: weight each word by the log inverse of its document
    // frequency. (Replacing an existing key during iteration is safe: it is
    // not a structural modification of the TreeMap.)
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Convert the map to the parallel arrays needed by SparseInstance.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }

    Instance inst = new SparseInstance(instance.weight(), values, indices, outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());

    v.addElement(inst);

    return firstCopy;
}

From source file:processes.ClusterProcess.java

/**
 * Converts a user's taste profile into a Weka instance backed by the
 * {@code userPoints} dataset.
 *
 * NOTE(review): the {@code dataSet} parameter is unused — the field
 * {@code userPoints} is used instead; confirm whether that is intentional.
 *
 * @param user    the user whose taste string supplies the attribute values
 * @param dataSet unused (see note above)
 * @return the populated instance
 */
private Instance toInstance(User user, Instances dataSet) {
    Instance result = new Instance(userPoints.numAttributes());
    result.setDataset(userPoints);

    // Prepend "0" to the comma-separated taste string, then fill each
    // attribute with the parsed integer at the matching position.
    String[] values = ("0" + user.getTasteString(LastFMDataHandler.getInitialTagCount())).split(",");
    for (int i = 0; i < values.length; i++) {
        result.setValue(i, Integer.parseInt(values[i]));
    }
    return result;
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

/**
 * Wraps the transformed feature map in a weight-1 Weka instance bound to
 * this classifier's dataset (a dataset reference is required before the
 * instance can be classified).
 *
 * @param features feature name to value mapping
 * @return the instance ready for classification
 */
protected Instance makeInstance(Map<String, Double> features) {
    double[] attributeValues = transform(features);
    Instance instance = new Instance(1, attributeValues);
    instance.setDataset(instances);
    return instance;
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

/**
 * Copies an instance from one dataset layout into another, matching
 * attributes by name. Numeric attributes are copied as doubles; all
 * others are copied via their string value.
 *
 * @param input the instance to convert
 * @param src   the dataset describing the input's layout
 * @param dst   the target layout; every dst attribute must exist in src
 * @return a new instance conforming to dst
 */
public static Instance convert(Instance input, Instances src, Instances dst) {
    int attributeCount = dst.numAttributes();
    Instance converted = new Instance(attributeCount);
    converted.setDataset(dst);

    for (int idx = 0; idx < attributeCount; ++idx) {
        // Locate the source attribute with the same name as dst's idx-th one.
        Attribute sourceAttribute = src.attribute(dst.attribute(idx).name());
        if (sourceAttribute.isNumeric()) {
            converted.setValue(idx, input.value(sourceAttribute));
        } else {
            converted.setValue(idx, input.stringValue(sourceAttribute));
        }
    }
    return converted;
}

From source file:put.semantic.fcanew.ml.WekaClassifier.java

@Override
public void loadExamples(File f) throws IOException {
    // Stream instances from the ARFF file one at a time.
    ArffLoader l = new ArffLoader();
    l.setFile(f);
    Instances structure = l.getStructure();
    Instance i;
    while ((i = l.getNextInstance(structure)) != null) {
        // Instances whose layout differs from ours get converted by
        // attribute name; compatible ones only need the dataset reference.
        if (!instances.checkInstance(i)) {
            i = convert(i, structure, instances);
        } else {
            i.setDataset(instances);
        }
        if (instances.checkInstance(i)) {
            // Down-weight rejected examples (class value 0).
            if (i.classValue() == 0) {
                i.setWeight(getRejectedWeight());
            }
            instances.add(i);
        } else {
            // Conversion did not yield a compatible instance; skip it.
            System.err.println("Ignoring incompatible instance");
        }
    }
    // Retrain on the extended training set and refresh the UI table.
    updateModel();
    tableModel.fireTableDataChanged();
}

From source file:qa.experiment.ProcessFeatureVector.java

/**
 * Trains a Naive Bayes classifier on the feature vectors whose process
 * name fuzzy-matches one of the given names, then classifies the question
 * and returns the most likely process name.
 *
 * @param processNames candidate process names, also used as class labels
 * @param question     the question to classify
 * @return the process name with the highest predicted probability
 * @throws Exception if WEKA training or classification fails
 */
public String trainAndPredict(String[] processNames, String question) throws Exception {
    FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames);
    Instances trainingSet = new Instances("Rel", fvWekaAttribute, bowFeature.size() + 1);
    // The class attribute is the last one, after the bag-of-words features.
    trainingSet.setClassIndex(bowFeature.size());

    int cnt = 0;
    for (int i = 0; i < arrProcessFeature.size(); i++) {
        String[] names = arrProcessFeature.get(i).getProcessName().split("\\|");
        int sim = isNameFuzzyMatch(processNames, names);
        if (sim != -1) {
            // Matched: every stored tab-separated feature vector becomes a
            // training instance labeled with the matched process name.
            ArrayList<String> featureVector = arrProcessFeature.get(i).getFeatureVectors();
            for (int j = 0; j < featureVector.size(); j++) {
                Instance trainInstance = new Instance(bowFeature.size() + 1);
                String[] attrValues = featureVector.get(j).split("\t");
                for (int k = 0; k < bowFeature.size(); k++) {
                    trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k),
                            Integer.parseInt(attrValues[k]));
                }
                trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(bowFeature.size()),
                        processNames[sim]);
                trainingSet.add(trainInstance);
                cnt++;
            }
        }
    }

    Classifier cl = new NaiveBayes();
    cl.buildClassifier(trainingSet);

    // Tokenize the question once. Previously this was re-done inside the
    // loop below for every bag-of-words feature — loop-invariant work.
    List<String> tokens = slem.tokenize(question);
    String[] tokArr = tokens.toArray(new String[tokens.size()]);

    // Build the query instance from the question's word frequencies.
    Instance inst = new Instance(bowFeature.size() + 1);
    for (int j = 0; j < bowFeature.size(); j++) {
        int freq = getFrequency(bowFeature.get(j), tokArr);
        inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq);
    }

    inst.setDataset(trainingSet);
    int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst));
    return processNames[idxMax];
}

From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java

License:Open Source License

/**
 * Update the existing classifier with new instance. For online models, it directly updates. For offline learning models, it re-generates the model with updated training set
 *
 * @param instance/*from w  ww  . j av  a2  s. c o m*/
 */
public void updateClassifier(TrainingInstance instance) throws NadeefClassifierException {
    // transform training instance into real instance
    Instance wekaInstance = new Instance(numberOfAttributes);
    wekaInstance.setDataset(instances);
    // add values from old tuple
    for (Cell cell : instance.getDirtyTuple().getCells()) {
        if (isPermitted(cell.getColumn())) {
            if (isPermitted(cell.getColumn())) {
                if (cell.getValue() instanceof String) {
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString());
                } else {
                    double doubleValue = Double.parseDouble(cell.getValue().toString());
                    wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue);
                }
            }
        }
    }

    // add new value, check its type from dirty value
    if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) {
        wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue());
    } else {
        double doubleValue = Double.parseDouble(instance.getUpdatedValue());
    }
    // add similarity
    wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore());
    // add class label
    wekaInstance.setValue(numberOfAttributes - 1, instance.getLabel().toString());

    updateClassifier(wekaInstance);
}