List of usage examples for weka.core Instance setDataset
public void setDataset(Instances instances);
From source file:predictors.HelixPredictor.java
License:Open Source License
/** * Analyzes a given segment and returns the TMH probability. * /*from w w w. j a va 2 s . c o m*/ * @param pssm * @param start * @param end * @return */ public double getSegmentProbability(Pssm pssm, int start, int end) { double tmhProbability = -1; try { Instance window = this.buildInstance(pssm, start, end); window.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1)); window.setDataset(this.dataset); tmhProbability = this.classifier.distributionForInstance(window)[Mappings.indexTmh]; ; } catch (Exception e) { ErrorUtils.printError(HelixPredictor.class, "Prediction failed for segment (" + start + "-" + end + ")", e); return -1.0; } return tmhProbability; }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/** * Predicts the N-terminal topology for a given protein. * /*from w w w . j a v a2 s . co m*/ * @param protein * @param cutoff */ public void predict(Protein protein, double cutoff) { if (protein == null) { return; } if (protein.getPssm() == null) { return; } if (protein.getPrediction() == null) { return; } if (!protein.isPredTmp()) { return; } Pssm pssm = protein.getPssm(); char[] prediction = protein.getPrediction(); try { ArrayList<Segment> solSegments = findSegments(prediction); Instance instance = this.buildInstance(pssm, prediction, solSegments, 0); instance.isMissing((Attribute) this.attributes.get(this.attributes.size() - 1)); instance.setDataset(this.dataset); double[] probabilities = this.classifier.distributionForInstance(instance); char top = Character.UNASSIGNED; protein.setTopologyRaw((int) (1000 * probabilities[TopologyPredictor.indexInside])); if (!protein.hasPredSigP() && probabilities[TopologyPredictor.indexInside] >= cutoff) { top = Mappings.intToTop(Mappings.indexInside); } else { top = Mappings.intToTop(Mappings.indexOutside); } for (int i = 0; i < prediction.length; ++i) { char type = prediction[i]; if (Mappings.ssToInt(type) == Mappings.indexNotTmh) { prediction[i] = top; } else if (Mappings.ssToInt(type) == Mappings.indexTmh) { if (top == Mappings.intToTop(Mappings.indexInside)) { top = Mappings.intToTop(Mappings.indexOutside); } else { top = Mappings.intToTop(Mappings.indexInside); } while (i < prediction.length && type == prediction[i]) { ++i; } --i; } } } catch (Exception e) { ErrorUtils.printError(TopologyPredictor.class, "Prediction failed for " + protein.getHeader(), e); return; } }
From source file:predictors.TopologyPredictor.java
License:Open Source License
/** * Analyzes a given window and saves it in the database. * /*from ww w. j a va 2s . c o m*/ * @param pssm * @param structure * @param structureIndex * @param startPos */ private void addProteinToDatabse(Pssm pssm, char[] structure, int structureIndex, int startPos) { ArrayList<Segment> solSegments = findSegments(structure); Instance segment = this.buildInstance(pssm, structure, solSegments, startPos); segment.setValue((Attribute) this.attributes.get(this.attributes.size() - 1), structureIndex); segment.setDataset(this.dataset); this.dataset.add(segment); }
From source file:preprocess.StringToWordVector.java
License:Open Source License
/**
 * Converts a single instance to the sparse word-vector output format,
 * without document-length normalization.
 *
 * @param instance the instance to convert
 * @param v        output vector the converted sparse instance is appended to
 * @return the number of attributes copied unchanged (index of the first word attribute)
 */
private int convertInstancewoDocNorm(Instance instance, FastVector v) {
    // Sorted map of output-attribute index -> value for the sparse instance.
    TreeMap contained = new TreeMap();

    // Pass 1: copy all attributes outside the selected range straight through.
    int firstCopy = 0;
    for (int i = 0; i < getInputFormat().numAttributes(); i++) {
        if (!m_SelectedRange.isInRange(i)) {
            if (getInputFormat().attribute(i).type() != Attribute.STRING) {
                // Nominal/numeric: only non-zero values need storing in a sparse instance.
                if (instance.value(i) != 0.0) {
                    contained.put(new Integer(firstCopy), new Double(instance.value(i)));
                }
            } else {
                if (instance.isMissing(i)) {
                    contained.put(new Integer(firstCopy), new Double(Instance.missingValue()));
                } else {
                    // String attribute: register the value in the output format's
                    // value range first, then store its new internal index.
                    if (outputFormatPeek().attribute(firstCopy).numValues() == 0) {
                        // Note that the first string value in a SparseInstance
                        // doesn't get printed — pad the range with a dummy value.
                        outputFormatPeek().attribute(firstCopy)
                                .addStringValue("Hack to defeat SparseInstance bug");
                    }
                    int newIndex = outputFormatPeek().attribute(firstCopy)
                            .addStringValue(instance.stringValue(i));
                    contained.put(new Integer(firstCopy), new Double(newIndex));
                }
            }
            firstCopy++;
        }
    }

    // Pass 2: tokenize the selected (text) attributes and accumulate word
    // counts (or presence flags) against the dictionary's output indices.
    for (int j = 0; j < instance.numAttributes(); j++) {
        //if ((getInputFormat().attribute(j).type() == Attribute.STRING)
        if (m_SelectedRange.isInRange(j) && (instance.isMissing(j) == false)) {
            m_Tokenizer.tokenize(instance.stringValue(j));
            while (m_Tokenizer.hasMoreElements()) {
                String word = (String) m_Tokenizer.nextElement();
                if (this.m_lowerCaseTokens == true)
                    word = word.toLowerCase();
                word = m_Stemmer.stem(word);
                // Words absent from the dictionary are silently dropped.
                Integer index = (Integer) m_Dictionary.get(word);
                if (index != null) {
                    if (m_OutputCounts) {
                        // Separate if here rather than two lines down to avoid hashtable lookup
                        Double count = (Double) contained.get(index);
                        if (count != null) {
                            contained.put(index, new Double(count.doubleValue() + 1.0));
                        } else {
                            contained.put(index, new Double(1));
                        }
                    } else {
                        // Presence only: any occurrence maps to 1.
                        contained.put(index, new Double(1));
                    }
                }
            }
        }
    }

    // Optional TF transform: value -> log(value + 1), word attributes only
    // (indices >= firstCopy; copied attributes are left untouched).
    if (m_TFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = Math.log(val + 1);
                contained.put(index, new Double(val));
            }
        }
    }

    // Optional IDF transform: value -> value * log(numDocs / docFreq(word)).
    if (m_IDFTransform == true) {
        Iterator it = contained.keySet().iterator();
        for (int i = 0; it.hasNext(); i++) {
            Integer index = (Integer) it.next();
            if (index.intValue() >= firstCopy) {
                double val = ((Double) contained.get(index)).doubleValue();
                val = val * Math.log(m_NumInstances / (double) m_DocsCounts[index.intValue()]);
                contained.put(index, new Double(val));
            }
        }
    }

    // Flatten the sorted map into the parallel value/index arrays a
    // SparseInstance requires, preserving the source instance's weight.
    double[] values = new double[contained.size()];
    int[] indices = new int[contained.size()];
    Iterator it = contained.keySet().iterator();
    for (int i = 0; it.hasNext(); i++) {
        Integer index = (Integer) it.next();
        Double value = (Double) contained.get(index);
        values[i] = value.doubleValue();
        indices[i] = index.intValue();
    }
    Instance inst = new SparseInstance(instance.weight(), values, indices,
            outputFormatPeek().numAttributes());
    inst.setDataset(outputFormatPeek());
    v.addElement(inst);
    return firstCopy;
}
From source file:processes.ClusterProcess.java
/**
 * Converts a user's taste data into a Weka instance bound to the
 * {@code userPoints} dataset.
 *
 * NOTE(review): the {@code dataSet} parameter is never read — the field
 * {@code userPoints} is used instead; confirm with callers whether that is intended.
 *
 * @param user    user whose taste string is converted
 * @param dataSet unused (see note above)
 * @return a new instance populated from the user's comma-separated taste values
 */
private Instance toInstance(User user, Instances dataSet) {
    Instance instance = new Instance(userPoints.numAttributes());
    instance.setDataset(userPoints);
    // Prefix "0" supplies the value for the first attribute before the taste values.
    String csv = "0" + user.getTasteString(LastFMDataHandler.getInitialTagCount());
    String[] fields = csv.split(",");
    int position = 0;
    for (String field : fields) {
        instance.setValue(position, Integer.parseInt(field));
        position++;
    }
    return instance;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Wraps a feature map in a weight-1 Weka instance attached to this
 * classifier's dataset.
 *
 * @param features attribute-name to value map, flattened via {@code transform}
 * @return the new instance, ready for classification
 */
protected Instance makeInstance(Map<String, Double> features) {
    double[] flattened = transform(features);
    Instance instance = new Instance(1, flattened);
    instance.setDataset(instances);
    return instance;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Copies an instance from one dataset's attribute layout to another,
 * matching attributes by name.
 *
 * @param input instance expressed in {@code src}'s attribute order
 * @param src   dataset describing {@code input}'s attributes
 * @param dst   target dataset whose layout the result uses
 * @return a new instance attached to {@code dst}
 * @throws IllegalArgumentException if {@code src} lacks an attribute that
 *         {@code dst} requires (previously surfaced as a bare NPE)
 */
public static Instance convert(Instance input, Instances src, Instances dst) {
    Instance result = new Instance(dst.numAttributes());
    result.setDataset(dst);
    for (int i = 0; i < dst.numAttributes(); ++i) {
        String name = dst.attribute(i).name();
        Attribute srcAttr = src.attribute(name);
        // FIX: attribute(name) returns null when the name is absent from src;
        // fail with a descriptive message instead of a NullPointerException.
        if (srcAttr == null) {
            throw new IllegalArgumentException(
                    "Source dataset has no attribute named '" + name + "'");
        }
        if (srcAttr.isNumeric()) {
            double val = input.value(srcAttr);
            result.setValue(i, val);
        } else {
            String val = input.stringValue(srcAttr);
            result.setValue(i, val);
        }
    }
    return result;
}
From source file:put.semantic.fcanew.ml.WekaClassifier.java
/**
 * Loads training examples from an ARFF file into {@code instances},
 * converting attribute layouts where necessary, then retrains the model
 * and refreshes the table view.
 *
 * @param f ARFF file to load
 * @throws IOException if the file cannot be read
 */
@Override
public void loadExamples(File f) throws IOException {
    ArffLoader l = new ArffLoader();
    l.setFile(f);
    Instances structure = l.getStructure();
    Instance i;
    // Stream instances one at a time rather than loading the whole file.
    while ((i = l.getNextInstance(structure)) != null) {
        if (!instances.checkInstance(i)) {
            // Layout mismatch: remap attributes by name into our dataset's layout.
            i = convert(i, structure, instances);
        } else {
            i.setDataset(instances);
        }
        // Re-check after conversion; conversion may still fail to produce a fit.
        if (instances.checkInstance(i)) {
            // Class value 0 marks a rejected example; down/up-weight per config.
            if (i.classValue() == 0) {
                i.setWeight(getRejectedWeight());
            }
            instances.add(i);
        } else {
            System.err.println("Ignoring incompatible instance");
        }
    }
    updateModel();
    tableModel.fireTableDataChanged();
}
From source file:qa.experiment.ProcessFeatureVector.java
public String trainAndPredict(String[] processNames, String question) throws Exception { FastVector fvWekaAttribute = generateWEKAFeatureVector(processNames); Instances trainingSet = new Instances("Rel", fvWekaAttribute, bowFeature.size() + 1); trainingSet.setClassIndex(bowFeature.size()); int cnt = 0;/*from www . ja v a 2 s .com*/ for (int i = 0; i < arrProcessFeature.size(); i++) { String[] names = arrProcessFeature.get(i).getProcessName().split("\\|"); int sim = isNameFuzzyMatch(processNames, names); if (sim != -1) { // System.out.println("match " + arrProcessFeature.get(i).getProcessName()); ArrayList<String> featureVector = arrProcessFeature.get(i).getFeatureVectors(); for (int j = 0; j < featureVector.size(); j++) { Instance trainInstance = new Instance(bowFeature.size() + 1); String[] attrValues = featureVector.get(j).split("\t"); // System.out.println(trainInstance.numAttributes()); // System.out.println(fvWekaAttribute.size()); for (int k = 0; k < bowFeature.size(); k++) { trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(k), Integer.parseInt(attrValues[k])); } trainInstance.setValue((Attribute) fvWekaAttribute.elementAt(bowFeature.size()), processNames[sim]); trainingSet.add(trainInstance); //System.out.println(cnt); cnt++; } } } Classifier cl = new NaiveBayes(); cl.buildClassifier(trainingSet); Instance inst = new Instance(bowFeature.size() + 1); //String[] tokenArr = tokens.toArray(new String[tokens.size()]); for (int j = 0; j < bowFeature.size(); j++) { List<String> tokens = slem.tokenize(question); String[] tokArr = tokens.toArray(new String[tokens.size()]); int freq = getFrequency(bowFeature.get(j), tokArr); inst.setValue((Attribute) fvWekaAttribute.elementAt(j), freq); } inst.setDataset(trainingSet); int idxMax = ArrUtil.getIdxMax(cl.distributionForInstance(inst)); return processNames[idxMax]; }
From source file:qa.qcri.nadeef.core.utils.classification.ClassifierBase.java
License:Open Source License
/** * Update the existing classifier with new instance. For online models, it directly updates. For offline learning models, it re-generates the model with updated training set * * @param instance/*from w ww . j av a2 s. c o m*/ */ public void updateClassifier(TrainingInstance instance) throws NadeefClassifierException { // transform training instance into real instance Instance wekaInstance = new Instance(numberOfAttributes); wekaInstance.setDataset(instances); // add values from old tuple for (Cell cell : instance.getDirtyTuple().getCells()) { if (isPermitted(cell.getColumn())) { if (isPermitted(cell.getColumn())) { if (cell.getValue() instanceof String) { wekaInstance.setValue(attributeIndex.get(cell.getColumn()), cell.getValue().toString()); } else { double doubleValue = Double.parseDouble(cell.getValue().toString()); wekaInstance.setValue(attributeIndex.get(cell.getColumn()), doubleValue); } } } } // add new value, check its type from dirty value if (instance.getDirtyTuple().getCell(instance.getAttribute()).getValue() instanceof String) { wekaInstance.setValue(numberOfAttributes - 3, instance.getUpdatedValue()); } else { double doubleValue = Double.parseDouble(instance.getUpdatedValue()); } // add similarity wekaInstance.setValue(numberOfAttributes - 2, instance.getSimilarityScore()); // add class label wekaInstance.setValue(numberOfAttributes - 1, instance.getLabel().toString()); updateClassifier(wekaInstance); }