List of usage examples for weka.core.Instances.add(Instance)
@Override public boolean add(Instance instance)
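Before the collected examples below, here is a minimal, self-contained sketch of the basic pattern behind add(Instance), assuming the Weka 3.7+ API (ArrayList-based Instances constructor, DenseInstance); the relation name, attribute names, and values are purely illustrative:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddExample {
    public static void main(String[] args) {
        // Two numeric attributes define the dataset schema.
        ArrayList<Attribute> attributes = new ArrayList<>();
        attributes.add(new Attribute("x"));
        attributes.add(new Attribute("y"));

        // Empty dataset with that schema.
        Instances data = new Instances("Example", attributes, 0);

        // Wrap the raw values in an Instance (weight 1.0) and append it.
        // Note: add(Instance) stores a copy of the instance and associates
        // the copy with the dataset.
        Instance row = new DenseInstance(1.0, new double[] { 1.5, 2.5 });
        data.add(row);

        System.out.println(data);
    }
}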
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.ClusteringTask.java
License:Open Source License
/**
 * Creates the weka data set for clustering of variables (metabolites)
 *
 * @param rawData Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createVariableWekaDataset(double[][] rawData) {
    FastVector attributes = new FastVector();
    for (int i = 0; i < this.selectedRawDataFiles.length; i++) {
        String varName = "Var" + i;
        Attribute var = new Attribute(varName);
        attributes.addElement(var);
    }
    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
        Attribute name = new Attribute("name", (FastVector) null);
        attributes.addElement(name);
    }

    Instances data = new Instances("Dataset", attributes, 0);

    for (int i = 0; i < selectedRows.length; i++) {
        double[] values = new double[data.numAttributes()];
        System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
        if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
            DecimalFormat twoDForm = new DecimalFormat("#.##");
            double MZ = Double.valueOf(twoDForm.format(selectedRows[i].getAverageMZ()));
            double RT = Double.valueOf(twoDForm.format(selectedRows[i].getAverageRT()));
            String rowName = "MZ->" + MZ + "/RT->" + RT;
            values[data.numAttributes() - 1] = data.attribute("name").addStringValue(rowName);
        }
        Instance inst = new SparseInstance(1.0, values);
        data.add(inst);
    }

    return data;
}
From source file:netkit.classifiers.nonrelational.LocalWeka.java
License:Apache License
/**
 * Induce the weka classifier by creating a training Instances object according
 * to the schema of the nodes to be classified.
 *
 * @param graph Graph whose nodes are to be estimated
 * @param split The split between training and test. Used to get the nodetype and class attribute.
 */
public void induceModel(Graph graph, DataSplit split) {
    super.induceModel(graph, split);
    Node[] trainingSet = split.getTrainSet();

    // return if no training is to be done.
    if (trainingSet == null || trainingSet.length == 0)
        return;

    // Create a FastVector of the possible values of the class attribute
    FastVector clsValues = new FastVector(attribute.size());
    for (String token : attribute.getTokens())
        clsValues.addElement(token);

    // Create the array that defines the attributes. We do not include the 'key' attribute
    Attributes attribs = trainingSet[0].getAttributes();
    FastVector attInfo = new FastVector(attribs.attributeCount() - 1);
    for (Attribute attrib : attribs) {
        // do not include the KEY attribute
        if (attrib == attribs.getKey())
            continue;
        if (attrib.getType() == Type.CATEGORICAL) {
            String[] tokens = ((AttributeCategorical) attrib).getTokens();
            FastVector values = new FastVector(tokens.length);
            for (String token : tokens)
                values.addElement(token);
            attInfo.addElement(new weka.core.Attribute(attrib.getName(), values));
        } else
            attInfo.addElement(new weka.core.Attribute(attrib.getName()));
    }

    // Create the training Instances object + set the class attribute index
    Instances train = new Instances("train", attInfo, split.getTrainSetSize());
    train.setClassIndex(vectorClsIdx);

    // Create the training instance objects
    for (Node node : split.getTrainSet()) {
        double[] v = new double[attInfo.size()];
        makeVector(node, v);
        train.add(new Instance(1, v));
    }

    // Finally induce the weka classifier
    try {
        classifier.buildClassifier(train);
    } catch (Exception e) {
        throw new RuntimeException("Failed to build classifier " + classifier.getClass().getName(), e);
    }

    // Now set up the test environment. It is a test Instances object containing
    // only a single test instance. We also keep a reference to the double array
    // that represents the attribute values.
    cVector = new double[attInfo.size()];
    testInstance = new Instance(1, cVector);
    testInstances = new Instances("test", attInfo, 1);
    testInstances.setClassIndex(vectorClsIdx);
    testInstances.add(testInstance);
    testInstance = testInstances.firstInstance();
}
From source file:netkit.classifiers.relational.NetworkWeka.java
License:Apache License
/**
 * Induce the weka classifier by creating a training Instances object according
 * to the schema of the nodes to be classified.
 *
 * @param graph Graph whose nodes are to be estimated
 * @param split The split between training and test. Used to get the nodetype and class attribute.
 */
public void induceModel(Graph graph, DataSplit split) {
    super.induceModel(graph, split);
    Node[] trainingSet = split.getTrainSet();
    if (trainingSet == null || trainingSet.length == 0)
        return;

    Attributes attribs = trainingSet[0].getAttributes();
    FastVector attInfo = new FastVector(tmpVector.length);
    logger.finer("Setting up WEKA attributes");
    if (useIntrinsic) {
        for (Attribute attrib : attribs) {
            // do not include the KEY attribute
            if (attrib == attribs.getKey())
                continue;
            switch (attrib.getType()) {
            case CATEGORICAL:
                String[] tokens = ((AttributeCategorical) attrib).getTokens();
                FastVector values = new FastVector(tokens.length);
                for (String token : tokens)
                    values.addElement(token);
                attInfo.addElement(new weka.core.Attribute(attrib.getName(), values));
                logger.finer("Adding WEKA attribute " + attrib.getName() + ":Categorical");
                break;
            default:
                attInfo.addElement(new weka.core.Attribute(attrib.getName()));
                logger.finer("Adding WEKA attribute " + attrib.getName() + ":Numerical");
                break;
            }
        }
    } else {
        String[] tokens = attribute.getTokens();
        FastVector values = new FastVector(tokens.length);
        for (String token : tokens)
            values.addElement(token);
        attInfo.addElement(new weka.core.Attribute(attribute.getName(), values));
        logger.finer("Adding WEKA attribute " + attribute.getName() + ":Categorical");
    }

    for (Aggregator agg : aggregators) {
        Attribute attrib = agg.getAttribute();
        switch (agg.getType()) {
        case CATEGORICAL:
            String[] tokens = ((AttributeCategorical) attrib).getTokens();
            FastVector values = new FastVector(tokens.length);
            for (String token : tokens)
                values.addElement(token);
            attInfo.addElement(new weka.core.Attribute(agg.getName(), values));
            logger.finer("Adding WEKA attribute " + agg.getName() + ":Categorical");
            break;
        default:
            attInfo.addElement(new weka.core.Attribute(agg.getName()));
            logger.finer("Adding WEKA attribute " + agg.getName() + ":Numerical");
            break;
        }
    }

    Instances train = new Instances("train", attInfo, split.getTrainSetSize());
    train.setClassIndex(vectorClsIdx);
    for (Node node : split.getTrainSet()) {
        double[] v = new double[attInfo.size()];
        makeVector(node, v);
        train.add(new Instance(1, v));
    }

    try {
        classifier.buildClassifier(train);
    } catch (Exception e) {
        throw new RuntimeException("Failed to build classifier " + classifier.getClass().getName(), e);
    }

    testInstance = new Instance(1, tmpVector);
    testInstances = new Instances("test", attInfo, 1);
    testInstances.setClassIndex(vectorClsIdx);
    testInstances.add(testInstance);
    testInstance = testInstances.firstInstance();
}
From source file:news.classifier.WekaLearner.java
public double classifyInstance(double[] instance) throws Exception {
    wClassifier.buildClassifier(wTrainingSet);
    Instances ins = new Instances(wTrainingSet, 0);
    Instance row = new DenseInstance(1.0, instance);
    ins.add(row);
    return wClassifier.classifyInstance(ins.lastInstance());
}
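Because add(Instance) appends a copy, the example above classifies ins.lastInstance() rather than row itself. A minimal alternative sketch, reusing wClassifier and wTrainingSet from the example (featureValues stands in for an illustrative double[] matching the training schema), binds the row to a header-only copy instead of adding it to a temporary dataset:

// Header-only copy: same attributes and class index as the training data, no rows.
Instances header = new Instances(wTrainingSet, 0);
Instance row = new DenseInstance(1.0, featureValues);
row.setDataset(header); // row can now resolve attribute and class metadata
double prediction = wClassifier.classifyInstance(row);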
From source file:nl.uva.sne.commons.ClusterUtils.java
private static Instances createInstances(String inDir) throws Exception {
    List<Term> terms = dir2Terms(inDir);
    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Create documents");
    List<List<String>> allDocs = new ArrayList<>();
    Map<String, List<String>> docs = new HashMap<>();
    for (Term tv : terms) {
        try {
            Set<String> doc = SemanticUtils.getDocument(tv);
            allDocs.add(new ArrayList<>(doc));
            docs.put(tv.getUID(), new ArrayList<>(doc));
        } catch (JWNLException ex) {
            Logger.getLogger(ClusterUtils.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Extract features");
    Set<String> allWords = new HashSet<>();
    Map<String, Map<String, Double>> featureVectors = new HashMap<>();
    for (String k : docs.keySet()) {
        List<String> doc = docs.get(k);
        Map<String, Double> featureVector = new TreeMap<>();
        for (String term : doc) {
            allWords.add(term);
            if (!featureVector.containsKey(term)) {
                double score = SemanticUtils.tfIdf(doc, allDocs, term);
                featureVector.put(term, score);
            }
        }
        featureVectors.put(k, featureVector);
    }

    //    for (String t : featureVectors.keySet()) {
    //        Map<String, Double> featureV = featureVectors.get(t);
    //        for (String word : allWords) {
    //            if (!featureV.containsKey(word)) {
    //                featureV.put(word, 0.0);
    //            }
    //        }
    //        System.err.println(t + " " + featureV.size());
    //        featureVectors.put(t, featureV);
    //    }

    ArrayList<Attribute> attributes = new ArrayList<>();
    attributes.add(new Attribute("UID", (ArrayList<String>) null));
    for (String t : allWords) {
        attributes.add(new Attribute(t));
    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Create Instances");
    Instances data = new Instances("Rel", attributes, terms.size());
    for (String t : featureVectors.keySet()) {
        Map<String, Double> featureV = featureVectors.get(t);
        double[] vals = new double[data.numAttributes()];
        vals[0] = data.attribute(0).addStringValue(t);
        int index = 1;
        for (String w : featureV.keySet()) {
            vals[index] = featureV.get(w);
            index++;
        }
        data.add(new DenseInstance(1.0, vals));
    }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Normalize vectors");
    Normalize filter = new Normalize();
    filter.setInputFormat(data);
    data = Filter.useFilter(data, filter);
    return data;
}
From source file:nl.uva.sne.commons.ClusterUtils.java
private static Instances createInstancesWithClasses(String inDir) throws IOException, ParseException, Exception {
    File dir = new File(inDir);
    File[] classFolders = dir.listFiles();
    List<List<String>> allDocs = new ArrayList<>();
    Map<String, List<String>> docs = new HashMap<>();
    Set<String> classes = new HashSet<>();
    for (File f : classFolders) {
        if (f.isDirectory()) {
            List<Term> terms = dir2Terms(f.getAbsolutePath());
            classes.add(f.getName());
            for (Term tv : terms) {
                Set<String> doc = SemanticUtils.getDocument(tv);
                allDocs.add(new ArrayList<>(doc));
                docs.put(tv.getUID() + "," + f.getName(), new ArrayList<>(doc));
            }
        } else {
            List<Term> terms = new ArrayList<>();
            if (FilenameUtils.getExtension(f.getName()).endsWith("json")) {
                terms.add(TermFactory.create(new FileReader(f)));
            }
            classes.add("NON");
            for (Term tv : terms) {
                Set<String> doc = SemanticUtils.getDocument(tv);
                allDocs.add(new ArrayList<>(doc));
                docs.put(tv.getUID() + "," + "NON", new ArrayList<>(doc));
                //    docs.put(tv.getUID(), new ArrayList<>(doc));
            }
        }
    }

    Set<String> allWords = new HashSet<>();
    Map<String, Map<String, Double>> featureVectors = new HashMap<>();
    for (String k : docs.keySet()) {
        List<String> doc = docs.get(k);
        Map<String, Double> featureVector = new TreeMap<>();
        for (String term : doc) {
            allWords.add(term);
            if (!featureVector.containsKey(term)) {
                double score = SemanticUtils.tfIdf(doc, allDocs, term);
                featureVector.put(term, score);
            }
        }
        featureVectors.put(k, featureVector);
    }

    for (String t : featureVectors.keySet()) {
        Map<String, Double> featureV = featureVectors.get(t);
        for (String word : allWords) {
            if (!featureV.containsKey(word)) {
                featureV.put(word, 0.0);
            }
        }
        //    System.err.println(t + " " + featureV.size());
        featureVectors.put(t, featureV);
    }

    ArrayList<Attribute> attributes = buildAttributes(allWords, classes);
    Instances data = new Instances("Rel", attributes, docs.size());
    data.setClassIndex(data.numAttributes() - 1);

    for (String t : featureVectors.keySet()) {
        String[] parts = t.split(",");
        String id = parts[0];
        String theClass = parts[parts.length - 1];
        int index = 0;
        double[] vals = new double[data.numAttributes()];
        vals[index] = data.attribute(0).addStringValue(id);
        index++;
        Map<String, Double> featureV = featureVectors.get(t);
        for (String w : featureV.keySet()) {
            vals[index] = featureV.get(w);
            index++;
        }
        DenseInstance inst = new DenseInstance(1.0, vals);
        inst.setDataset(data);
        inst.setClassValue(theClass);
        data.add(inst);
    }
    return data;
}
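In the example above, setDataset(data) is called before setClassValue(theClass): resolving a nominal class label to its index requires the instance to know the dataset's attribute definitions, and since add(Instance) appends a copy, the values set beforehand carry over. A compact sketch of that ordering, reusing vals and data from the example (the label "positive" is illustrative):

DenseInstance inst = new DenseInstance(1.0, vals);
inst.setDataset(data);           // instance can now see attribute and class definitions
inst.setClassValue("positive");  // nominal label is mapped to its index
data.add(inst);                  // a copy carrying these values is appended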
From source file:nlpmusic.StringClusterer.java
public static Instances listLoad(ArrayList<String> list) {
    FastVector attributes = new FastVector();
    attributes.addElement(new Attribute("attr", (FastVector) null));
    Instances datas = new Instances("Strings", attributes, 0);
    for (String str : list) {
        DenseInstance inst = new DenseInstance(1);
        inst.setValue(datas.attribute(0), str);
        datas.add(inst);
    }
    return datas;
}
From source file:OAT.trading.classification.Weka.java
License:Open Source License
private Instances getInstances(List<TrainingSample> trainingSet) {
    Instances data = new Instances("trainingSet", attributes, 0);

    for (TrainingSample trainingSample : trainingSet) {
        double[] vars = Arrays.copyOf(trainingSample.getInputVector(), attributes.size());
        int classIndex = attributes.size() - 1;

        vars[classIndex] = (Double) trainingSample.getDesiredOutput() < 0.5
                ? classes.indexOf("0")
                : classes.indexOf("1");

        data.add(new Instance(1.0, vars));
    }

    data.setClassIndex(attributes.size() - 1);
    return data;
}
From source file:OAT.trading.classification.Weka.java
License:Open Source License
private Instances getInstances(InputSample input) {
    Instances data = new Instances("inputSet", attributes, 0);

    double[] vars = Arrays.copyOf(input.getInputVector(), attributes.size());
    int classIndex = attributes.size() - 1;
    vars[classIndex] = classes.indexOf("0");

    data.add(new Instance(1.0, vars));
    data.setClassIndex(attributes.size() - 1);

    return data;
}
From source file:OnTheFlyMethods.FastImplementations.RedefinedWeightedNodePruning.java
License:Open Source License
protected boolean verifyValidEntities(int entityId, int xxx, List<AbstractBlock> newBlocks,
        ExecuteBlockComparisons ebc, Instances trainingInstances) {
    int index;
    retainedNeighbors.clear();
    if (!cleanCleanER) {
        //    for (int neighborId : validEntities) {
        //        if (isValidComparison(entityId, neighborId, ebc)) {
        //            totalComparisons++;
        //            duplicatePropagation.isSuperfluous(getComparison(entityId, neighborId));
        //        }
        //    }
    } else {
        if (entityId < datasetLimit) {
            //    //Iterator<Integer> temp = validEntitiesB.iterator();
            int size = validEntities.size();
            Iterator<Integer> it = validEntitiesB.iterator();
            for (int neighborId : validEntities) {
                Integer value = map.get(entityId);
                if (value != null && value == neighborId) {
                    //    System.out.println("----");
                    continue;
                }
                value = map.get(neighborId);
                if (value != null && value == entityId) {
                    //    System.out.println("----");
                    continue;
                }
                map.put(entityId, neighborId);
                //    if (entityId == 1178 && neighborId == 2562)
                //        System.out.println("ok");
                //    index = temp.next();
                int blockIndex = it.next();
                if (isValidComparison(entityId, neighborId, ebc)) {
                    totalComparisons++;
                    duplicatePropagation.isSuperfluous(getComparison(entityId, neighborId));
                    //    if (apagar++ % 1000 == 0)
                    //        System.out.println(apagar);
                    Comparison c;
                    if (entityId < datasetLimit)
                        c = new Comparison(true, entityId, neighborId - datasetLimit);
                    else
                        c = new Comparison(true, entityId - datasetLimit, neighborId);
                    final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(blockIndex, c);
                    if (commonBlockIndices == null)
                        continue;
                    //    if (!retainedEntitiesD1.contains(comparison.getEntityId1()))
                    //        retainedEntitiesD1.add(comparison.getEntityId1());
                    //    if (!retainedEntitiesD2.contains(comparison.getEntityId2()))
                    //        retainedEntitiesD2.add(comparison.getEntityId2());
                    //    if (c.getEntityId1() == 1 && c.getEntityId2() == 12088)
                    //        System.out.println();

                    double[] instanceValues = new double[8];
                    //    int entityId2 = comparison.getEntityId2() + entityIndex.getDatasetLimit();
                    double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(c.getEntityId1(), 0));
                    double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(c.getEntityId2(), 1));

                    instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2;

                    double raccb = 0;
                    for (Integer index1 : commonBlockIndices) {
                        raccb += 1.0 / comparisonsPerBlock[index1];
                    }
                    if (raccb < 1.0E-6) {
                        raccb = 1.0E-6;
                    }
                    instanceValues[1] = raccb;

                    String temp = Integer.toString(entityId) + "00" + Integer.toString(neighborId - datasetLimit);

                    instanceValues[2] = commonBlockIndices.size() / (redundantCPE[c.getEntityId1()]
                            + redundantCPE[c.getEntityId2()] - commonBlockIndices.size());
                    instanceValues[3] = nonRedundantCPE[c.getEntityId1()];
                    instanceValues[4] = nonRedundantCPE[c.getEntityId2()];
                    //    instanceValues[5] = ebc.getSimilarityAttribute(c.getEntityId1(), c.getEntityId2());
                    instanceValues[5] = getWeight(entityId, neighborId, ebc);
                    instanceValues[6] = (Math.sqrt(Math.pow(averageWeight[entityId], 2)
                            + Math.pow(averageWeight[neighborId], 2)) / 4) * getWeight(entityId, neighborId, ebc);
                    instanceValues[7] = adp.isSuperfluous(getComparison(entityId, neighborId)) ? 1 : 0;

                    Instance newInstance = new DenseInstance(1.0, instanceValues);
                    newInstance.setDataset(trainingInstances);
                    trainingInstances.add(newInstance);
                }
            }
        } else {
            Iterator<Integer> it = validEntitiesB.iterator();
            for (int neighborId : validEntities) {
                Integer value = map.get(entityId);
                if (value != null && value == neighborId) {
                    //    System.out.println("----");
                    continue;
                }
                value = map.get(neighborId);
                if (value != null && value == entityId) {
                    //    System.out.println("----");
                    continue;
                }
                map.put(entityId, neighborId);
                int blockIndex = it.next();
                //    if (isValidComparison(entityId, neighborId, ebc)) {
                //        totalComparisons++;
                //        duplicatePropagation.isSuperfluous(getComparison(entityId, neighborId));
                //        // if (apagar++ % 1000 == 0)
                //        //     System.out.println(apagar);
                //        // if (apagar == 3)
                //        //     System.out.println();
                //        Comparison c;
                //        if (entityId < datasetLimit)
                //            c = new Comparison(true, entityId, neighborId - datasetLimit);
                //        else
                //            c = new Comparison(true, entityId - datasetLimit, neighborId);
                //        final List<Integer> commonBlockIndices = entityIndex.getCommonBlockIndices(blockIndex, c);
                //        if (commonBlockIndices == null)
                //            continue;
                //        // if (!retainedEntitiesD1.contains(comparison.getEntityId1()))
                //        //     retainedEntitiesD1.add(comparison.getEntityId1());
                //        // if (!retainedEntitiesD2.contains(comparison.getEntityId2()))
                //        //     retainedEntitiesD2.add(comparison.getEntityId2());
                //        double[] instanceValues = new double[8];
                //        // int entityId2 = comparison.getEntityId2() + entityIndex.getDatasetLimit();
                //        double ibf1 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(entityId, 0));
                //        double ibf2 = Math.log(noOfBlocks / entityIndex.getNoOfEntityBlocks(neighborId - datasetLimit, 1));
                //        instanceValues[0] = commonBlockIndices.size() * ibf1 * ibf2;
                //        double raccb = 0;
                //        for (Integer index1 : commonBlockIndices) {
                //            raccb += 1.0 / comparisonsPerBlock[index1];
                //        }
                //        if (raccb < 1.0E-6) {
                //            raccb = 1.0E-6;
                //        }
                //        instanceValues[1] = raccb;
                //        instanceValues[2] = commonBlockIndices.size() / (redundantCPE[c.getEntityId1()]
                //                + redundantCPE[neighborId - datasetLimit] - commonBlockIndices.size());
                //        instanceValues[3] = nonRedundantCPE[entityId];
                //        instanceValues[4] = nonRedundantCPE[neighborId - datasetLimit];
                //        instanceValues[5] = (Math.sqrt(Math.pow(averageWeight[entityId], 2)
                //                + Math.pow(averageWeight[neighborId], 2)) / 4) * getWeight(entityId, neighborId, ebc);
                //        // instanceValues[5] = ebc.getSimilarityAttribute(c.getEntityId1(), c.getEntityId2());
                //        instanceValues[5] = getWeight(entityId, neighborId, ebc);
                //        instanceValues[6] = (Math.sqrt(Math.pow(averageWeight[entityId], 2)
                //                + Math.pow(averageWeight[neighborId], 2)) / 4) * getWeight(entityId, neighborId, ebc);
                //        instanceValues[7] = adp.isSuperfluous(getComparison(entityId, neighborId)) ? 1 : 0;
                //        Instance newInstance = new DenseInstance(1.0, instanceValues);
                //        newInstance.setDataset(trainingInstances);
                //        trainingInstances.add(newInstance);
                //        //return true;
                //    }
            }
        }
    }
    return false;
}