List of usage examples for weka.core Instances attribute
public Attribute attribute(String name)
From source file:cn.edu.xmu.dm.d3c.clustering.SimpleKMeans.java
License:Open Source License
/** * Move the centroid to it's new coordinates. Generate the centroid coordinates based * on it's members (objects assigned to the cluster of the centroid) and the distance * function being used./*from w w w . j a v a 2 s .c o m*/ * @param centroidIndex index of the centroid which the coordinates will be computed * @param members the objects that are assigned to the cluster of this centroid * @param updateClusterInfo if the method is supposed to update the m_Cluster arrays * @return the centroid coordinates */ protected double[] moveCentroid(int centroidIndex, Instances members, boolean updateClusterInfo) { double[] vals = new double[members.numAttributes()]; //used only for Manhattan Distance Instances sortedMembers = null; int middle = 0; boolean dataIsEven = false; if (m_DistanceFunction instanceof ManhattanDistance) { middle = (members.numInstances() - 1) / 2; dataIsEven = ((members.numInstances() % 2) == 0); if (m_PreserveOrder) { sortedMembers = members; } else { sortedMembers = new Instances(members); } } for (int j = 0; j < members.numAttributes(); j++) { //in case of Euclidian distance the centroid is the mean point //in case of Manhattan distance the centroid is the median point //in both cases, if the attribute is nominal, the centroid is the mode if (m_DistanceFunction instanceof EuclideanDistance || members.attribute(j).isNominal()) { vals[j] = members.meanOrMode(j); } else if (m_DistanceFunction instanceof ManhattanDistance) { //singleton special case if (members.numInstances() == 1) { vals[j] = members.instance(0).value(j); } else { sortedMembers.kthSmallestValue(j, middle + 1); vals[j] = sortedMembers.instance(middle).value(j); if (dataIsEven) { sortedMembers.kthSmallestValue(j, middle + 2); vals[j] = (vals[j] + sortedMembers.instance(middle + 1).value(j)) / 2; } } } if (updateClusterInfo) { m_ClusterMissingCounts[centroidIndex][j] = members.attributeStats(j).missingCount; m_ClusterNominalCounts[centroidIndex][j] = 
members.attributeStats(j).nominalCounts; if (members.attribute(j).isNominal()) { if (m_ClusterMissingCounts[centroidIndex][j] > m_ClusterNominalCounts[centroidIndex][j][Utils .maxIndex(m_ClusterNominalCounts[centroidIndex][j])]) { vals[j] = Utils.missingValue(); // mark mode as missing } } else { if (m_ClusterMissingCounts[centroidIndex][j] == members.numInstances()) { vals[j] = Utils.missingValue(); // mark mean as missing } } } } if (updateClusterInfo) m_ClusterCentroids.add(new DenseInstance(1.0, vals)); return vals; }
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
protected Instances runExp(Instances samplePoints, int round, String postfix, boolean resuming) { Instances retval = null; try {/*from w w w . j av a 2s. c o m*/ //DataIOFile.saveDataToArffFile("data/zyqTestRange.arff", samplePoints); if (resuming) { samplePoints = manager.collectPerfs(samplePoints, perfAttName); } retval = manager.runExp(samplePoints, perfAttName); //we output the result set for future debugging and testing purposes DataIOFile.saveDataToArffFile("data/trainingBestConf" + round + "_" + postfix + ".arff", samplePoints); //evict all bad configurations Attribute perfAtt = retval.attribute(perfAttName); Iterator<Instance> itr = retval.iterator(); ArrayList<Integer> toRemove = new ArrayList<Integer>(); Instance next; while (itr.hasNext()) { next = itr.next(); if (next.value(perfAtt) == -1) toRemove.add(retval.indexOf(next)); } while (!toRemove.isEmpty()) retval.remove(toRemove.remove(0)); } catch (IOException e) { e.printStackTrace(); } if (allInstances == null) { allInstances = new Instances(retval); } else { allInstances.addAll(retval); } return retval; }
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center) { ArrayList<Attribute> localAtts = new ArrayList<Attribute>(); int attNum = center.numAttributes(); int pos = previousSet.attribute(PerformanceAttName).index(); //traverse each dimension Enumeration<Instance> enu; double minDis; for (int i = 0; i < attNum; i++) { if (i == pos) continue; enu = previousSet.enumerateInstances(); minDis = Double.MAX_VALUE; while (enu.hasMoreElements()) { Instance ins = enu.nextElement(); if (!ins.equals(center)) minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 100)) / 100.0, minDis);//www.j av a2 s. c om } //now we set the range Properties p1 = new Properties(); double upper = center.value(i) + minDis, lower = center.value(i) - minDis; TreeSet<Double> detourSet = new TreeSet<Double>(); detourSet.add(upper); detourSet.add(lower); detourSet.add(previousSet.attribute(i).getUpperNumericBound()); detourSet.add(previousSet.attribute(i).getLowerNumericBound()); switch (detourSet.size()) { case 1: upper = lower = detourSet.first(); break; case 2: upper = detourSet.last(); lower = detourSet.first(); break; case 3: upper = lower = detourSet.higher(detourSet.first()); break; default://case 4: upper = detourSet.lower(detourSet.last()); lower = detourSet.higher(detourSet.first()); break; } p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]"); ProtectedProperties prop1 = new ProtectedProperties(p1); localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1)); } return localAtts; }
From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java
License:Open Source License
public static ArrayList<String> preprocessInstances(Instances retval) { double[][] cMatrix; ArrayList<String> result = new ArrayList<String>(); ArrayList<String> deleteAttNames = new ArrayList<String>(); PrincipalComponents pc = new PrincipalComponents(); HashMap<Integer, ArrayList<Integer>> filter = new HashMap<Integer, ArrayList<Integer>>(); try {//from w ww .j av a2s. c o m pc.buildEvaluator(retval); cMatrix = pc.getCorrelationMatrix(); for (int i = 0; i < cMatrix.length; i++) { ArrayList<Integer> record = new ArrayList<Integer>(); for (int j = i + 1; j < cMatrix.length; j++) if (cMatrix[i][j] >= correlationFactorThreshold || cMatrix[i][j] <= -correlationFactorThreshold) { record.add(j); } if (record.size() != 0) { filter.put(i, record); } } Iterator<Map.Entry<Integer, ArrayList<Integer>>> iter = filter.entrySet().iterator(); while (iter.hasNext()) { Map.Entry<Integer, ArrayList<Integer>> entry = iter.next(); ArrayList<Integer> arr = entry.getValue(); for (int i = 0; i < arr.size(); i++) if (arr.get(i) != cMatrix.length - 1 && !deleteAttNames.contains(retval.attribute(arr.get(i)).name())) { deleteAttNames.add(retval.attribute(arr.get(i)).name()); } if (arr.contains(cMatrix.length - 1)) { result.add(retval.attribute(Integer.parseInt(entry.getKey().toString())).name()); } } for (int i = 0; i < deleteAttNames.size(); i++) { retval.deleteAttributeAt(retval.attribute(deleteAttNames.get(i)).index()); } } catch (Exception e) { e.printStackTrace(); } return result; }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownNeighbordists(Instances previousSet, Instance center) { ArrayList<Attribute> localAtts = new ArrayList<Attribute>(); int attNum = center.numAttributes(); int pos = -1; if (previousSet.attribute(PerformanceAttName) != null) pos = previousSet.attribute(PerformanceAttName).index(); //traverse each dimension Enumeration<Instance> enu; double[] minDists = new double[2]; double val; for (int i = 0; i < attNum; i++) { if (i == pos) continue; enu = previousSet.enumerateInstances(); minDists[0] = 1 - Double.MAX_VALUE; minDists[1] = Double.MAX_VALUE; while (enu.hasMoreElements()) { Instance ins = enu.nextElement(); if (!ins.equals(center)) { val = ins.value(i) - center.value(i); if (val < 0) minDists[0] = Math.max((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[0]); else minDists[1] = Math.min((double) ((int) ((ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDists[1]); }/*from www .j ava2s . c o m*/ } //now we set the range Properties p1 = new Properties(); double upper = center.value(i) + minDists[1], lower = center.value(i) + minDists[0]; TreeSet<Double> detourSet = new TreeSet<Double>(); detourSet.add(upper); detourSet.add(lower); detourSet.add(previousSet.attribute(i).getUpperNumericBound()); detourSet.add(previousSet.attribute(i).getLowerNumericBound()); switch (detourSet.size()) { case 1: upper = lower = detourSet.first(); break; case 2: upper = detourSet.last(); lower = detourSet.first(); break; case 3: upper = lower = detourSet.higher(detourSet.first()); break; default://case 4: upper = detourSet.lower(detourSet.last()); lower = detourSet.higher(detourSet.first()); break; } p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]"); ProtectedProperties prop1 = new ProtectedProperties(p1); localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1)); } return localAtts; }
From source file:cn.ict.zyq.bestConf.bestConf.sampler.ConfigSampler.java
License:Open Source License
private static ArrayList<Attribute> scaleDownMindists(Instances previousSet, Instance center) { ArrayList<Attribute> localAtts = new ArrayList<Attribute>(); int attNum = center.numAttributes(); int pos = previousSet.attribute(PerformanceAttName).index(); //traverse each dimension Enumeration<Instance> enu; double minDis; for (int i = 0; i < attNum; i++) { if (i == pos) continue; enu = previousSet.enumerateInstances(); minDis = Double.MAX_VALUE; while (enu.hasMoreElements()) { Instance ins = enu.nextElement(); if (!ins.equals(center)) minDis = Math.min((double) ((int) (Math.abs(ins.value(i) - center.value(i)) * 1000)) / 1000.0, minDis);//from w w w .j av a 2s. c om } //now we set the range Properties p1 = new Properties(); double upper = center.value(i) + minDis, lower = center.value(i) - minDis; TreeSet<Double> detourSet = new TreeSet<Double>(); detourSet.add(upper); detourSet.add(lower); detourSet.add(previousSet.attribute(i).getUpperNumericBound()); detourSet.add(previousSet.attribute(i).getLowerNumericBound()); switch (detourSet.size()) { case 1: upper = lower = detourSet.first(); break; case 2: upper = detourSet.last(); lower = detourSet.first(); break; case 3: upper = lower = detourSet.higher(detourSet.first()); break; default://case 4: upper = detourSet.lower(detourSet.last()); lower = detourSet.higher(detourSet.first()); break; } p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]"); ProtectedProperties prop1 = new ProtectedProperties(p1); localAtts.add(new Attribute(previousSet.attribute(i).name(), prop1)); } return localAtts; }
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
public Instances runExp(Instances samplePoints, String perfAttName) { Instances retVal = null;/*from w w w .java2 s.co m*/ if (samplePoints.attribute(perfAttName) == null) { Attribute performance = new Attribute(perfAttName); samplePoints.insertAttributeAt(performance, samplePoints.numAttributes()); } int pos = samplePoints.numInstances(); int count = 0; for (int i = 0; i < pos; i++) { Instance ins = samplePoints.get(i); HashMap hm = new HashMap(); int tot = 0; for (int j = 0; j < ins.numAttributes(); j++) { hm.put(ins.attribute(j).name(), ins.value(ins.attribute(j))); } boolean testRet; if (Double.isNaN(ins.value(ins.attribute(ins.numAttributes() - 1)))) { testRet = this.startTest(hm, i, isInterrupt); double y = 0; if (!testRet) {// the setting does not work, we skip it y = -1; count++; if (count >= targetTestErrorNum) { System.out.println( "There must be somthing wrong with the system. Please check and restart....."); System.exit(1); } } else { y = getPerformanceByType(performanceType); count = 0; } ins.setValue(samplePoints.numAttributes() - 1, y); writePerfstoFile(ins); } else { continue; } } retVal = samplePoints; retVal.setClassIndex(retVal.numAttributes() - 1); return retVal; }
From source file:cn.ict.zyq.bestConf.cluster.Main.AutoTestAdjust.java
License:Open Source License
@Override public Instances collectPerfs(Instances samplePoints, String perfAttName) { Instances retVal = null;/*from www. j a va 2 s. co m*/ if (samplePoints.attribute(perfAttName) == null) { Attribute performance = new Attribute(perfAttName); samplePoints.insertAttributeAt(performance, samplePoints.numAttributes()); } File perfFolder = new File(perfsfilepath); int tot = 0; if (perfFolder.exists()) { //let's get all the name set for the sample points Iterator<Instance> itr = samplePoints.iterator(); TreeSet<String> insNameSet = new TreeSet<String>(); HashMap<String, Integer> mapping = new HashMap<String, Integer>(); int pos = 0; while (itr.hasNext()) { String mdstr = getMD5(itr.next()); insNameSet.add(mdstr); mapping.put(mdstr, new Integer(pos++)); } //now we collect File[] perfFiles = perfFolder.listFiles(new PerfsFileFilter(insNameSet)); tot = perfFiles.length; if (tot > 0) isInterrupt = true; for (int i = 0; i < tot; i++) { Instance ins = samplePoints.get(mapping.get(perfFiles[i].getName())); double[] results = getPerf(perfFiles[i].getAbsolutePath()); if (results != null) { ins.setValue(samplePoints.numAttributes() - 1, results[0]); } } } retVal = samplePoints; retVal.setClassIndex(retVal.numAttributes() - 1); System.out.println("Total number of collected performances is : " + tot); return retVal; }
From source file:com.dhamacher.sentimentanalysis4tweets.preprocessing.TweetClassifier.java
License:Apache License
/**
 * Wraps a text message in a two-attribute instance attached to the given header.
 * <p>
 * The text is registered as a string value of the attribute named "content", and the
 * resulting index is stored in the new instance.
 *
 * @param text the message content to convert
 * @param data the header information
 * @return the generated Instance
 */
private Instance makeInstance(String text, Instances data) {
    final Instance result = new Instance(2);
    final Attribute contentAtt = data.attribute("content");

    // addStringValue returns the index under which the text is stored in the attribute
    final int textIndex = contentAtt.addStringValue(text);
    result.setValue(contentAtt, textIndex);

    result.setDataset(data);
    return result;
}
From source file:com.emar.recsys.user.model.WekaExperiment.java
License:Open Source License
/** * Expects the following parameters:/* w w w . j av a 2s . c o m*/ * <ul> * <li>-classifier "classifier incl. parameters"</li> * <li>-exptype "classification|regression"</li> * <li>-splittype "crossvalidation|randomsplit"</li> * <li>-runs "# of runs"</li> * <li>-folds "# of cross-validation folds"</li> * <li>-percentage "percentage for randomsplit"</li> * <li>-result "arff file for storing the results"</li> * <li>-t "dataset" (can be supplied multiple times)</li> * </ul> * * @param args * the commandline arguments * @throws Exception * if something goes wrong */ public static void main(String[] args) throws Exception { // parameters provided? if (args.length == 0) { System.out.println("\nUsage: ExperimentDemo\n" + "\t -classifier <classifier incl. parameters>\n" + "\t -exptype <classification|regression>\n" + "\t -splittype <crossvalidation|randomsplit>\n" + "\t -runs <# of runs>\n" + "\t -folds <folds for CV>\n" + "\t -percentage <percentage for randomsplit>\n" + "\t -result <ARFF file for storing the results>\n" + "\t -t dataset (can be supplied multiple times)\n"); System.exit(1); } // 1. 
setup the experiment System.out.println("Setting up..."); Experiment exp = new Experiment(); exp.setPropertyArray(new Classifier[0]); exp.setUsePropertyIterator(true); String option; // classification or regression option = Utils.getOption("exptype", args); if (option.length() == 0) throw new IllegalArgumentException("No experiment type provided!"); SplitEvaluator se = null; Classifier sec = null; boolean classification = false; if (option.equals("classification")) { classification = true; se = new ClassifierSplitEvaluator(); sec = ((ClassifierSplitEvaluator) se).getClassifier(); } else if (option.equals("regression")) { se = new RegressionSplitEvaluator(); sec = ((RegressionSplitEvaluator) se).getClassifier(); } else { throw new IllegalArgumentException("Unknown experiment type '" + option + "'!"); } // crossvalidation or randomsplit option = Utils.getOption("splittype", args); if (option.length() == 0) throw new IllegalArgumentException("No split type provided!"); if (option.equals("crossvalidation")) { CrossValidationResultProducer cvrp = new CrossValidationResultProducer(); option = Utils.getOption("folds", args); if (option.length() == 0) throw new IllegalArgumentException("No folds provided!"); cvrp.setNumFolds(Integer.parseInt(option)); cvrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class), CrossValidationResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(cvrp); exp.setPropertyPath(propertyPath); } else if (option.equals("randomsplit")) { RandomSplitResultProducer rsrp = new RandomSplitResultProducer(); rsrp.setRandomizeData(true); option = Utils.getOption("percentage", args); if (option.length() == 0) throw new IllegalArgumentException("No percentage 
provided!"); rsrp.setTrainPercent(Double.parseDouble(option)); rsrp.setSplitEvaluator(se); PropertyNode[] propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class), RandomSplitResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(rsrp); exp.setPropertyPath(propertyPath); } else { throw new IllegalArgumentException("Unknown split type '" + option + "'!"); } // runs option = Utils.getOption("runs", args); if (option.length() == 0) throw new IllegalArgumentException("No runs provided!"); exp.setRunLower(1); exp.setRunUpper(Integer.parseInt(option)); // classifier option = Utils.getOption("classifier", args); if (option.length() == 0) throw new IllegalArgumentException("No classifier provided!"); String[] options = Utils.splitOptions(option); String classname = options[0]; options[0] = ""; Classifier c = (Classifier) Utils.forName(Classifier.class, classname, options); exp.setPropertyArray(new Classifier[] { c }); // datasets boolean data = false; DefaultListModel model = new DefaultListModel(); do { option = Utils.getOption("t", args); if (option.length() > 0) { File file = new File(option); if (!file.exists()) throw new IllegalArgumentException("File '" + option + "' does not exist!"); data = true; model.addElement(file); } } while (option.length() > 0); if (!data) throw new IllegalArgumentException("No data files provided!"); exp.setDatasets(model); // result option = Utils.getOption("result", args); if (option.length() == 0) throw new IllegalArgumentException("No result file provided!"); InstancesResultListener irl = new InstancesResultListener(); irl.setOutputFile(new File(option)); exp.setResultListener(irl); // 2. 
run experiment System.out.println("Initializing..."); exp.initialize(); System.out.println("Running..."); exp.runExperiment(); System.out.println("Finishing..."); exp.postProcess(); // 3. calculate statistics and output them System.out.println("Evaluating..."); PairedTTester tester = new PairedCorrectedTTester(); Instances result = new Instances(new BufferedReader(new FileReader(irl.getOutputFile()))); tester.setInstances(result); tester.setSortColumn(-1); tester.setRunColumn(result.attribute("Key_Run").index()); if (classification) tester.setFoldColumn(result.attribute("Key_Fold").index()); tester.setResultsetKeyColumns(new Range("" + (result.attribute("Key_Dataset").index() + 1))); tester.setDatasetKeyColumns(new Range("" + (result.attribute("Key_Scheme").index() + 1) + "," + (result.attribute("Key_Scheme_options").index() + 1) + "," + (result.attribute("Key_Scheme_version_ID").index() + 1))); tester.setResultMatrix(new ResultMatrixPlainText()); tester.setDisplayedResultsets(null); tester.setSignificanceLevel(0.05); tester.setShowStdDevs(true); // fill result matrix (but discarding the output) if (classification) tester.multiResultsetFull(0, result.attribute("Percent_correct").index()); else tester.multiResultsetFull(0, result.attribute("Correlation_coefficient").index()); // output results for reach dataset System.out.println("\nResult:"); ResultMatrix matrix = tester.getResultMatrix(); for (int i = 0; i < matrix.getColCount(); i++) { System.out.println(matrix.getColName(i)); System.out.println(" Perc. correct: " + matrix.getMean(i, 0)); System.out.println(" StdDev: " + matrix.getStdDev(i, 0)); } }