List of usage examples for the weka.core.neighboursearch.LinearNNSearch constructor, LinearNNSearch()
public LinearNNSearch()
From source file:MultiLabelKNN.java
License:Open Source License
protected void buildInternal(MultiLabelInstances trainSet) throws Exception { if (trainSet.getNumInstances() < numOfNeighbors) { throw new IllegalArgumentException( "The number of training instances is less than the number of requested nearest neighbours"); }/* w ww . jav a 2 s . c om*/ train = new Instances(trainSet.getDataSet()); // label attributes don't influence distance estimation String labelIndicesString = ""; for (int i = 0; i < numLabels - 1; i++) { labelIndicesString += (labelIndices[i] + 1) + ","; } labelIndicesString += (labelIndices[numLabels - 1] + 1); dfunc.setAttributeIndices(labelIndicesString); dfunc.setInvertSelection(true); lnn = new LinearNNSearch(); lnn.setDistanceFunction(dfunc); lnn.setInstances(train); lnn.setMeasurePerformance(false); }
From source file:adams.flow.transformer.WekaNearestNeighborSearch.java
License:Open Source License
/**
 * Adds options to the internal list of options.
 */
@Override
public void defineOptions() {
    super.defineOptions();

    // the nearest-neighbour search algorithm (defaults to a linear scan)
    m_OptionManager.add("search", "search", new LinearNNSearch());

    // upper bound on the number of neighbours to return (minimum 1)
    m_OptionManager.add("max-neighbors", "maxNeighbors", 10, 1, null);

    // storage item and variable name for publishing the result
    m_OptionManager.add("storage", "storage", new StorageName());
    m_OptionManager.add("var-name", "variableName", new VariableName());
}
From source file:com.edwardraff.WekaMNIST.java
License:Open Source License
public static void main(String[] args) throws IOException, Exception { String folder = args[0];//from www .jav a 2s .c om String trainPath = folder + "MNISTtrain.arff"; String testPath = folder + "MNISTtest.arff"; System.out.println("Weka Timings"); Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath)))); mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1); Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath)))); mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1); //normalize range like into [0, 1] Normalize normalizeFilter = new Normalize(); normalizeFilter.setInputFormat(mnistTrainWeka); mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter); mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter); long start, end; System.out.println("RBF SVM (Full Cache)"); SMO smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("RBF SVM (No Cache)"); smo = new SMO(); smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625)); smo.setC(8.0); smo.setBuildLogisticModels(false); evalModel(smo, mnistTrainWeka, mnistTestWeka); System.out.println("Decision Tree C45"); J48 wekaC45 = new J48(); wekaC45.setUseLaplace(false); wekaC45.setCollapseTree(false); wekaC45.setUnpruned(true); wekaC45.setMinNumObj(2); wekaC45.setUseMDLcorrection(true); evalModel(wekaC45, mnistTrainWeka, mnistTestWeka); System.out.println("Random Forest 50 trees"); int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way RandomForest wekaRF = new RandomForest(); wekaRF.setNumExecutionSlots(1); wekaRF.setMaxDepth(0/*0 for unlimited*/); wekaRF.setNumFeatures(featuresToUse); wekaRF.setNumTrees(50); evalModel(wekaRF, mnistTrainWeka, 
mnistTestWeka); System.out.println("1-NN (brute)"); IBk wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Ball Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("1-NN (Cover Tree)"); wekaNN = new IBk(1); wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree()); wekaNN.setCrossValidate(false); evalModel(wekaNN, mnistTrainWeka, mnistTestWeka); System.out.println("Logistic Regression LBFGS lambda = 1e-4"); Logistic logisticLBFGS = new Logistic(); logisticLBFGS.setRidge(1e-4); logisticLBFGS.setMaxIts(500); evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka); System.out.println("k-means (Loyd)"); int origClassIndex = mnistTrainWeka.classIndex(); mnistTrainWeka.setClassIndex(-1); mnistTrainWeka.deleteAttributeAt(origClassIndex); { long totalTime = 0; for (int i = 0; i < 10; i++) { SimpleKMeans wekaKMeans = new SimpleKMeans(); wekaKMeans.setNumClusters(10); wekaKMeans.setNumExecutionSlots(1); wekaKMeans.setFastDistanceCalc(true); start = System.currentTimeMillis(); wekaKMeans.buildClusterer(mnistTrainWeka); end = System.currentTimeMillis(); totalTime += (end - start); } System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average"); } }
From source file:hurtowniedanych.FXMLController.java
public void trainAndTestKNN() throws FileNotFoundException, IOException, Exception { InstanceQuery instanceQuery = new InstanceQuery(); instanceQuery.setUsername("postgres"); instanceQuery.setPassword("szupek"); instanceQuery.setCustomPropsFile(new File("./src/data/DatabaseUtils.props")); // Wskazanie pliku z ustawieniami dla PostgreSQL String query = "select ks.wydawnictwo,ks.gatunek, kl.mia-sto\n" + "from zakupy z,ksiazki ks,klienci kl\n" + "where ks.id_ksiazka=z.id_ksiazka and kl.id_klient=z.id_klient"; instanceQuery.setQuery(query);// www . j a va 2s . co m Instances data = instanceQuery.retrieveInstances(); data.setClassIndex(data.numAttributes() - 1); data.randomize(new Random()); double percent = 70.0; int trainSize = (int) Math.round(data.numInstances() * percent / 100); int testSize = data.numInstances() - trainSize; Instances trainData = new Instances(data, 0, trainSize); Instances testData = new Instances(data, trainSize, testSize); int lSasiadow = Integer.parseInt(textFieldKnn.getText()); System.out.println(lSasiadow); IBk ibk = new IBk(lSasiadow); // Ustawienie odleglosci EuclideanDistance euclidean = new EuclideanDistance(); // euklidesowej ManhattanDistance manhatan = new ManhattanDistance(); // miejska LinearNNSearch linearNN = new LinearNNSearch(); if (comboboxOdleglosc.getSelectionModel().getSelectedItem().equals("Manhatan")) { linearNN.setDistanceFunction(manhatan); } else { linearNN.setDistanceFunction(euclidean); } ibk.setNearestNeighbourSearchAlgorithm(linearNN); // ustawienie sposobu szukania sasiadow // Tworzenie klasyfikatora ibk.buildClassifier(trainData); Evaluation eval = new Evaluation(trainData); eval.evaluateModel(ibk, testData); spr.setVisible(true); labelKnn.setVisible(true); labelOdleglosc.setVisible(true); labelKnn.setText(textFieldKnn.getText()); labelOdleglosc.setText(comboboxOdleglosc.getSelectionModel().getSelectedItem().toString()); spr.setText(eval.toSummaryString("Wynik:", true)); }
From source file:jjj.asap.sas.models1.job.BuildCosineModels.java
License:Open Source License
@Override protected void run() throws Exception { // validate args if (!Bucket.isBucket("datasets", inputBucket)) { throw new FileNotFoundException(inputBucket); }/* w ww.j av a2s .c om*/ if (!Bucket.isBucket("models", outputBucket)) { throw new FileNotFoundException(outputBucket); } // init multi-threading Job.startService(); final Queue<Future<Object>> queue = new LinkedList<Future<Object>>(); // get the input from the bucket List<String> names = Bucket.getBucketItems("datasets", this.inputBucket); for (String dsn : names) { int essaySet = Contest.getEssaySet(dsn); int k = -1; switch (essaySet) { case 3: k = 13; break; case 5: case 7: k = 55; break; case 2: case 6: case 10: k = 21; break; case 1: case 4: case 8: case 9: k = 34; break; } if (k == -1) { throw new IllegalArgumentException("not k defined for " + essaySet); } LinearNNSearch search = new LinearNNSearch(); search.setDistanceFunction(new CosineDistance()); search.setSkipIdentical(false); IBk knn = new IBk(); knn.setKNN(k); knn.setDistanceWeighting(INVERSE); knn.setNearestNeighbourSearchAlgorithm(search); queue.add(Job.submit(new ModelBuilder(dsn, "KNN-" + k, knn, this.outputBucket))); } // wait on complete Progress progress = new Progress(queue.size(), this.getClass().getSimpleName()); while (!queue.isEmpty()) { try { queue.remove().get(); } catch (Exception e) { Job.log("ERROR", e.toString()); e.printStackTrace(System.err); } progress.tick(); } progress.done(); Job.stopService(); }
From source file:mulan.classifier.lazy.MultiLabelKNN.java
License:Open Source License
protected void buildInternal(MultiLabelInstances trainSet) throws Exception { train = new Instances(trainSet.getDataSet()); dfunc = new EuclideanDistance(); dfunc.setDontNormalize(dontNormalize); // label attributes don't influence distance estimation String labelIndicesString = ""; for (int i = 0; i < numLabels - 1; i++) { labelIndicesString += (labelIndices[i] + 1) + ","; }/*ww w . j a v a 2 s . c o m*/ labelIndicesString += (labelIndices[numLabels - 1] + 1); dfunc.setAttributeIndices(labelIndicesString); dfunc.setInvertSelection(true); lnn = new LinearNNSearch(); lnn.setDistanceFunction(dfunc); lnn.setInstances(train); lnn.setMeasurePerformance(false); }
From source file:mulan.classifier.transformation.MultiLabelStacking.java
License:Open Source License
/** * Used only in case of a kNN base classifier. * * @param trainingSet// ww w . j a v a 2 s. c o m * @throws Exception */ public void buildBaseLevelKNN(MultiLabelInstances trainingSet) throws Exception { train = new Instances(trainingSet.getDataSet()); EuclideanDistance dfunc = new EuclideanDistance(); dfunc.setDontNormalize(false); // label attributes don't influence distance estimation String labelIndicesString = ""; for (int i = 0; i < numLabels - 1; i++) { labelIndicesString += (labelIndices[i] + 1) + ","; } labelIndicesString += (labelIndices[numLabels - 1] + 1); dfunc.setAttributeIndices(labelIndicesString); dfunc.setInvertSelection(true); lnn = new LinearNNSearch(); lnn.setSkipIdentical(true); lnn.setDistanceFunction(dfunc); lnn.setInstances(train); lnn.setMeasurePerformance(false); // initialize the table holding the predictions of the first level // classifiers for each label for every instance of the training set baseLevelPredictions = new double[train.numInstances()][numLabels]; int numOfNeighbors = ((IBk) baseClassifier).getKNN(); /* * /old way using brknn * brknn = new BRkNN(numOfNeighbors); * brknn.setDebug(true); brknn.build(trainingSet); for (int i = 0; i < * train.numInstances(); i++) { MultiLabelOutput prediction = * brknn.makePrediction(train.instance(i)); baseLevelPredictions[i] = * prediction.getConfidences(); } */ // new way for (int i = 0; i < train.numInstances(); i++) { Instances knn = new Instances(lnn.kNearestNeighbours(train.instance(i), numOfNeighbors)); // Get the label confidence vector as the additional features. for (int j = 0; j < numLabels; j++) { // compute sum of counts for each label in KNN double count_for_label_j = 0; for (int k = 0; k < numOfNeighbors; k++) { String value = train.attribute(labelIndices[j]) .value((int) knn.instance(k).value(labelIndices[j])); if (value.equals("1")) { count_for_label_j++; } } baseLevelPredictions[i][j] = count_for_label_j / numOfNeighbors; } } }
From source file:test.org.moa.opencl.IBk.java
License:Open Source License
/** * Parses a given list of options. <p/> * <!-- options-start -->/*from w w w . j a v a2s. co m*/ * Valid options are: <p/> * * <pre> -I * Weight neighbours by the inverse of their distance * (use when k > 1)</pre> * * <pre> -F * Weight neighbours by 1 - their distance * (use when k > 1)</pre> * * <pre> -K <number of neighbors> * Number of nearest neighbours (k) used in classification. * (Default = 1)</pre> * * <pre> -E * Minimise mean squared error rather than mean absolute * error when using -X option with numeric prediction.</pre> * * <pre> -W <window size> * Maximum number of training instances maintained. * Training instances are dropped FIFO. (Default = no window)</pre> * * <pre> -X * Select the number of nearest neighbours between 1 * and the k value specified using hold-one-out evaluation * on the training data (use when k > 1)</pre> * * <pre> -A * The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch). * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String knnString = Utils.getOption('K', options); if (knnString.length() != 0) { setKNN(Integer.parseInt(knnString)); } else { setKNN(1); } String windowString = Utils.getOption('W', options); if (windowString.length() != 0) { setWindowSize(Integer.parseInt(windowString)); } else { setWindowSize(0); } if (Utils.getFlag('I', options)) { setDistanceWeighting(new SelectedTag(WEIGHT_INVERSE, TAGS_WEIGHTING)); } else if (Utils.getFlag('F', options)) { setDistanceWeighting(new SelectedTag(WEIGHT_SIMILARITY, TAGS_WEIGHTING)); } else { setDistanceWeighting(new SelectedTag(WEIGHT_NONE, TAGS_WEIGHTING)); } setCrossValidate(Utils.getFlag('X', options)); setMeanSquared(Utils.getFlag('E', options)); String nnSearchClass = Utils.getOption('A', options); if (nnSearchClass.length() != 0) { String nnSearchClassSpec[] = 
Utils.splitOptions(nnSearchClass); if (nnSearchClassSpec.length == 0) { throw new Exception("Invalid NearestNeighbourSearch algorithm " + "specification string."); } String className = nnSearchClassSpec[0]; nnSearchClassSpec[0] = ""; setNearestNeighbourSearchAlgorithm((NearestNeighbourSearch) Utils.forName(NearestNeighbourSearch.class, className, nnSearchClassSpec)); } else this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch()); Utils.checkForRemainingOptions(options); }