Example usage for weka.core.neighboursearch LinearNNSearch LinearNNSearch

List of usage examples for weka.core.neighboursearch LinearNNSearch LinearNNSearch

Introduction

In this page you can find the example usage for weka.core.neighboursearch LinearNNSearch LinearNNSearch.

Prototype

public LinearNNSearch() 

Source Link

Document

Constructor.

Usage

From source file: MultiLabelKNN.java

License: Open Source License

/**
 * Builds the internal nearest-neighbour search structures from the training set.
 *
 * <p>Copies the training data, configures the distance function to ignore the
 * label attributes, and initialises a brute-force {@link LinearNNSearch} over
 * the copy. Assumes {@code numLabels >= 1}.</p>
 *
 * @param trainSet the multi-label training data
 * @throws IllegalArgumentException if there are fewer training instances than
 *         the number of requested neighbours
 * @throws Exception propagated from the underlying Weka calls
 */
protected void buildInternal(MultiLabelInstances trainSet) throws Exception {
    if (trainSet.getNumInstances() < numOfNeighbors) {
        throw new IllegalArgumentException(
                "The number of training instances is less than the number of requested nearest neighbours");
    }
    train = new Instances(trainSet.getDataSet());

    // Label attributes must not influence distance estimation: select the
    // label columns (Weka attribute ranges are 1-based, hence the +1) and
    // invert the selection so only feature attributes are compared.
    StringBuilder labelIndicesString = new StringBuilder();
    for (int i = 0; i < numLabels; i++) {
        if (i > 0) {
            labelIndicesString.append(',');
        }
        labelIndicesString.append(labelIndices[i] + 1);
    }
    dfunc.setAttributeIndices(labelIndicesString.toString());
    dfunc.setInvertSelection(true);

    lnn = new LinearNNSearch();
    lnn.setDistanceFunction(dfunc);
    lnn.setInstances(train);
    lnn.setMeasurePerformance(false);
}

From source file:adams.flow.transformer.WekaNearestNeighborSearch.java

License:Open Source License

/**
 * Adds options to the internal list of options.
 */
@Override
public void defineOptions() {
    super.defineOptions();

    // Nearest-neighbour search algorithm; brute-force linear search by default.
    m_OptionManager.add("search", "search", new LinearNNSearch());

    // Maximum number of neighbours to retrieve (lower bound 1, no upper bound).
    m_OptionManager.add("max-neighbors", "maxNeighbors", 10, 1, null);

    // NOTE(review): presumably the storage item holding the instances to
    // search, and the variable receiving the result — confirm against the
    // transformer's doOwnExecute/usage.
    m_OptionManager.add("storage", "storage", new StorageName());

    m_OptionManager.add("var-name", "variableName", new VariableName());
}

From source file:com.edwardraff.WekaMNIST.java

License:Open Source License

/**
 * Benchmarks a range of Weka classifiers and one clusterer on MNIST ARFF data.
 *
 * <p>Expects one argument: the folder containing {@code MNISTtrain.arff} and
 * {@code MNISTtest.arff}. Classifier timings are reported via
 * {@code evalModel(...)}; k-means is timed inline over 10 runs.</p>
 *
 * @param args args[0] is the directory holding the ARFF files
 * @throws IOException if the ARFF files cannot be read
 * @throws Exception   propagated from Weka filtering / model building
 */
public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    // Scale all attributes into [0, 1]; the filter is fitted on the training
    // set only and then applied to both splits (no test-set leakage).
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);

    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    // Cache size 0 makes Weka cache the whole kernel matrix.
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);

    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    // Weka uses different defaults, so make sure it uses the published
    // sqrt(#features) feature-sampling rule (MNIST is 28x28 pixels).
    int featuresToUse = (int) Math.sqrt(28 * 28);

    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0 /* 0 for unlimited */);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);

    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);

    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    // Clustering is unsupervised: unset and drop the class attribute first.
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);

            long start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            long end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}

From source file:hurtowniedanych.FXMLController.java

/**
 * Trains and evaluates a k-NN (IBk) classifier on data pulled from PostgreSQL,
 * then displays the evaluation summary in the UI.
 *
 * <p>The neighbour count comes from {@code textFieldKnn} and the distance
 * function (Euclidean or Manhattan) from {@code comboboxOdleglosc}.</p>
 *
 * @throws Exception propagated from the database query or Weka
 */
public void trainAndTestKNN() throws FileNotFoundException, IOException, Exception {

    InstanceQuery instanceQuery = new InstanceQuery();
    instanceQuery.setUsername("postgres");
    instanceQuery.setPassword("szupek");
    // Properties file with the PostgreSQL-specific JDBC settings.
    instanceQuery.setCustomPropsFile(new File("./src/data/DatabaseUtils.props"));

    // NOTE(review): the original column name "mia-sto" is not a valid SQL
    // identifier and appears to be a garbled "miasto" (Polish for "city");
    // fixed here — confirm against the actual schema.
    String query = "select ks.wydawnictwo,ks.gatunek, kl.miasto\n" + "from zakupy z,ksiazki ks,klienci kl\n"
            + "where ks.id_ksiazka=z.id_ksiazka and kl.id_klient=z.id_klient";

    instanceQuery.setQuery(query);
    Instances data = instanceQuery.retrieveInstances();
    data.setClassIndex(data.numAttributes() - 1);

    // Shuffle, then take a 70/30 train/test split.
    data.randomize(new Random());
    double percent = 70.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    // Number of neighbours, taken from the UI text field.
    int lSasiadow = Integer.parseInt(textFieldKnn.getText());
    System.out.println(lSasiadow);

    IBk ibk = new IBk(lSasiadow);

    // Distance functions the user can pick from.
    EuclideanDistance euclidean = new EuclideanDistance();
    ManhattanDistance manhatan = new ManhattanDistance();

    LinearNNSearch linearNN = new LinearNNSearch();

    if (comboboxOdleglosc.getSelectionModel().getSelectedItem().equals("Manhatan")) {
        linearNN.setDistanceFunction(manhatan);
    } else {
        linearNN.setDistanceFunction(euclidean);
    }

    // Neighbour search strategy used by IBk.
    ibk.setNearestNeighbourSearchAlgorithm(linearNN);

    // Build the classifier and evaluate on the held-out split.
    ibk.buildClassifier(trainData);

    Evaluation eval = new Evaluation(trainData);
    eval.evaluateModel(ibk, testData);
    spr.setVisible(true);
    labelKnn.setVisible(true);
    labelOdleglosc.setVisible(true);
    labelKnn.setText(textFieldKnn.getText());
    labelOdleglosc.setText(comboboxOdleglosc.getSelectionModel().getSelectedItem().toString());
    spr.setText(eval.toSummaryString("Wynik:", true));
}

From source file:jjj.asap.sas.models1.job.BuildCosineModels.java

License:Open Source License

/**
 * Builds one cosine-distance KNN model per dataset in the input bucket,
 * running the builders concurrently and storing results in the output bucket.
 *
 * @throws FileNotFoundException if either bucket does not exist
 * @throws Exception propagated from job submission / model building
 */
@Override
protected void run() throws Exception {

    // Validate the bucket arguments before doing any work.
    if (!Bucket.isBucket("datasets", inputBucket)) {
        throw new FileNotFoundException(inputBucket);
    }
    if (!Bucket.isBucket("models", outputBucket)) {
        throw new FileNotFoundException(outputBucket);
    }

    // Init multi-threading; model builds are submitted as asynchronous jobs.
    Job.startService();
    final Queue<Future<Object>> queue = new LinkedList<>();

    // Build one model per dataset in the input bucket.
    List<String> names = Bucket.getBucketItems("datasets", this.inputBucket);
    for (String dsn : names) {

        int essaySet = Contest.getEssaySet(dsn);

        // Per-essay-set neighbour counts (hand-tuned elsewhere).
        int k = -1;
        switch (essaySet) {

        case 3:
            k = 13;
            break;
        case 5:
        case 7:
            k = 55;
            break;
        case 2:
        case 6:
        case 10:
            k = 21;
            break;
        case 1:
        case 4:
        case 8:
        case 9:
            k = 34;
            break;
        }

        if (k == -1) {
            throw new IllegalArgumentException("no k defined for " + essaySet);
        }

        // Brute-force neighbour search over cosine distance.
        LinearNNSearch search = new LinearNNSearch();
        search.setDistanceFunction(new CosineDistance());
        search.setSkipIdentical(false);

        IBk knn = new IBk();
        knn.setKNN(k);
        knn.setDistanceWeighting(INVERSE);
        knn.setNearestNeighbourSearchAlgorithm(search);

        queue.add(Job.submit(new ModelBuilder(dsn, "KNN-" + k, knn, this.outputBucket)));
    }

    // Wait for all builds to complete, logging (not propagating) failures so
    // one bad dataset does not abort the rest of the batch.
    Progress progress = new Progress(queue.size(), this.getClass().getSimpleName());
    while (!queue.isEmpty()) {
        try {
            queue.remove().get();
        } catch (Exception e) {
            Job.log("ERROR", e.toString());
            e.printStackTrace(System.err);
        }
        progress.tick();
    }
    progress.done();
    Job.stopService();

}

From source file:mulan.classifier.lazy.MultiLabelKNN.java

License:Open Source License

/**
 * Builds the internal nearest-neighbour search structures from the training set.
 *
 * <p>Copies the training data, sets up a Euclidean distance that ignores the
 * label attributes, and initialises a brute-force {@link LinearNNSearch} over
 * the copy. Assumes {@code numLabels >= 1}.</p>
 *
 * @param trainSet the multi-label training data
 * @throws Exception propagated from the underlying Weka calls
 */
protected void buildInternal(MultiLabelInstances trainSet) throws Exception {
    train = new Instances(trainSet.getDataSet());

    dfunc = new EuclideanDistance();
    dfunc.setDontNormalize(dontNormalize);

    // Label attributes must not influence distance estimation: select the
    // label columns (Weka attribute ranges are 1-based, hence the +1) and
    // invert the selection so only feature attributes are compared.
    StringBuilder labelIndicesString = new StringBuilder();
    for (int i = 0; i < numLabels; i++) {
        if (i > 0) {
            labelIndicesString.append(',');
        }
        labelIndicesString.append(labelIndices[i] + 1);
    }
    dfunc.setAttributeIndices(labelIndicesString.toString());
    dfunc.setInvertSelection(true);

    lnn = new LinearNNSearch();
    lnn.setDistanceFunction(dfunc);
    lnn.setInstances(train);
    lnn.setMeasurePerformance(false);
}

From source file:mulan.classifier.transformation.MultiLabelStacking.java

License:Open Source License

/**
 * Builds the base level of the stacking ensemble when the base classifier is
 * kNN, filling {@code baseLevelPredictions} with per-label confidence scores.
 *
 * <p>For every training instance, its k nearest neighbours (excluding
 * identical instances) are retrieved and each label's confidence is the
 * fraction of neighbours having that label set to "1".</p>
 *
 * @param trainingSet the multi-label training data
 * @throws Exception propagated from the underlying Weka calls
 */
public void buildBaseLevelKNN(MultiLabelInstances trainingSet) throws Exception {
    train = new Instances(trainingSet.getDataSet());
    EuclideanDistance dfunc = new EuclideanDistance();
    dfunc.setDontNormalize(false);

    // Label attributes must not influence distance estimation: select the
    // label columns (Weka attribute ranges are 1-based, hence the +1) and
    // invert the selection so only feature attributes are compared.
    StringBuilder labelIndicesString = new StringBuilder();
    for (int i = 0; i < numLabels; i++) {
        if (i > 0) {
            labelIndicesString.append(',');
        }
        labelIndicesString.append(labelIndices[i] + 1);
    }
    dfunc.setAttributeIndices(labelIndicesString.toString());
    dfunc.setInvertSelection(true);

    lnn = new LinearNNSearch();
    // Skip identical instances so a training point is not its own neighbour.
    lnn.setSkipIdentical(true);
    lnn.setDistanceFunction(dfunc);
    lnn.setInstances(train);
    lnn.setMeasurePerformance(false);

    // Table holding the predictions of the first-level classifiers for each
    // label, for every instance of the training set.
    baseLevelPredictions = new double[train.numInstances()][numLabels];
    int numOfNeighbors = ((IBk) baseClassifier).getKNN();

    for (int i = 0; i < train.numInstances(); i++) {
        Instances knn = new Instances(lnn.kNearestNeighbours(train.instance(i), numOfNeighbors));

        // The label confidence vector becomes the additional meta-features:
        // for each label, the fraction of neighbours with that label == "1".
        for (int j = 0; j < numLabels; j++) {
            double count_for_label_j = 0;
            for (int k = 0; k < numOfNeighbors; k++) {
                String value = train.attribute(labelIndices[j])
                        .value((int) knn.instance(k).value(labelIndices[j]));
                if (value.equals("1")) {
                    count_for_label_j++;
                }
            }
            baseLevelPredictions[i][j] = count_for_label_j / numOfNeighbors;
        }
    }

}

From source file:test.org.moa.opencl.IBk.java

License:Open Source License

/**
 * Parses a given list of options. <p/>
 *
 <!-- options-start -->/*from w  w  w  .  j a v  a2s. co  m*/
 * Valid options are: <p/>
 * 
 * <pre> -I
 *  Weight neighbours by the inverse of their distance
 *  (use when k &gt; 1)</pre>
 * 
 * <pre> -F
 *  Weight neighbours by 1 - their distance
 *  (use when k &gt; 1)</pre>
 * 
 * <pre> -K &lt;number of neighbors&gt;
 *  Number of nearest neighbours (k) used in classification.
 *  (Default = 1)</pre>
 * 
 * <pre> -E
 *  Minimise mean squared error rather than mean absolute
 *  error when using -X option with numeric prediction.</pre>
 * 
 * <pre> -W &lt;window size&gt;
 *  Maximum number of training instances maintained.
 *  Training instances are dropped FIFO. (Default = no window)</pre>
 * 
 * <pre> -X
 *  Select the number of nearest neighbours between 1
 *  and the k value specified using hold-one-out evaluation
 *  on the training data (use when k &gt; 1)</pre>
 * 
 * <pre> -A
 *  The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch).
 * </pre>
 * 
 <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

    String knnString = Utils.getOption('K', options);
    if (knnString.length() != 0) {
        setKNN(Integer.parseInt(knnString));
    } else {
        setKNN(1);
    }
    String windowString = Utils.getOption('W', options);
    if (windowString.length() != 0) {
        setWindowSize(Integer.parseInt(windowString));
    } else {
        setWindowSize(0);
    }
    if (Utils.getFlag('I', options)) {
        setDistanceWeighting(new SelectedTag(WEIGHT_INVERSE, TAGS_WEIGHTING));
    } else if (Utils.getFlag('F', options)) {
        setDistanceWeighting(new SelectedTag(WEIGHT_SIMILARITY, TAGS_WEIGHTING));
    } else {
        setDistanceWeighting(new SelectedTag(WEIGHT_NONE, TAGS_WEIGHTING));
    }
    setCrossValidate(Utils.getFlag('X', options));
    setMeanSquared(Utils.getFlag('E', options));

    String nnSearchClass = Utils.getOption('A', options);
    if (nnSearchClass.length() != 0) {
        String nnSearchClassSpec[] = Utils.splitOptions(nnSearchClass);
        if (nnSearchClassSpec.length == 0) {
            throw new Exception("Invalid NearestNeighbourSearch algorithm " + "specification string.");
        }
        String className = nnSearchClassSpec[0];
        nnSearchClassSpec[0] = "";

        setNearestNeighbourSearchAlgorithm((NearestNeighbourSearch) Utils.forName(NearestNeighbourSearch.class,
                className, nnSearchClassSpec));
    } else
        this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());

    Utils.checkForRemainingOptions(options);
}