List of usage examples for weka.core.Instances.deleteAttributeAt
public void deleteAttributeAt(int position)
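Before the project-level examples below, here is a minimal, self-contained sketch of the call itself. The ARFF path and the attribute index are placeholders, not taken from any of the projects: deleteAttributeAt removes the attribute at the given zero-based position from the header and from every instance, and Weka refuses to delete the attribute currently designated as the class (it throws an IllegalArgumentException).

import java.io.BufferedReader;
import java.io.FileReader;
import weka.core.Instances;

public class DeleteAttributeAtDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = new Instances(new BufferedReader(new FileReader("data.arff")));
        // work on a copy so the original dataset is left untouched
        Instances copy = new Instances(data);
        copy.setClassIndex(copy.numAttributes() - 1);
        // delete the first attribute (zero-based index); deleting the
        // class attribute itself would throw IllegalArgumentException
        copy.deleteAttributeAt(0);
        System.out.println("before: " + data.numAttributes() + ", after: " + copy.numAttributes());
    }
}

The project examples that follow show the same call in context.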
From source file: org.opentox.jaqpot3.qsar.trainer.FastRbfNnTrainer.java
License: Open Source License
@Override
public Model train(Instances training) throws JaqpotException {
    /*
     * For this algorithm we need to remove all string and nominal attributes
     * and additionally we will remove the target attribute too.
     */
    Instances cleanedTraining = training; // note: reference assignment, not a copy; deleteAttributeAt below also mutates 'training'
    Attribute targetAttribute = cleanedTraining.attribute(targetUri.toString());
    if (targetAttribute == null) {
        throw new JaqpotException("The prediction feature you provided was not found in the dataset. "
                + "Prediction Feature provided by the client: " + targetUri.toString());
    } else {
        if (!targetAttribute.isNumeric()) {
            throw new JaqpotException("The prediction feature you provided is not numeric.");
        }
    }
    double[] targetValues = new double[cleanedTraining.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) {
        targetValues[i] = cleanedTraining.instance(i).value(targetAttribute);
    }
    cleanedTraining.deleteAttributeAt(targetAttribute.index());
    Instances rbfNnNodes = new Instances(cleanedTraining);
    rbfNnNodes.delete();
    double[] potential = calculatePotential(cleanedTraining);
    int L = 1;
    int i_star = locationOfMax(potential);
    double potential_star = potential[i_star];
    double potential_star_1 = potential_star;
    do {
        rbfNnNodes.add(cleanedTraining.instance(i_star));
        potential = updatePotential(potential, i_star, cleanedTraining);
        i_star = locationOfMax(potential);
        double diff = potential[i_star] - e * potential_star_1;
        if (Double.isNaN(diff)) {
            throw new JaqpotException("Not converging");
        }
        if (potential[i_star] <= e * potential_star_1) {
            break;
        } else {
            L = L + 1;
            potential_star = potential[i_star];
        }
    } while (true);

    /* P-nearest neighbors */
    double[] pNn = null;
    double[] sigma = new double[rbfNnNodes.numInstances()];
    double s = 0;
    for (int i = 0; i < rbfNnNodes.numInstances(); i++) {
        pNn = new double[cleanedTraining.numInstances()];
        s = 0;
        for (int j = 0; j < cleanedTraining.numInstances(); j++) {
            if (j != i) {
                pNn[j] = squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(j));
            } else {
                pNn[j] = 0;
            }
        }
        int[] minPoints = locationOfpMinimum(p, pNn); // indices refer to 'cleanedTraining'
        for (int q : minPoints) {
            s += squaredNormDifference(rbfNnNodes.instance(i), cleanedTraining.instance(q));
        }
        sigma[i] = Math.sqrt(s / p);
    }

    /* Calculate the matrix X = (l_{i,j})_{i,j} */
    double[][] X = new double[cleanedTraining.numInstances()][rbfNnNodes.numInstances()];
    for (int i = 0; i < cleanedTraining.numInstances(); i++) { // for DoA
        for (int j = 0; j < rbfNnNodes.numInstances(); j++) {
            X[i][j] = rbf(sigma[j], cleanedTraining.instance(i), rbfNnNodes.instance(j));
        }
    }
    Jama.Matrix X_matr = new Matrix(X);
    Jama.Matrix Y_matr = new Matrix(targetValues, targetValues.length);
    Jama.Matrix coeffs = (X_matr.transpose().times(X_matr)).inverse().times(X_matr.transpose()).times(Y_matr);

    FastRbfNnModel actualModel = new FastRbfNnModel();
    actualModel.setAlpha(a);
    actualModel.setBeta(b);
    actualModel.setEpsilon(e);
    actualModel.setNodes(rbfNnNodes);
    actualModel.setSigma(sigma);
    actualModel.setLrCoefficients(coeffs.getColumnPackedCopy());

    Model m = new Model(Configuration.getBaseUri().augment("model", getUuid().toString()));
    m.setAlgorithm(getAlgorithm());
    m.setCreatedBy(getTask().getCreatedBy());
    m.setDataset(datasetUri);
    m.addDependentFeatures(dependentFeature);
    Feature predictedFeature = publishFeature(m, dependentFeature.getUnits(),
            "Created as prediction feature for the RBF NN model " + m.getUri(), datasetUri, featureService);
    m.addPredictedFeatures(predictedFeature);
    m.setIndependentFeatures(independentFeatures);
    try {
        m.setActualModel(new ActualModel(actualModel));
    } catch (NotSerializableException ex) {
        logger.error("The provided instance of model cannot be serialized! Critical Error!", ex);
    }
    m.setParameters(new HashSet<Parameter>());
    Parameter<Double> aParam = new Parameter("a", new LiteralValue<Double>(a))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    aParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> bParam = new Parameter("b", new LiteralValue<Double>(b))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    bParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    Parameter<Double> eParam = new Parameter("e", new LiteralValue<Double>(e))
            .setScope(Parameter.ParameterScope.OPTIONAL);
    eParam.setUri(Services.anonymous().augment("parameter", RANDOM.nextLong()));
    m.getParameters().add(aParam);
    m.getParameters().add(bParam);
    m.getParameters().add(eParam);
    // save the instances being predicted to the abstract trainer and set the
    // features to be excluded when calculating DoA
    predictedInstances = training;
    excludeAttributesDoA.add(dependentFeature.getUri().toString());
    return m;
}
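One detail worth flagging in the example above: Instances cleanedTraining = training; copies the reference, not the dataset, so the deleteAttributeAt call also removes the target attribute from the caller's training set (the method may rely on that, since it later stores predictedInstances = training). If the mutation is not intended, a deep copy avoids it. A minimal sketch, reusing the variable names from the example:

// the copy constructor duplicates both the header and all instances,
// so deleteAttributeAt on the copy leaves 'training' intact
Instances cleanedTraining = new Instances(training);
cleanedTraining.deleteAttributeAt(targetAttribute.index());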
From source file: org.uclab.mm.kcl.ddkat.datapreprocessor.MissingValueHandler.java
License: Apache License
/**
 * Method to replace the identified missing values.
 *
 * @throws Exception the exception
 */
public void replaceMissingValues() throws Exception {
    this.confirmationMessage = new ArrayList<String>();
    Instances outputData;
    String inputFile = BASE_DIR + "OriginalDataSet.csv";
    // load CSV file
    CSVLoader fileLoader = new CSVLoader();
    fileLoader.setSource(new File(inputFile));
    outputData = fileLoader.getDataSet();
    int numInstances = outputData.numInstances();
    int numAttributes = outputData.numAttributes();
    final int NON_NUMERIC = -1;
    int[] m_AttributeIndices = null;
    Range m_Attributes = new Range("first-last");
    // attributes must be numeric
    m_Attributes.setUpper(outputData.numAttributes() - 1);
    m_AttributeIndices = m_Attributes.getSelection();
    for (int i = 0; i < m_AttributeIndices.length; i++) {
        // ignore class
        if (m_AttributeIndices[i] == outputData.classIndex()) {
            m_AttributeIndices[i] = NON_NUMERIC;
            continue;
        }
        // not numeric -> ignore it
        if (!outputData.attribute(m_AttributeIndices[i]).isNumeric())
            m_AttributeIndices[i] = NON_NUMERIC;
    }
    double sum;
    int missingCounter;
    double attributeMean;
    // identify the missing values
    for (int attributeIndex = 0; attributeIndex < numAttributes; attributeIndex++) {
        // non-numeric attribute?
        if (m_AttributeIndices[attributeIndex] == NON_NUMERIC) {
            continue;
        }
        double tempArr[] = outputData.attributeToDoubleArray(attributeIndex);
        sum = 0;
        missingCounter = 0;
        for (int i = 0; i < tempArr.length; i++) {
            sum = sum + tempArr[i];
            if (tempArr[i] == 0)
                missingCounter++; // note: this code treats a value of 0 as missing
        }
        attributeMean = sum / (numInstances - missingCounter);
        for (int instanceIndex = 0; instanceIndex < numInstances; instanceIndex++) {
            // replace the missing values with the attribute mean value
            if (outputData.instance(instanceIndex).value(attributeIndex) == 0) {
                outputData.instance(instanceIndex).setValue(attributeIndex, attributeMean);
            }
        }
    }
    // drop the last two attributes before saving
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    outputData.deleteAttributeAt(outputData.numAttributes() - 1);
    saveFilledData(inputFile, outputData);
}
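The two trailing deleteAttributeAt calls simply drop the last two columns in place. The same effect can be had in one call with Weka's Remove filter, which additionally returns a new dataset instead of mutating the input. A minimal alternative sketch, assuming the same outputData variable as above (Remove uses 1-based attribute indices):

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

// drop the last two columns in one pass, leaving 'outputData' intact
Remove remove = new Remove();
remove.setAttributeIndices((outputData.numAttributes() - 1) + "," + outputData.numAttributes());
remove.setInputFormat(outputData);
Instances trimmed = Filter.useFilter(outputData, remove);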
From source file: sirius.trainer.features.subsetselection.GreedyForwardSubsetSearch.java
License: Open Source License
@Override
public List<Feature> selectSubset(List<Feature> wholeList, String outputFileLocation, ApplicationData appData)
        throws Exception {
    // Based on the wholeList, start with an empty list and add one Feature at a time from the wholeList.
    // If the MCC value increases then keep the Feature, else remove it.
    // Train on Dataset1 and test on Dataset3 in appData.
    String classifierName = "weka.classifiers.meta.AttributeSelectedClassifier";
    String[] classifierOptions = new String[13];
    BufferedWriter output = new BufferedWriter(new FileWriter(outputFileLocation));
    List<Feature> selectedList = new ArrayList<Feature>();
    HashSet<Integer> selectedIndex = new HashSet<Integer>();
    double lastMCC = 0.0;
    FeatureTableModel currentFeatureTableModel = new FeatureTableModel(false);
    for (int x = 0; x < wholeList.size(); x++) {
        currentFeatureTableModel.add(wholeList.get(x));
    }
    appData.setStep2FeatureTableModel(currentFeatureTableModel);
    new GenerateFeatures(null, appData, null, null, null, null);
    // wait until GenerateFeatures is finished
    while (appData.getOneThread() != null) {
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
        }
    }
    for (int x = 0; x < wholeList.size(); x++) {
        Instances currentInstances = new Instances(new BufferedReader(
                new FileReader(appData.getWorkingDirectory() + File.separator + "Dataset1.arff")));
        // delete in reverse order so the indices still to be visited stay valid
        for (int y = wholeList.size() - 1; y >= 0; y--) {
            if (selectedIndex.contains(y) == false && y != x)
                currentInstances.deleteAttributeAt(y);
        }
        appData.setDataset1Instances(currentInstances);
        classifierOptions[0] = "-E";
        classifierOptions[1] = "weka.attributeSelection.GainRatioAttributeEval";
        classifierOptions[2] = "-S";
        classifierOptions[3] = "weka.attributeSelection.Ranker -T 0.0 -N -1";
        classifierOptions[4] = "-W";
        classifierOptions[5] = "weka.classifiers.trees.RandomForest";
        classifierOptions[6] = "--";
        classifierOptions[7] = "-I";
        classifierOptions[8] = "1000"; // 10, 100, 1000
        classifierOptions[9] = "-K";
        classifierOptions[10] = "0";
        classifierOptions[11] = "-S";
        classifierOptions[12] = "1";
        double MCC1000 = ((PredictionStats) RunClassifierWithNoLocationIndex
                .startClassifierOneWithNoLocationIndex(null, appData, null, null, true, null, 0, 0.5,
                        classifierName, classifierOptions, false, null, new Random().nextInt())).getMaxMCC();
        while (appData.getOneThread() != null) {
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
            }
        }
        output.write("" + MCC1000);
        output.newLine();
        output.flush();
        if (MCC1000 > lastMCC) {
            selectedList.add(wholeList.get(x));
            selectedIndex.add(x);
            lastMCC = MCC1000;
        }
    }
    output.close();
    System.out.println(lastMCC);
    return selectedList;
}
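The inner loop above deletes unwanted columns from the highest index down to the lowest. The direction matters: deleteAttributeAt shifts every attribute after the deleted position down by one, so a forward pass would invalidate the indices it has not visited yet. A minimal sketch of the pattern in isolation ('data' and the contents of 'keep' are placeholders):

// keep only the attribute indices listed in 'keep'; iterate backwards
// so that each deletion cannot shift an index still to be visited
java.util.Set<Integer> keep = new java.util.HashSet<Integer>(java.util.Arrays.asList(0, 3));
for (int y = data.numAttributes() - 1; y >= 0; y--) {
    if (!keep.contains(y) && y != data.classIndex()) {
        data.deleteAttributeAt(y);
    }
}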
From source file: tr.gov.ulakbim.jDenetX.streams.generators.multilabel.MetaMultilabelGenerator.java
License: Open Source License
/**
 * GenerateMultilabelHeader.
 */
protected MultilabelInstancesHeader generateMultilabelHeader(Instances si) {
    Instances mi = new Instances(si, 0, 0);
    mi.setClassIndex(-1); // unset the class index so the class attribute can be deleted
    mi.deleteAttributeAt(mi.numAttributes() - 1);
    FastVector bfv = new FastVector();
    bfv.addElement("0");
    bfv.addElement("1");
    for (int i = 0; i < this.m_N; i++) {
        mi.insertAttributeAt(new Attribute("class" + i, bfv), i);
    }
    this.multilabelStreamTemplate = mi;
    this.multilabelStreamTemplate.setRelationName("SYN_Z" + this.labelCardinalityOption.getValue() + "L"
            + this.m_N + "X" + m_A + "S" + metaRandomSeedOption.getValue() + ": -C " + this.m_N);
    this.multilabelStreamTemplate.setClassIndex(this.m_N);
    return new MultilabelInstancesHeader(multilabelStreamTemplate, m_N);
}
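The setClassIndex(-1) call just before deleteAttributeAt is not cosmetic: Instances refuses to delete the attribute currently designated as the class and throws an IllegalArgumentException instead, so the class index must be cleared (or moved) first. A minimal sketch of just that step ('header' is a placeholder dataset whose class is the last attribute):

// clearing the class index first makes the class attribute deletable
header.setClassIndex(-1);
header.deleteAttributeAt(header.numAttributes() - 1);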
From source file: tubes1.myClassifiers.myC45.java
public TreeNode C45Node(Instances i, double parentGain) {
    TreeNode treeNode = new TreeNode();
    int[] count = calculateCount(i);
    // if all instances belong to one class, return a leaf with that label
    for (int j = 0; j < count.length; j++) {
        int c = count[j];
        if (c == i.numInstances()) {
            treeNode.label = j;
            return treeNode;
        }
    }
    // no attributes left to split on: return a majority-vote leaf
    if (i.numAttributes() <= 1) {
        int maxc = -1;
        int maxcj = -1;
        for (int j = 0; j < count.length; j++) {
            if (count[j] > maxc) {
                maxc = count[j];
                maxcj = j;
            }
        }
        treeNode.label = maxcj;
        return treeNode;
    }
    // pick the attribute with the highest information gain
    Attribute bestA = null;
    double bestAIG = -1;
    double entropyOfSet = entropy(i);
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute a = i.attribute(j);
        if (a != i.classAttribute()) {
            double aIG = infoGain(i, a, entropyOfSet);
            if (aIG > bestAIG) {
                bestAIG = aIG;
                bestA = a;
            }
        }
    }
    double childGainRatio = gainRatio(bestAIG, entropyOfSet);
    treeNode.decision = bestA;
    if (childGainRatio > parentGain) {
        Instances[] subSets = splitData(i, bestA);
        for (Instances subSet : subSets) {
            if (subSet.numInstances() > 0) {
                double attributeValue = subSet.firstInstance().value(bestA);
                subSet.deleteAttributeAt(bestA.index()); // remove the used attribute before recursing
                TreeNode newBranch = C45Node(subSet, childGainRatio);
                newBranch.branchValue = attributeValue;
                treeNode.addBranch(newBranch);
            }
        }
    } else {
        TreeNode newBranch = new TreeNode();
        newBranch.label = vote(i, bestA);
        newBranch.branchValue = treeNode.branchValue;
        treeNode.addBranch(newBranch);
    }
    return treeNode;
}
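Both this example and the ID3 variant below depend on splitData returning an independent Instances copy per attribute value; deleting bestA from each subset before recursing is what guarantees the numAttributes() <= 1 base case is eventually reached. splitData itself is not shown in the listing, so the following is only a guess at its contract, not the project's actual code:

// hypothetical split helper: one copy of the data per value of nominal
// attribute 'a', so deleteAttributeAt on a subset cannot affect its siblings
private Instances[] splitData(Instances data, Attribute a) {
    Instances[] subSets = new Instances[a.numValues()];
    for (int v = 0; v < a.numValues(); v++) {
        subSets[v] = new Instances(data, data.numInstances()); // empty copy with the same header
    }
    for (int k = 0; k < data.numInstances(); k++) {
        Instance inst = data.instance(k);
        subSets[(int) inst.value(a)].add(inst); // add() copies the instance
    }
    return subSets;
}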
From source file: tubes1.myClassifiers.myID3.java
public TreeNode id3Node(Instances i) {
    TreeNode treeNode = new TreeNode();
    int[] count = calculateCount(i);
    // if all instances belong to one class, return a leaf with that label
    for (int j = 0; j < count.length; j++) {
        int c = count[j];
        if (c == i.numInstances()) {
            treeNode.label = j;
            return treeNode;
        }
    }
    // no attributes left to split on: return a majority-vote leaf
    if (i.numAttributes() <= 1) {
        int maxc = -1;
        int maxcj = -1;
        for (int j = 0; j < count.length; j++) {
            if (count[j] > maxc) {
                maxc = count[j];
                maxcj = j;
            }
        }
        treeNode.label = maxcj;
        return treeNode;
    }
    // pick the attribute with the highest information gain
    Attribute bestA = null;
    double bestAIG = -1;
    double entropyOfSet = entropy(i);
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute a = i.attribute(j);
        if (a != i.classAttribute()) {
            double aIG = infoGain(i, a, entropyOfSet);
            if (aIG > bestAIG) {
                bestAIG = aIG;
                bestA = a;
            }
        }
    }
    treeNode.decision = bestA;
    Instances[] subSets = splitData(i, bestA);
    for (Instances subSet : subSets) {
        if (subSet.numInstances() > 0) {
            double attributeValue = subSet.firstInstance().value(bestA);
            subSet.deleteAttributeAt(bestA.index()); // remove the used attribute before recursing
            TreeNode newBranch = id3Node(subSet);
            newBranch.branchValue = attributeValue;
            treeNode.addBranch(newBranch);
        }
    }
    return treeNode;
}