Example usage for weka.core Instances classIndex

Introduction

In this page you can find the example usage for weka.core Instances classIndex.

Prototype


publicint classIndex()

Source Link

Document

Returns the class attribute's index.

Usage

From source file:Classifier.supervised.LinearRegression.java

License:Open Source License

/**
 * Builds a regression model for the given data.
 *
 * @param data the training data to be used for generating the
 * linear regression function// w w w . ja v  a  2 s .c o  m
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    m_ModelBuilt = false;

    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(data);

        // remove instances with missing class
        data = new Instances(data);
        data.deleteWithMissingClass();
    }

    // Preprocess instances
    if (!m_checksTurnedOff) {
        m_TransformFilter = new NominalToBinary();
        m_TransformFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_TransformFilter);
        m_MissingFilter = new ReplaceMissingValues();
        m_MissingFilter.setInputFormat(data);
        data = Filter.useFilter(data, m_MissingFilter);
        data.deleteWithMissingClass();
    } else {
        m_TransformFilter = null;
        m_MissingFilter = null;
    }

    m_ClassIndex = data.classIndex();
    m_TransformedData = data;

    // Turn all attributes on for a start
    m_SelectedAttributes = new boolean[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != m_ClassIndex) {
            m_SelectedAttributes[i] = true;
        }
    }
    m_Coefficients = null;

    // Compute means and standard deviations
    m_Means = new double[data.numAttributes()];
    m_StdDevs = new double[data.numAttributes()];
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
            m_Means[j] = data.meanOrMode(j);
            m_StdDevs[j] = Math.sqrt(data.variance(j));
            if (m_StdDevs[j] == 0) {
                m_SelectedAttributes[j] = false;
            }
        }
    }

    m_ClassStdDev = Math.sqrt(data.variance(m_TransformedData.classIndex()));
    m_ClassMean = data.meanOrMode(m_TransformedData.classIndex());

    // Perform the regression
    findBestModel();

    // Save memory
    if (m_Minimal) {
        m_TransformedData = null;
        m_Means = null;
        m_StdDevs = null;
    } else {
        m_TransformedData = new Instances(data, 0);
    }

    m_ModelBuilt = true;
}

From source file:cn.edu.xjtu.dbmine.TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set.//from w w  w . j a  va  2s . co  m
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = null;
                if (m_OutputFilename)
                    newInst = new double[3];
                else
                    newInst = new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedInputStream is;
                is = new BufferedInputStream(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                /*while ((c = is.read()) != -1) {
                  txtStr.append((char) c);
                }*/
                FileReader fr = new FileReader(txt);
                BufferedReader br = new BufferedReader(fr);
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + "\n");
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + files[j]);
            }
        }
    }

    return data;
}

From source file:cn.ict.zyq.bestConf.COMT2.COMT2.java

License:Open Source License

private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception {
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;/*  w ww .jav  a  2s  . c o m*/
    while (enu.hasMoreElements()) {
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y - model.classifyInstance(ins), 2) - Math.pow(y - modelPi.classifyInstance(ins), 2);
    }
    return retval;
}

From source file:com.edwardraff.WekaMNIST.java

License:Open Source License

public static void main(String[] args) throws IOException, Exception {
    String folder = args[0];/*from w  w w.j  a v a  2  s.c  o  m*/
    String trainPath = folder + "MNISTtrain.arff";
    String testPath = folder + "MNISTtest.arff";

    System.out.println("Weka Timings");
    Instances mnistTrainWeka = new Instances(new BufferedReader(new FileReader(new File(trainPath))));
    mnistTrainWeka.setClassIndex(mnistTrainWeka.numAttributes() - 1);
    Instances mnistTestWeka = new Instances(new BufferedReader(new FileReader(new File(testPath))));
    mnistTestWeka.setClassIndex(mnistTestWeka.numAttributes() - 1);

    //normalize range like into [0, 1]
    Normalize normalizeFilter = new Normalize();
    normalizeFilter.setInputFormat(mnistTrainWeka);

    mnistTestWeka = Normalize.useFilter(mnistTestWeka, normalizeFilter);
    mnistTrainWeka = Normalize.useFilter(mnistTrainWeka, normalizeFilter);

    long start, end;

    System.out.println("RBF SVM (Full Cache)");
    SMO smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 0/*0 causes Weka to cache the whole matrix...*/, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("RBF SVM (No Cache)");
    smo = new SMO();
    smo.setKernel(new RBFKernel(mnistTrainWeka, 1, 0.015625));
    smo.setC(8.0);
    smo.setBuildLogisticModels(false);
    evalModel(smo, mnistTrainWeka, mnistTestWeka);

    System.out.println("Decision Tree C45");
    J48 wekaC45 = new J48();
    wekaC45.setUseLaplace(false);
    wekaC45.setCollapseTree(false);
    wekaC45.setUnpruned(true);
    wekaC45.setMinNumObj(2);
    wekaC45.setUseMDLcorrection(true);

    evalModel(wekaC45, mnistTrainWeka, mnistTestWeka);

    System.out.println("Random Forest 50 trees");
    int featuresToUse = (int) Math.sqrt(28 * 28);//Weka uses different defaults, so lets make sure they both use the published way

    RandomForest wekaRF = new RandomForest();
    wekaRF.setNumExecutionSlots(1);
    wekaRF.setMaxDepth(0/*0 for unlimited*/);
    wekaRF.setNumFeatures(featuresToUse);
    wekaRF.setNumTrees(50);

    evalModel(wekaRF, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (brute)");
    IBk wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Ball Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new BallTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("1-NN (Cover Tree)");
    wekaNN = new IBk(1);
    wekaNN.setNearestNeighbourSearchAlgorithm(new CoverTree());
    wekaNN.setCrossValidate(false);

    evalModel(wekaNN, mnistTrainWeka, mnistTestWeka);

    System.out.println("Logistic Regression LBFGS lambda = 1e-4");
    Logistic logisticLBFGS = new Logistic();
    logisticLBFGS.setRidge(1e-4);
    logisticLBFGS.setMaxIts(500);

    evalModel(logisticLBFGS, mnistTrainWeka, mnistTestWeka);

    System.out.println("k-means (Loyd)");
    int origClassIndex = mnistTrainWeka.classIndex();
    mnistTrainWeka.setClassIndex(-1);
    mnistTrainWeka.deleteAttributeAt(origClassIndex);
    {
        long totalTime = 0;
        for (int i = 0; i < 10; i++) {
            SimpleKMeans wekaKMeans = new SimpleKMeans();
            wekaKMeans.setNumClusters(10);
            wekaKMeans.setNumExecutionSlots(1);
            wekaKMeans.setFastDistanceCalc(true);

            start = System.currentTimeMillis();
            wekaKMeans.buildClusterer(mnistTrainWeka);
            end = System.currentTimeMillis();
            totalTime += (end - start);
        }
        System.out.println("\tClustering took: " + (totalTime / 10.0) / 1000.0 + " on average");
    }
}

From source file:com.entopix.maui.filters.MauiFilter.java

License:Open Source License

/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required)./*from  w w  w. j ava2 s  . c o  m*/
 * @return true if the outputFormat may be collected immediately
 */
public boolean setInputFormat(Instances instanceInfo) throws MauiFilterException {

    if (instanceInfo.classIndex() >= 0) {
        throw new MauiFilterException("Don't know what do to if class index set!");
    }

    if (!instanceInfo.attribute(keyphrasesAtt).isString() || !instanceInfo.attribute(documentAtt).isString()) {
        throw new MauiFilterException(
                "Keyphrase attribute and document attribute " + "need to be string attributes.");
    }

    try {
        phraseFilter = new MauiPhraseFilter();
        int[] arr = new int[1];
        arr[0] = documentAtt;
        phraseFilter.setAttributeIndicesArray(arr);
        phraseFilter.setInputFormat(instanceInfo);
    } catch (Exception e) {
        throw new MauiFilterException("Exception loading MauiPhraseFilter");
    }

    try {
        if (vocabularyName.equals("none")) {
            numbersFilter = new NumbersFilter();
            numbersFilter.setInputFormat(phraseFilter.getOutputFormat());
            super.setInputFormat(numbersFilter.getOutputFormat());
        } else {
            super.setInputFormat(phraseFilter.getOutputFormat());
        }
    } catch (Exception e) {
        throw new MauiFilterException("Exception loading NumbersFilter");
    }

    return false;

}

From source file:com.mycompany.neuralnetwork.NeuralNetworkClassifier.java

@Override
public void buildClassifier(Instances instances) throws Exception {
    int inputCount = instances.numAttributes() - 1;

    List<Integer> nodesPerLayer = new ArrayList<>();

    for (int i = 0; i < layers - 1; i++) {
        nodesPerLayer.add(inputCount);/* w ww .j  a  v  a  2 s. c  o m*/
    }

    nodesPerLayer.add(instances.numDistinctValues(instances.classIndex()));

    network = new Network(inputCount, nodesPerLayer);

    ArrayList<Double> errorsPerIteration = new ArrayList<>();
    for (int j = 0; j < iterations; j++) {
        double errorsPer = 0;
        for (int k = 0; k < instances.numInstances(); k++) {
            Instance instance = instances.instance(k);

            List<Double> input = new ArrayList<>();
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (Double.isNaN(instance.value(i)) && i != instance.classIndex())
                    input.add(0.0);
                else if (i != instance.classIndex())
                    input.add(instance.value(i));
            }

            errorsPer += network.train(input, instance.value(instance.classIndex()), learningFactor);
        }

        errorsPerIteration.add(errorsPer);

    }

    //Display Errors This is used to collect the data for the graph 
    //for (Double d : errorsPerIteration) 
    //{
    //  System.out.println(d);
    //}
}

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input
 * instance structure (any instances contained in the object are
 * ignored - only the structure is required).
 * @return true if the outputFormat may be collected immediately 
 *///from   ww w  .  ja  va2 s.co  m
public boolean setInputFormat(Instances instanceInfo) throws Exception {

    if (instanceInfo.classIndex() >= 0) {
        throw new Exception("Don't know what do to if class index set!");
    }
    if (!instanceInfo.attribute(m_KeyphrasesAtt).isString()
            || !instanceInfo.attribute(m_DocumentAtt).isString()) {
        throw new Exception("Keyphrase attribute and document attribute " + "need to be string attributes.");
    }
    m_PunctFilter = new KEAPhraseFilter();
    int[] arr = new int[1];
    arr[0] = m_DocumentAtt;
    m_PunctFilter.setAttributeIndicesArray(arr);
    m_PunctFilter.setInputFormat(instanceInfo);
    m_PunctFilter.setDisallowInternalPeriods(getDisallowInternalPeriods());

    if (m_vocabulary.equals("none")) {
        m_NumbersFilter = new NumbersFilter();
        m_NumbersFilter.setInputFormat(m_PunctFilter.getOutputFormat());
        super.setInputFormat(m_NumbersFilter.getOutputFormat());
    } else {
        super.setInputFormat(m_PunctFilter.getOutputFormat());
    }

    return false;

}

From source file:com.rapidminer.tools.WekaTools.java

License:Open Source License

/**
 * Creates a RapidMiner example set from Weka instances. Only a label can be used
 * as special attributes, other types of special attributes are not
 * supported. If <code>attributeNamePrefix</code> is not null, the given
 * string prefix plus a number is used as attribute names.
 *//* w w w.  j  a  va  2  s.  c  om*/
public static ExampleSet toRapidMinerExampleSet(Instances instances, String attributeNamePrefix,
        int datamanagement) {
    int classIndex = instances.classIndex();

    // create example table

    // 1. Extract attributes
    List<Attribute> attributes = new ArrayList<Attribute>();
    int number = 1; // use for attribute names
    for (int i = 0; i < instances.numAttributes(); i++) {
        weka.core.Attribute wekaAttribute = instances.attribute(i);
        int rapidMinerAttributeValueType = Ontology.REAL;
        if (wekaAttribute.isNominal())
            rapidMinerAttributeValueType = Ontology.NOMINAL;
        else if (wekaAttribute.isString())
            rapidMinerAttributeValueType = Ontology.STRING;
        Attribute attribute = AttributeFactory.createAttribute(wekaAttribute.name(),
                rapidMinerAttributeValueType);
        if ((i != classIndex) && (attributeNamePrefix != null) && (attributeNamePrefix.length() > 0)) {
            attribute.setName(attributeNamePrefix + "_" + (number++));
        }
        if (wekaAttribute.isNominal()) {
            for (int a = 0; a < wekaAttribute.numValues(); a++) {
                String nominalValue = wekaAttribute.value(a);
                attribute.getMapping().mapString(nominalValue);
            }
        }
        attributes.add(attribute);
    }

    Attribute label = null;
    if (classIndex >= 0) {
        label = attributes.get(classIndex);
        label.setName("label");
    }

    // 2. Guarantee alphabetical mapping to numbers
    for (int j = 0; j < attributes.size(); j++) {
        Attribute attribute = attributes.get(j);
        if (attribute.isNominal())
            attribute.getMapping().sortMappings();
    }

    // 3. Read data
    MemoryExampleTable table = new MemoryExampleTable(attributes);
    DataRowFactory factory = new DataRowFactory(datamanagement, '.');
    // create data
    List<DataRow> dataList = new LinkedList<DataRow>();
    int numberOfRapidMinerAttributes = instances.numAttributes();
    for (int i = 0; i < instances.numInstances(); i++) {
        Instance instance = instances.instance(i);
        DataRow dataRow = factory.create(numberOfRapidMinerAttributes);
        for (int a = 0; a < instances.numAttributes(); a++) {
            Attribute attribute = table.getAttribute(a);
            double wekaValue = instance.value(a);
            if (attribute.isNominal()) {
                String nominalValue = instances.attribute(a).value((int) wekaValue);
                dataRow.set(attribute, attribute.getMapping().mapString(nominalValue));
            } else {
                dataRow.set(attribute, wekaValue);
            }
        }
        dataRow.trim();
        dataList.add(dataRow);
    }

    // handle label extra
    table.readExamples(new ListDataRowReader(dataList.iterator()));

    // create and return example set
    return table.createExampleSet(label);
}

From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java

License:Open Source License

/**
 * builds the classifier//from  ww  w.j  a v a 2 s .  com
 * 
 * @param data the training instances
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // save original header (needed for clusters to classes output)
    m_OriginalHeader = data.stringFreeStructure();

    // remove class attribute for clusterer
    Instances clusterData = new Instances(data);
    clusterData.setClassIndex(-1);
    clusterData.deleteAttributeAt(data.classIndex());
    m_ClusteringHeader = clusterData.stringFreeStructure();

    if (m_ClusteringHeader.numAttributes() == 0) {
        System.err.println("Data contains only class attribute, defaulting to ZeroR model.");
        m_ZeroR = new ZeroR();
        m_ZeroR.buildClassifier(data);
    } else {
        m_ZeroR = null;

        // build clusterer
        m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
        m_ActualClusterer.buildClusterer(clusterData);

        if (!getLabelAllClusters()) {

            // determine classes-to-clusters mapping
            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(m_ActualClusterer);
            eval.evaluateClusterer(clusterData);
            double[] clusterAssignments = eval.getClusterAssignments();
            int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()];
            int[] clusterTotals = new int[eval.getNumClusters()];
            double[] best = new double[eval.getNumClusters() + 1];
            double[] current = new double[eval.getNumClusters() + 1];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance instance = data.instance(i);
                if (!instance.classIsMissing()) {
                    counts[(int) clusterAssignments[i]][(int) instance.classValue()]++;
                    clusterTotals[(int) clusterAssignments[i]]++;
                }
            }
            best[eval.getNumClusters()] = Double.MAX_VALUE;
            ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0);
            m_ClustersToClasses = new double[best.length];
            System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length);
        } else {
            m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance clusterInstance = clusterData.instance(i);
                Instance originalInstance = data.instance(i);
                if (!originalInstance.classIsMissing()) {
                    double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance);
                    for (int j = 0; j < probs.length; j++) {
                        m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j];
                    }
                }
            }
            for (int i = 0; i < m_ClusterClassProbs.length; i++) {
                Utils.normalize(m_ClusterClassProbs[i]);
            }
        }
    }
}

From source file:com.tum.classifiertest.DataCache.java

License:Open Source License

/**
 * Creates a DataCache by copying data from a weka.core.Instances object.
 *//*  w  w w.  ja v  a 2 s  .c  om*/
public DataCache(Instances origData) throws Exception {

    classIndex = origData.classIndex();
    numAttributes = origData.numAttributes();
    numClasses = origData.numClasses();
    numInstances = origData.numInstances();

    attNumVals = new int[origData.numAttributes()];
    for (int i = 0; i < attNumVals.length; i++) {
        if (origData.attribute(i).isNumeric()) {
            attNumVals[i] = 0;
        } else if (origData.attribute(i).isNominal()) {
            attNumVals[i] = origData.attribute(i).numValues();
        } else
            throw new Exception("Only numeric and nominal attributes are supported.");
    }

    /* Array is indexed by attribute first, to speed access in RF splitting. */
    vals = new float[numAttributes][numInstances];
    for (int a = 0; a < numAttributes; a++) {
        for (int i = 0; i < numInstances; i++) {
            if (origData.instance(i).isMissing(a))
                vals[a][i] = Float.MAX_VALUE; // to make sure missing values go to the end
            else
                vals[a][i] = (float) origData.instance(i).value(a); // deep copy
        }
    }

    instWeights = new double[numInstances];
    instClassValues = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        instWeights[i] = origData.instance(i).weight();
        instClassValues[i] = (int) origData.instance(i).classValue();
    }

    /* compute the sortedInstances for the whole dataset */

    sortedIndices = new int[numAttributes][];

    for (int a = 0; a < numAttributes; a++) { // ================= attr by attr

        if (a == classIndex)
            continue;

        if (attNumVals[a] > 0) { // ------------------------------------- nominal

            // Handling nominal attributes: as of FastRF 0.99, they're sorted as well
            // missing values are coded as Float.MAX_VALUE and go to the end

            sortedIndices[a] = new int[numInstances];
            //int count = 0;

            sortedIndices[a] = FastRfUtils.sort(vals[a]);

            /*for (int i = 0; i < numInstances; i++) {
              if ( !this.isValueMissing(a, i) ) {
                sortedIndices[a][count] = i;
                count++;
              }
            }
                    
            for (int i = 0; i < numInstances; i++) {
              if ( this.isValueMissing(a, i) ) {
                sortedIndices[a][count] = i;
                count++;
              }
            }*/

        } else { // ----------------------------------------------------- numeric

            // Sorted indices are computed for numeric attributes
            // missing values are coded as Float.MAX_VALUE and go to the end
            sortedIndices[a] = FastRfUtils.sort(vals[a]);

        } // ---------------------------------------------------------- attr kind

    } // ========================================================= attr by attr

    // System.out.println(" Done.");

}