Example usage for weka.core Instance numAttributes

List of usage examples for weka.core Instance numAttributes

Introduction

In this page you can find the example usage for weka.core Instance numAttributes.

Prototype

public int numAttributes();

Source Link

Document

Returns the number of attributes.

Usage

From source file:com.mycompany.tubesann.MyANN.java

public double classifyInstance(Instance instance) throws Exception {
    double result = 0;
    for (int i = 0; i < instance.numAttributes() - 1; i++) {
        startNode[i].setInput(instance.value(i));
    }//from   www  .j  a  va 2 s. com
    List<Double> output = new ArrayList<Double>();
    for (int i = 0; i < finalNode.length; i++) {
        output.add(finalNode[i].calculate());
        //  System.out.println("Output "+i+" "+output.get(i));
    }
    if (rule == 1) {
        boolean found = false;
        int i = 0;
        while (!found && i < output.size()) {
            if (output.get(i) == 1) {
                result = (double) i;
                found = true;
            }
            i++;
        }
    } else {
        int imax = 0;
        //System.out.println("output i= "+0+" output= "+output.get(0));
        for (int i = 1; i < output.size(); i++) {
            //System.out.println("output i= "+i+" output= "+output.get(i));
            if (output.get(i) > output.get(imax)) {
                imax = i;
            }
        }
        result = (double) imax;
        //double max = Collections.max(output);
        //result = (double) output.indexOf(max);
    }
    return result;
}

From source file:com.openkm.kea.filter.KEAFilter.java

License:Open Source License

/**
 * Converts an instance.//from  www . j  a  va  2 s  .  c  o m
 */
private FastVector convertInstance(Instance instance, boolean training) throws Exception {

    FastVector vector = new FastVector();

    if (m_Debug) {
        log.info("-- Converting instance");
    }

    // Get the key phrases for the document
    HashMap<String, Counter> hashKeyphrases = null;
    HashMap<String, Counter> hashKeysEval = null;
    if (!instance.isMissing(m_KeyphrasesAtt)) {
        String keyphrases = instance.stringValue(m_KeyphrasesAtt);
        hashKeyphrases = getGivenKeyphrases(keyphrases, false);
        hashKeysEval = getGivenKeyphrases(keyphrases, true);
    }

    // Get the phrases for the document
    HashMap<String, FastVector> hash = new HashMap<String, FastVector>();
    int length = getPhrases(hash, instance.stringValue(m_DocumentAtt));
    //   hash = getComposits(hash);

    /* Experimental:
     To compute how many of the manual keyphrases appear in the documents:
            
    log.info("Doc phrases found " + hash.size());
    log.info("Manual keyphrases: ");
    Iterator iter = hashKeyphrases.keySet().iterator();
    int count = 0;
    while (iter.hasNext()) {
       String id = (String)iter.next();
       if (hash.containsKey(id)) {
    count++;
       }
    }
            
    double max_recall = (double)count/(double)hashKeyphrases.size();
            
            
    m_max_recall += max_recall;
    doc++;
    double avg_m_max_recall = m_max_recall/(double)doc;
            
    String file = instance.stringValue(2);
    log.info(count + " out of " + hashKeyphrases.size() + " are in the document ");
    log.info("Max recall : " + avg_m_max_recall + " on " + doc + " documents ");
    */

    // Compute number of extra attributes
    int numFeatures = 5;
    if (m_Debug) {
        if (m_KFused) {
            numFeatures = numFeatures + 1;
        }
    }
    if (m_STDEVfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_NODEfeature) {
        numFeatures = numFeatures + 1;
    }
    if (m_LENGTHfeature) {
        numFeatures = numFeatures + 1;
    }

    // Set indices of key attributes
    //int phraseAttIndex = m_DocumentAtt;
    int tfidfAttIndex = m_DocumentAtt + 2;
    int distAttIndex = m_DocumentAtt + 3;
    int probsAttIndex = m_DocumentAtt + numFeatures - 1;
    //int classAttIndex = numFeatures;

    // Go through the phrases and convert them into instances
    Iterator<String> it = hash.keySet().iterator();
    while (it.hasNext()) {
        String id = it.next();
        FastVector phraseInfo = (FastVector) hash.get(id);

        double[] vals = featVals(id, phraseInfo, training, hashKeysEval, hashKeyphrases, length, hash);

        Instance inst = new Instance(instance.weight(), vals);

        inst.setDataset(m_ClassifierData);

        // Get probability of a phrase being key phrase
        double[] probs = m_Classifier.distributionForInstance(inst);

        // If simple Naive Bayes used, change here to
        //double prob = probs[1];
        double prob = probs[0];

        // Compute attribute values for final instance
        double[] newInst = new double[instance.numAttributes() + numFeatures];
        int pos = 0;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (i == m_DocumentAtt) {

                // output of values for a given phrase:

                // Add phrase
                int index = outputFormatPeek().attribute(pos).addStringValue(id);
                newInst[pos++] = index;

                // Add original version
                String orig = (String) phraseInfo.elementAt(2);

                if (orig != null) {
                    index = outputFormatPeek().attribute(pos).addStringValue(orig);
                } else {
                    index = outputFormatPeek().attribute(pos).addStringValue(id);
                }
                newInst[pos++] = index;

                // Add TFxIDF
                newInst[pos++] = inst.value(m_TfidfIndex);

                // Add distance
                newInst[pos++] = inst.value(m_FirstOccurIndex);

                // Add other features
                if (m_Debug) {
                    if (m_KFused) {
                        newInst[pos++] = inst.value(m_KeyFreqIndex);
                    }
                }
                if (m_STDEVfeature) {
                    newInst[pos++] = inst.value(m_STDEVIndex);
                }
                if (m_NODEfeature) {
                    newInst[pos++] = inst.value(m_NodeIndex);
                }
                if (m_LENGTHfeature) {
                    newInst[pos++] = inst.value(m_LengthIndex);
                }

                // Add probability 
                probsAttIndex = pos;
                newInst[pos++] = prob;

                // Set rank to missing (computed below)
                newInst[pos++] = Instance.missingValue();

            } else if (i == m_KeyphrasesAtt) {
                newInst[pos++] = inst.classValue();
            } else {
                newInst[pos++] = instance.value(i);
            }
        }
        Instance ins = new Instance(instance.weight(), newInst);
        ins.setDataset(outputFormatPeek());
        vector.addElement(ins);
    }

    // Add dummy instances for keyphrases that don't occur
    // in the document
    if (hashKeysEval != null) {
        Iterator<String> phrases = hashKeysEval.keySet().iterator();
        while (phrases.hasNext()) {
            String phrase = phrases.next();
            double[] newInst = new double[instance.numAttributes() + numFeatures];
            int pos = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == m_DocumentAtt) {
                    // log.info("Here: " + phrase);
                    // Add phrase
                    int index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add original version
                    index = outputFormatPeek().attribute(pos).addStringValue(phrase);
                    newInst[pos++] = (double) index;

                    // Add TFxIDF
                    newInst[pos++] = Instance.missingValue();

                    // Add distance
                    newInst[pos++] = Instance.missingValue();

                    // Add other features
                    if (m_Debug) {
                        if (m_KFused) {
                            newInst[pos++] = Instance.missingValue();
                        }
                    }
                    if (m_STDEVfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_NODEfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }
                    if (m_LENGTHfeature) {
                        newInst[pos++] = Instance.missingValue();
                    }

                    // Add probability and rank
                    newInst[pos++] = -Double.MAX_VALUE;
                    // newInst[pos++] = Instance.missingValue();
                } else if (i == m_KeyphrasesAtt) {
                    newInst[pos++] = 1; // Keyphrase
                } else {
                    newInst[pos++] = instance.value(i);
                }

                Instance inst = new Instance(instance.weight(), newInst);
                inst.setDataset(outputFormatPeek());
                vector.addElement(inst);
            }

        }
    }

    // Sort phrases according to their distance (stable sort)
    double[] vals = new double[vector.size()];
    for (int i = 0; i < vals.length; i++) {
        vals[i] = ((Instance) vector.elementAt(i)).value(distAttIndex);
    }
    FastVector newVector = new FastVector(vector.size());
    int[] sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their tfxidf value (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = -((Instance) vector.elementAt(i)).value(tfidfAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Sort phrases according to their probability (stable sort)
    for (int i = 0; i < vals.length; i++) {
        vals[i] = 1 - ((Instance) vector.elementAt(i)).value(probsAttIndex);
    }
    newVector = new FastVector(vector.size());
    sortedIndices = Utils.stableSort(vals);
    for (int i = 0; i < vals.length; i++) {
        newVector.addElement(vector.elementAt(sortedIndices[i]));
    }
    vector = newVector;

    // Compute rank of phrases. Check for subphrases that are ranked
    // lower than superphrases and assign probability -1 and set the
    // rank to Integer.MAX_VALUE
    int rank = 1;
    for (int i = 0; i < vals.length; i++) {
        Instance currentInstance = (Instance) vector.elementAt(i);
        // Short cut: if phrase very unlikely make rank very low and continue
        if (Utils.grOrEq(vals[i], 1.0)) {
            currentInstance.setValue(probsAttIndex + 1, Integer.MAX_VALUE);
            continue;
        }

        // Otherwise look for super phrase starting with first phrase
        // in list that has same probability, TFxIDF value, and distance as
        // current phrase. We do this to catch all superphrases
        // that have same probability, TFxIDF value and distance as current phrase.
        int startInd = i;
        while (startInd < vals.length) {
            Instance inst = (Instance) vector.elementAt(startInd);
            if ((inst.value(tfidfAttIndex) != currentInstance.value(tfidfAttIndex))
                    || (inst.value(probsAttIndex) != currentInstance.value(probsAttIndex))
                    || (inst.value(distAttIndex) != currentInstance.value(distAttIndex))) {
                break;
            }
            startInd++;
        }
        currentInstance.setValue(probsAttIndex + 1, rank++);

    }
    return vector;
}

From source file:com.openkm.kea.filter.KEAPhraseFilter.java

License:Open Source License

/** 
 * Converts an instance by removing all non-alphanumeric characters
 * from its string attribute values.//from  ww w .  j  av a2  s.  c o m
 */
private void convertInstance(Instance instance) throws Exception {

    double[] instVals = new double[instance.numAttributes()];

    for (int i = 0; i < instance.numAttributes(); i++) {
        if (!instance.attribute(i).isString() || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            if (!m_SelectCols.isInRange(i)) {
                int index = getOutputFormat().attribute(i).addStringValue(instance.stringValue(i));
                instVals[i] = (double) index;
                continue;
            }
            // aly: str = text of the document
            String str = instance.stringValue(i);

            String tokenized = tokenize(str);

            // aly: resultStr is the clean version of str
            // log.info(resultStr.toString());
            int index = getOutputFormat().attribute(i).addStringValue(tokenized);
            instVals[i] = (double) index;
        }
    }
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:com.openkm.kea.filter.NumbersFilter.java

License:Open Source License

/** 
 * Converts an instance. A phrase boundary is inserted where
 * a number is found.//from  w  w w .  j a v  a2 s.  c o m
 */
private void convertInstance(Instance instance) throws Exception {

    double[] instVals = new double[instance.numAttributes()];

    for (int i = 0; i < instance.numAttributes(); i++) {
        if ((!instance.attribute(i).isString()) || instance.isMissing(i)) {
            instVals[i] = instance.value(i);
        } else {
            String str = instance.stringValue(i);

            StringBuffer resultStr = new StringBuffer();
            StringTokenizer tok = new StringTokenizer(str, " \t\n", true);
            while (tok.hasMoreTokens()) {
                String token = tok.nextToken();

                // Everything that doesn't contain at least
                // one letter is considered to be a number
                boolean isNumber = true;
                for (int j = 0; j < token.length(); j++) {
                    if (Character.isLetter(token.charAt(j))) {
                        isNumber = false;
                        break;
                    }
                }
                if (!isNumber) {
                    resultStr.append(token);
                } else {
                    if (token.equals(" ") || token.equals("\t") || token.equals("\n")) {
                        resultStr.append(token);
                    } else {
                        resultStr.append(" \n ");
                    }
                }
            }
            int index = getOutputFormat().attribute(i).addStringValue(resultStr.toString());
            instVals[i] = (double) index;
        }
    }
    Instance inst = new Instance(instance.weight(), instVals);
    inst.setDataset(getOutputFormat());
    push(inst);
}

From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java

License:Open Source License

/**
 * Returns class probability distribution for the given instance.
 * /* w w w. j ava2s .c o m*/
 * @param instance the instance to be classified
 * @return the class probabilities
 * @throws Exception if an error occurred during the prediction
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

    if (m_ZeroR != null) {
        return m_ZeroR.distributionForInstance(instance);
    } else {
        double[] result = new double[instance.numClasses()];

        if (m_ActualClusterer != null) {
            // build new instance
            Instances tempData = m_ClusteringHeader.stringFreeStructure();
            double[] values = new double[tempData.numAttributes()];
            int n = 0;
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (i == instance.classIndex()) {
                    continue;
                }
                if (instance.attribute(i).isString()) {
                    values[n] = tempData.attribute(n).addStringValue(instance.stringValue(i));
                } else if (instance.attribute(i).isRelationValued()) {
                    values[n] = tempData.attribute(n).addRelation(instance.relationalValue(i));
                } else {
                    values[n] = instance.value(i);
                }
                n++;
            }
            Instance newInst = new DenseInstance(instance.weight(), values);
            newInst.setDataset(tempData);

            if (!getLabelAllClusters()) {

                // determine cluster/class
                double r = m_ClustersToClasses[m_ActualClusterer.clusterInstance(newInst)];
                if (r == -1) {
                    return result; // Unclassified
                } else {
                    result[(int) r] = 1.0;
                    return result;
                }
            } else {
                double[] classProbs = new double[instance.numClasses()];
                double[] dist = m_ActualClusterer.distributionForInstance(newInst);
                for (int i = 0; i < dist.length; i++) {
                    for (int j = 0; j < instance.numClasses(); j++) {
                        classProbs[j] += dist[i] * m_ClusterClassProbs[i][j];
                    }
                }
                Utils.normalize(classProbs);
                return classProbs;
            }
        } else {
            return result; // Unclassified
        }
    }
}

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
 * Samoa instance from weka instance./*from  ww w  . ja  v  a  2 s .c  o m*/
 *
 * @param inst the inst
 * @return the instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        double[] attributeValues = new double[inst.numValues()];
        int[] indexValues = new int[inst.numValues()];
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[i] = inst.valueSparse(i);
                indexValues[i] = inst.index(i);
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}

From source file:com.zooclassifier.Model.FileLoader.java

public FileLoader(String filename) throws FileNotFoundException, IOException {
    BufferedReader reader = new BufferedReader(new FileReader(filename));
    ArffLoader.ArffReader arff = new ArffLoader.ArffReader(reader);
    Instances data = arff.getData();//from  w  w w.  j ava  2 s .  co  m
    data.setClassIndex(data.numAttributes() - 1);

    attributes = new String[data.numInstances()][data.numAttributes() - 1];
    labels = new String[data.numInstances()];

    for (int i = 0; i < data.numInstances(); i++) {
        Instance instance = data.instance(i);
        for (int j = 0; j < instance.numAttributes() - 1; j++) {
            attributes[i][j] = instance.stringValue(j);
        }
        labels[i] = instance.stringValue(instance.numAttributes() - 1);
    }

    attributesLegalValues = new String[data.numAttributes() - 1][];
    for (int i = 0; i < data.numAttributes() - 1; i++) {
        attributesLegalValues[i] = (String[]) Collections.list(data.attribute(i).enumerateValues())
                .toArray(new String[data.attribute(i).numValues()]);
    }

    labelsLegalValues = (String[]) Collections.list(data.attribute(data.numAttributes() - 1).enumerateValues())
            .toArray(new String[data.attribute(data.numAttributes() - 1).numValues()]);
}

From source file:control.CosineDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances.
 * //from   ww w. j  a  v  a 2  s  . c  o m
 * @param first    the first instance
 * @param second    the second instance
 * @return       the distance between the two given instances
 */
public double distance(Instance first, Instance second) {

    HashMap<String, Double> fInstance = new HashMap<String, Double>();
    HashMap<String, Double> sInstance = new HashMap<String, Double>();

    for (int i = 0; i < first.numAttributes(); i++) {
        fInstance.put(first.attribute(i).name(), first.value(i));
        sInstance.put(second.attribute(i).name(), second.value(i));
    }

    return 1 - CosineSimilarity.calculateCosineSimilarity(fInstance, sInstance);
}

From source file:core.ClusterEvaluationEX.java

License:Open Source License

/**
 * Builds a string listing the attribute values in a specified range of indices,
 * separated by commas and enclosed in brackets.
 *
 * @param instance the instance to print the values from
 * @param attRange the range of the attributes to list
 * @return a string listing values of the attributes in the range
 *///  www .j  a  v a 2 s  .  co m
private static String attributeValuesString(Instance instance, Range attRange) {
    StringBuffer text = new StringBuffer();
    if (attRange != null) {
        boolean firstOutput = true;
        attRange.setUpper(instance.numAttributes() - 1);
        for (int i = 0; i < instance.numAttributes(); i++)
            if (attRange.isInRange(i)) {
                if (firstOutput)
                    text.append("(");
                else
                    text.append(",");
                text.append(instance.toString(i));
                firstOutput = false;
            }
        if (!firstOutput)
            text.append(")");
    }
    return text.toString();
}

From source file:core.DatabaseSaverEx.java

License:Open Source License

/**
 * inserts the given instance into the table.
 * /*w ww . j av a  2s  .co  m*/
 * @param inst the instance to insert
 * @throws Exception if something goes wrong
 */
public void writeInstance(Instance inst) throws Exception {

    StringBuffer insert = new StringBuffer();
    insert.append("INSERT INTO ");
    insert.append(m_tableName);
    insert.append(" VALUES ( ");
    if (m_id) {
        insert.append(m_count);
        insert.append(", ");
        m_count++;
    }
    for (int j = 0; j < inst.numAttributes(); j++) {
        if (inst.isMissing(j))
            insert.append("NULL");
        else {
            if ((inst.attribute(j)).isDate())
                insert.append("'" + m_DateFormat.format((long) inst.value(j)) + "'");
            else if ((inst.attribute(j)).isNumeric())
                insert.append(inst.value(j));
            else {
                String stringInsert = "'" + inst.stringValue(j) + "'";
                if (stringInsert.length() > 2)
                    stringInsert = stringInsert.replaceAll("''", "'");
                insert.append(stringInsert);
            }
        }
        if (j != inst.numAttributes() - 1)
            insert.append(", ");
    }
    insert.append(" )");
    //System.out.println(insert.toString());
    if (m_DataBaseConnection.update(insert.toString()) < 1) {
        throw new IOException("Tuple cannot be inserted.");
    } else {
        m_DataBaseConnection.close();
    }
}