List of usage examples for the weka.core.Instances constructor
public Instances(Instances dataset, int capacity)
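This constructor copies the structure of dataset (relation name, attributes, class index) into a new, empty set of instances and pre-allocates room for capacity rows; no data rows are copied. Passing 0 gives the "header-only copy" idiom that runs through all of the examples below. A minimal sketch of that idiom (the file name iris.arff and the class name HeaderCopyDemo are placeholders, not taken from any example on this page):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class HeaderCopyDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; the path is a placeholder for this sketch.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Capacity 0 yields a header-only copy: same attributes,
        // relation name and class index, but no data rows.
        Instances header = new Instances(data, 0);
        System.out.println(header.numInstances());      // 0
        System.out.println(header.equalHeaders(data));  // true
    }
}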
From source file:activeSegmentation.learning.WekaClassifier.java
License:Open Source License
/**
 * Evaluates the classifier using the test dataset and stores the evaluation.
 *
 * @param instances The instances to test
 * @return The predictions, or null if the evaluation failed
 */
@Override
public double[] testModel(IDataSet instances) {
    try {
        // test the current classifier with the test set
        Evaluation evaluator = new Evaluation(new Instances(instances.getDataset(), 0));
        double[] predict = evaluator.evaluateModel(classifier, instances.getDataset());
        System.out.println(evaluator.toSummaryString());
        return predict;
    } catch (Exception e) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, e);
    }
    return null;
}
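Here the header-only copy seeds the Evaluation with the dataset structure (notably the class attribute and its labels); the actual test rows are only handed over later in evaluateModel(), so no second reference to the full data is kept.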
From source file:adams.data.conversion.AbstractMatchWekaInstanceAgainstHeader.java
License:Open Source License
/**
 * Performs the actual conversion.
 *
 * @return the converted data
 * @throws Exception if something goes wrong with the conversion
 */
protected Object doConvert() throws Exception {
    Instance input;
    Instance result;
    String error;

    // get header (a header-only copy of the reference dataset);
    // the null check belongs on the obtained header, since the
    // constructor itself never returns null
    if (m_Dataset == null) {
        Instances header = getDatasetHeader();
        if (header == null)
            throw new IllegalStateException("Failed to obtain header!");
        m_Dataset = new Instances(header, 0);
    }

    input = (Instance) m_Input;

    // check compatibility
    error = isCompatible(input);
    if (error != null)
        throw new IllegalArgumentException("Input is not compatible: " + error);

    // convert (if necessary)
    result = match(input);

    return result;
}
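The header-only copy built from getDatasetHeader() is cached in m_Dataset, so the header lookup happens once and every subsequent conversion reuses the same reference header for the compatibility check.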
From source file:adams.data.instance.Instance.java
License:Open Source License
/**
 * Clears the container and adds the data from the weka.core.Instance
 * (internal values). Uses only the attributes specified in the range.
 *
 * @param inst the instance to use
 * @param index the row index in the original dataset, use -1 to ignore
 * @param additional the indices of the additional attribute values to
 *          store in the report
 * @param range the range of attributes to limit the instance to
 * @param attTypes the attribute types to restrict to, null or an empty
 *          set means no restriction
 * @see Attribute
 */
public void set(weka.core.Instance inst, int index, int[] additional, Range range,
        HashSet<Integer> attTypes) {
    ArrayList<InstancePoint> list;
    int i;
    Attribute att;
    String fieldStr;

    clear();

    // keep reference to header
    m_DatasetHeader = new Instances(inst.dataset(), 0);

    range.setMax(inst.numAttributes());
    list = new ArrayList<InstancePoint>();
    for (i = 0; i < inst.numAttributes(); i++) {
        if (i == inst.classIndex())
            continue;
        if (!range.isInRange(i))
            continue;
        if ((attTypes != null) && (!attTypes.contains(inst.attribute(i).type())))
            continue;
        list.add(new InstancePoint(i, inst.value(i)));
    }
    addAll(list);

    // create artificial report
    m_Report.addParameter(REPORT_DATASET, m_DatasetHeader.relationName());
    att = m_DatasetHeader.attribute(ArffUtils.getDBIDName());
    if (att != null) {
        m_Report.addParameter(REPORT_DB_ID, new Double(inst.value(att)));
        m_Report.setDatabaseID((int) inst.value(att));
    }
    att = m_DatasetHeader.attribute(ArffUtils.getIDName());
    if (att != null)
        m_Report.addParameter(REPORT_ID, new Double(inst.value(att)));

    // class
    if (inst.classIndex() > -1) {
        if (inst.classAttribute().isNumeric()) {
            if (inst.classIsMissing()) {
                m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
                m_Report.addParameter(REPORT_CLASS, "?");
            } else {
                m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC));
                m_Report.addParameter(REPORT_CLASS, Double.toString(inst.classValue()));
            }
        } else {
            m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
            if (inst.classIsMissing())
                m_Report.addParameter(REPORT_CLASS, "?");
            else
                m_Report.addParameter(REPORT_CLASS, inst.stringValue(inst.classIndex()));
        }
    }

    // row
    if (index != -1) {
        m_Report.addField(new Field(REPORT_ROW, DataType.NUMERIC));
        m_Report.addParameter(REPORT_ROW, new Double(index + 1));
    }

    // additional attributes
    for (i = 0; i < additional.length; i++) {
        att = inst.attribute(additional[i]);
        fieldStr = REPORT_ADDITIONAL_PREFIX + (additional[i] + 1) + "-" + att.name();
        if (att.isNumeric()) {
            m_Report.addField(new Field(fieldStr, DataType.NUMERIC));
            m_Report.addParameter(fieldStr, inst.value(additional[i]));
        } else {
            m_Report.addField(new Field(fieldStr, DataType.STRING));
            m_Report.addParameter(fieldStr, inst.stringValue(additional[i]));
        }
    }

    // display ID (hashcode of string representation of Instance)
    if (getID().length() == 0)
        setID("" + inst.toString().hashCode());
}
From source file:adams.data.instancesanalysis.pls.OPLS.java
License:Open Source License
/**
 * Determines the output format based on the input format and returns this.
 *
 * @param input the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 */
@Override
public Instances determineOutputFormat(Instances input) throws Exception {
    m_OutputFormat = new Instances(input, 0);
    return m_OutputFormat;
}
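Returning new Instances(input, 0) is the usual way for a filter to describe its output format: a dataset with the right structure but no rows, which here also doubles as the cached m_OutputFormat.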
From source file:adams.flow.transformer.WekaDatasetsMerge.java
License:Open Source License
/**
 * Creates an Instances dataset, containing a copy of the single instance
 * provided.
 *
 * @param instance The instance to create a dataset for.
 * @return The created dataset.
 */
protected Instances datasetForSingleInstance(Instance instance) {
    // Create an empty dataset with the instance's original header,
    // pre-sized for a single row
    Instances dataset = new Instances(instance.dataset(), 1);

    // Add a copy of the provided instance
    dataset.add((Instance) instance.copy());

    return dataset;
}
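The capacity argument (1 here) merely pre-sizes the dataset's internal list for the single row about to be added; it is a hint, not a limit, and the dataset could still grow past it.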
From source file:adams.flow.transformer.WekaInstanceBuffer.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instance[] insts;
    Instance inst;
    double[] values;
    int i;
    int n;
    boolean updated;

    result = null;

    if (m_Operation == Operation.INSTANCE_TO_INSTANCES) {
        if (m_InputToken.getPayload() instanceof Instance)
            insts = new Instance[]{(Instance) m_InputToken.getPayload()};
        else
            insts = (Instance[]) m_InputToken.getPayload();

        for (n = 0; n < insts.length; n++) {
            inst = insts[n];

            if ((m_Buffer != null) && m_CheckHeader) {
                if (!m_Buffer.equalHeaders(inst.dataset())) {
                    getLogger().info("Header changed, resetting buffer");
                    m_Buffer = null;
                }
            }

            // buffer instance
            if (m_Buffer == null)
                m_Buffer = new Instances(inst.dataset(), 0);

            // we need to make sure that string and relational values are in our
            // buffer header and update the current Instance accordingly before
            // buffering it
            values = inst.toDoubleArray();
            updated = false;
            for (i = 0; i < values.length; i++) {
                if (inst.isMissing(i))
                    continue;
                if (inst.attribute(i).isString()) {
                    values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i));
                    updated = true;
                } else if (inst.attribute(i).isRelationValued()) {
                    values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i));
                    updated = true;
                }
            }

            if (updated) {
                if (inst instanceof SparseInstance) {
                    inst = new SparseInstance(inst.weight(), values);
                } else if (inst instanceof BinarySparseInstance) {
                    inst = new BinarySparseInstance(inst.weight(), values);
                } else {
                    if (!(inst instanceof DenseInstance)) {
                        getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), "
                                + "defaulting to " + DenseInstance.class.getName());
                    }
                    inst = new DenseInstance(inst.weight(), values);
                }
            } else {
                inst = (Instance) inst.copy();
            }

            m_Buffer.add(inst);
        }

        if (m_Buffer.numInstances() % m_Interval == 0) {
            m_OutputToken = new Token(m_Buffer);
            if (m_ClearBuffer)
                m_Buffer = null;
        }
    } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) {
        m_Buffer = (Instances) m_InputToken.getPayload();
        m_Iterator = m_Buffer.iterator();
    } else {
        throw new IllegalStateException("Unhandled operation: " + m_Operation);
    }

    return result;
}
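The second pass over the values is needed because string and relational values are stored inside the Attribute objects of a specific header: before an incoming row can be buffered, its string/relational values must be registered with the buffer's own attributes (addStringValue()/addRelation()) and the instance rebuilt around the returned indices.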
From source file:adams.flow.transformer.WekaInstanceDumper.java
License:Open Source License
/**
 * Turns the dataset header into the appropriate format.
 *
 * @param header the header to convert
 * @return the generated output
 */
protected String createHeader(Instances header) {
    StringBuilder result;
    int i;

    result = new StringBuilder();

    switch (m_OutputFormat) {
        case ARFF:
            result.append(new Instances(header, 0).toString());
            break;

        case CSV:
            for (i = 0; i < header.numAttributes(); i++) {
                if (i > 0)
                    result.append(",");
                result.append(Utils.quote(header.attribute(i).name()));
            }
            break;

        case TAB:
            for (i = 0; i < header.numAttributes(); i++) {
                if (i > 0)
                    result.append("\t");
                result.append(Utils.quote(header.attribute(i).name()));
            }
            break;

        default:
            throw new IllegalStateException("Unhandled output format: " + m_OutputFormat);
    }

    return result.toString();
}
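In the ARFF branch, calling toString() on a zero-capacity copy prints the @relation and @attribute declarations followed by an empty @data section, which is exactly the file header the dumper needs to emit.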
From source file:adams.flow.transformer.WekaInstancesInfo.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances inst;
    int index;
    int labelIndex;
    double[] dist;
    Enumeration enm;
    int i;

    result = null;

    if (m_InputToken.getPayload() instanceof Instance)
        inst = ((Instance) m_InputToken.getPayload()).dataset();
    else
        inst = (Instances) m_InputToken.getPayload();
    m_AttributeIndex.setData(inst);
    index = m_AttributeIndex.getIntIndex();

    m_Queue.clear();

    switch (m_Type) {
        case FULL:
            m_Queue.add(inst.toSummaryString());
            break;

        case FULL_ATTRIBUTE:
            m_Queue.add(getAttributeStats(inst, index));
            break;

        case FULL_CLASS:
            if (inst.classIndex() > -1)
                m_Queue.add(getAttributeStats(inst, inst.classIndex()));
            break;

        case HEADER:
            m_Queue.add(new Instances(inst, 0).toString());
            break;

        case RELATION_NAME:
            m_Queue.add(inst.relationName());
            break;

        case ATTRIBUTE_NAME:
            if (index != -1)
                m_Queue.add(inst.attribute(index).name());
            break;

        case ATTRIBUTE_NAMES:
            for (i = 0; i < inst.numAttributes(); i++)
                m_Queue.add(inst.attribute(i).name());
            break;

        case LABELS:
            if (index != -1) {
                enm = inst.attribute(index).enumerateValues();
                while (enm.hasMoreElements())
                    m_Queue.add(enm.nextElement());
            }
            break;

        case CLASS_LABELS:
            if (inst.classIndex() > -1) {
                enm = inst.classAttribute().enumerateValues();
                while (enm.hasMoreElements())
                    m_Queue.add(enm.nextElement());
            }
            break;

        case LABEL_COUNT:
            if (index > -1) {
                m_LabelIndex.setData(inst.attribute(index));
                labelIndex = m_LabelIndex.getIntIndex();
                m_Queue.add(inst.attributeStats(index).nominalCounts[labelIndex]);
            }
            break;

        case LABEL_COUNTS:
            if (index > -1)
                m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(index).nominalCounts));
            break;

        case LABEL_DISTRIBUTION:
            if (index > -1) {
                dist = new double[inst.attributeStats(index).nominalCounts.length];
                for (i = 0; i < dist.length; i++)
                    dist[i] = inst.attributeStats(index).nominalCounts[i];
                Utils.normalize(dist);
                m_Queue.add(StatUtils.toNumberArray(dist));
            }
            break;

        case CLASS_LABEL_COUNT:
            if (inst.classIndex() > -1) {
                m_LabelIndex.setData(inst.classAttribute());
                labelIndex = m_LabelIndex.getIntIndex();
                m_Queue.add(inst.attributeStats(inst.classIndex()).nominalCounts[labelIndex]);
            }
            break;

        case CLASS_LABEL_COUNTS:
            if (inst.classIndex() > -1)
                m_Queue.add(StatUtils.toNumberArray(inst.attributeStats(inst.classIndex()).nominalCounts));
            break;

        case CLASS_LABEL_DISTRIBUTION:
            if (inst.classIndex() > -1) {
                dist = new double[inst.attributeStats(inst.classIndex()).nominalCounts.length];
                for (i = 0; i < dist.length; i++)
                    dist[i] = inst.attributeStats(inst.classIndex()).nominalCounts[i];
                Utils.normalize(dist);
                m_Queue.add(StatUtils.toNumberArray(dist));
            }
            break;

        case NUM_ATTRIBUTES:
            m_Queue.add(inst.numAttributes());
            break;

        case NUM_INSTANCES:
            m_Queue.add(inst.numInstances());
            break;

        case NUM_CLASS_LABELS:
            if ((inst.classIndex() != -1) && inst.classAttribute().isNominal())
                m_Queue.add(inst.classAttribute().numValues());
            break;

        case NUM_LABELS:
            if ((index != -1) && inst.attribute(index).isNominal())
                m_Queue.add(inst.attribute(index).numValues());
            break;

        case NUM_DISTINCT_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).distinctCount);
            break;

        case NUM_UNIQUE_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).uniqueCount);
            break;

        case NUM_MISSING_VALUES:
            if (index != -1)
                m_Queue.add(inst.attributeStats(index).missingCount);
            break;

        case MIN:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.min);
            break;

        case MAX:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.max);
            break;

        case MEAN:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.mean);
            break;

        case STDEV:
            if ((index != -1) && inst.attribute(index).isNumeric())
                m_Queue.add(inst.attributeStats(index).numericStats.stdDev);
            break;

        case ATTRIBUTE_TYPE:
            if (index != -1)
                m_Queue.add(Attribute.typeToString(inst.attribute(index)));
            break;

        case CLASS_TYPE:
            if (inst.classIndex() != -1)
                m_Queue.add(Attribute.typeToString(inst.classAttribute()));
            break;

        default:
            result = "Unhandled info type: " + m_Type;
    }

    return result;
}
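The HEADER info type relies on the same trick as the ARFF header above: stringifying a zero-capacity copy yields the dataset's structure without any of its (possibly large) data rows.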
From source file:adams.flow.transformer.WekaInstancesMerge.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    String[] filesStr;
    File[] files;
    int i;
    Instances output;
    Instances[] orig;
    Instances[] inst;
    Instance[] rows;
    HashSet ids;
    int max;
    TIntList uniqueList;
    Remove remove;

    result = null;

    // get filenames
    files = null;
    orig = null;
    if (m_InputToken.getPayload() instanceof String[]) {
        filesStr = (String[]) m_InputToken.getPayload();
        files = new File[filesStr.length];
        for (i = 0; i < filesStr.length; i++)
            files[i] = new PlaceholderFile(filesStr[i]);
    } else if (m_InputToken.getPayload() instanceof File[]) {
        files = (File[]) m_InputToken.getPayload();
    } else if (m_InputToken.getPayload() instanceof Instance[]) {
        rows = (Instance[]) m_InputToken.getPayload();
        orig = new Instances[rows.length];
        for (i = 0; i < rows.length; i++) {
            orig[i] = new Instances(rows[i].dataset(), 1);
            orig[i].add((Instance) rows[i].copy());
        }
    } else if (m_InputToken.getPayload() instanceof Instances[]) {
        orig = (Instances[]) m_InputToken.getPayload();
    } else {
        throw new IllegalStateException("Unhandled input type: " + m_InputToken.getPayload().getClass());
    }

    try {
        output = null;

        // simple merge
        if (m_UniqueID.length() == 0) {
            if (files != null) {
                inst = new Instances[1];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = DataSource.read(files[i].getAbsolutePath());
                    inst[0] = prepareData(inst[0], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger().info("Merging with file #" + (i + 1) + ": " + files[i]);
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            } else if (orig != null) {
                inst = new Instances[1];
                for (i = 0; i < orig.length; i++) {
                    if (isStopped())
                        break;
                    inst[0] = prepareData(orig[i], i);
                    if (i == 0) {
                        output = inst[0];
                    } else {
                        if (isLoggingEnabled())
                            getLogger().info("Merging with dataset #" + (i + 1) + ": " + orig[i].relationName());
                        output = Instances.mergeInstances(output, inst[0]);
                    }
                }
            }
        }
        // merge based on row IDs
        else {
            m_AttType = -1;
            max = 0;
            m_UniqueIDAtts = new ArrayList<>();
            if (files != null) {
                orig = new Instances[files.length];
                for (i = 0; i < files.length; i++) {
                    if (isStopped())
                        break;
                    if (isLoggingEnabled())
                        getLogger().info("Loading file #" + (i + 1) + ": " + files[i]);
                    orig[i] = DataSource.read(files[i].getAbsolutePath());
                    max = Math.max(max, orig[i].numInstances());
                }
            } else if (orig != null) {
                for (i = 0; i < orig.length; i++)
                    max = Math.max(max, orig[i].numInstances());
            }

            inst = new Instances[orig.length];
            ids = new HashSet(max);
            for (i = 0; i < orig.length; i++) {
                if (isStopped())
                    break;
                if (isLoggingEnabled())
                    getLogger().info("Updating IDs #" + (i + 1));
                updateIDs(i, orig[i], ids);
                if (isLoggingEnabled())
                    getLogger().info("Preparing dataset #" + (i + 1));
                inst[i] = prepareData(orig[i], i);
            }
            output = merge(orig, inst, ids);

            // remove unnecessary unique ID attributes
            if (m_KeepOnlySingleUniqueID) {
                uniqueList = new TIntArrayList();
                for (String att : m_UniqueIDAtts)
                    uniqueList.add(output.attribute(att).index());
                if (uniqueList.size() > 0) {
                    if (isLoggingEnabled())
                        getLogger().info("Removing duplicate unique ID attributes: " + m_UniqueIDAtts);
                    remove = new Remove();
                    remove.setAttributeIndicesArray(uniqueList.toArray());
                    remove.setInputFormat(output);
                    output = Filter.useFilter(output, remove);
                }
            }
        }

        if (!isStopped()) {
            m_OutputToken = new Token(output);
            updateProvenance(m_OutputToken);
        }
    } catch (Exception e) {
        result = handleException("Failed to merge: ", e);
    }

    return result;
}
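Note the familiar single-row idiom at the top: when the payload is an Instance[], each row is wrapped in its own capacity-1 copy of its header (plus a copy of the row itself), so the rest of the merge logic can treat every input uniformly as an Instances object.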
From source file:adams.flow.transformer.WekaPrimeForecaster.java
License:Open Source License
/**
 * Executes the flow item.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances data;
    Instance inst;
    AbstractForecaster cls;

    result = null;

    try {
        cls = getForecasterInstance();
        if (cls == null)
            result = "Failed to obtain forecaster!";

        if (result == null) {
            if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instances)) {
                data = (Instances) m_InputToken.getPayload();
                cls.primeForecaster(data);
                m_OutputToken = new Token(new WekaModelContainer(cls, new Instances(data, 0), data));
            } else if ((m_InputToken != null) && (m_InputToken.getPayload() instanceof Instance)) {
                inst = (Instance) m_InputToken.getPayload();
                data = inst.dataset();
                if (cls instanceof IncrementallyPrimeable) {
                    ((IncrementallyPrimeable) cls).primeForecasterIncremental(inst);
                    m_OutputToken = new Token(new WekaModelContainer(cls, new Instances(data, 0), data));
                } else {
                    result = m_Forecaster.getValue() + " (= " + cls.getClass().getName()
                            + ") does not implement " + IncrementallyPrimeable.class.getName()
                            + "! Cannot prime incrementally!";
                }
            }
        }
    } catch (Exception e) {
        m_OutputToken = null;
        result = handleException("Failed to process data:", e);
    }

    if (m_OutputToken != null)
        updateProvenance(m_OutputToken);

    return result;
}
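In both branches the WekaModelContainer is handed a zero-capacity copy of the priming data alongside the full dataset, presumably so downstream actors have a cheap structural reference (the container's header slot) that stays valid independently of the full data entry.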