Example usage for weka.core SparseInstance SparseInstance

List of usage examples for weka.core SparseInstance SparseInstance

Introduction

In this page you can find the example usage for weka.core SparseInstance SparseInstance.

Prototype

public SparseInstance(int numAttributes) 

Source Link

Document

Constructor of an instance that sets weight to one, all values to be missing, and the reference to the dataset to null.

Usage

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/**
 * Returns the running sum of instances: if {@code sum} is null the sum is
 * initialised as a copy of {@code inst} (preserving sparseness), otherwise
 * {@code inst} is folded into {@code sum} via {@link #sumInstances}.
 *
 * @param sum the running sum so far, or {@code null} if none yet
 * @param inst the instance to add
 * @param m_Instances dataset the resulting instance is attached to
 * @return the new sum instance
 * @throws Exception propagated from {@link #sumInstances}
 */
public static Instance sumWithInstance(Instance sum, Instance inst, Instances m_Instances) throws Exception {
    Instance newSum;
    if (sum == null) {
        // First contribution: copy inst, keeping a sparse representation when
        // the input is sparse. The setDataset call was duplicated in both
        // branches of the original; it is hoisted out here.
        newSum = (inst instanceof SparseInstance) ? new SparseInstance(inst) : new Instance(inst);
        newSum.setDataset(m_Instances);
    } else {
        newSum = sumInstances(sum, inst, m_Instances);
    }
    return newSum;
}

From source file:com.yahoo.research.scoring.classifier.NutchOnlineClassifier.java

License:Apache License

/**
 * Converts an {@link AnthURL} into an {@link Instance} which can be handled
 * by the {@link Classifier}./*  w w  w  .  ja  v a  2s . co  m*/
 * 
 * @param url
 *            the {@link AnthURL} which should be transformed/converted.
 * @return the resulting {@link Instance}.
 */
private static Instance convert(AnthURL url) {
    if (url != null) {

        Instance inst = new SparseInstance(dimension);
        inst.replaceMissingValues(replaceMissingValues);

        inst.setDataset(instances);
        inst.setValue(attributesIndex.get("class"), (url.sem ? "sem" : "nonsem"));
        inst.setValue(attributesIndex.get("sempar"), (url.semFather ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("semsib"), (url.semSibling ? 1 : 0));
        inst.setValue(attributesIndex.get("nonsempar"), (url.nonSemFather ? 1 : 0));
        inst.setValue(attributesIndex.get("domain"), url.uri.getHost());
        Set<String> tokens = new HashSet<String>();

        tokens.addAll(tokenizer(url.uri.getPath()));
        tokens.addAll(tokenizer(url.uri.getQuery()));
        tokens.addAll(tokenizer(url.uri.getFragment()));
        for (String tok : tokens) {
            inst.setValue(attributesIndex.get(getAttributeNameOfHash(getHash(tok, hashTrickSize))), 1);
        }
        return inst;

    } else {
        System.out.println("Input AnthURL for convertion into instance was null.");
        return null;
    }
}

From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java

License:Apache License

@Override
public Map<String, Double> infer(Map<String, Double> features) throws AnalysisEngineProcessException {
    // Build a sparse instance bound to the schema captured at training time.
    Instance instance = new SparseInstance(features.size());
    instance.setDataset(datasetSchema);
    for (Map.Entry<String, Double> feature : features.entrySet()) {
        Attribute attribute = datasetSchema.attribute(feature.getKey());
        // Features with no matching attribute (unseen at training time) are dropped.
        if (attribute != null) {
            instance.setValue(attribute, feature.getValue());
        }
    }
    double[] probs;
    try {
        probs = classifier.distributionForInstance(instance);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // One probability per label attribute — presumably classIndex equals the
    // label count in the Meka schema; checked by this assertion.
    assert datasetSchema.classIndex() == probs.length;
    return IntStream.range(0, probs.length).boxed()
            .collect(toMap(i -> datasetSchema.attribute(i).name(), i -> probs[i]));
}

From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java

License:Apache License

/**
 * Trains the Meka multi-label classifier on feature maps {@code X} with labels
 * {@code Y}, builds and persists both the dataset schema and the model, and
 * optionally reports 10-fold cross-validation results.
 *
 * @param X feature vectors, one map of feature name to value per example
 * @param Y label names, parallel to X
 * @param crossValidation if true, also run 10-fold cross-validation and log a summary
 * @throws AnalysisEngineProcessException wrapping any Weka/Meka or serialization failure
 */
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    List<String> labelNames = ClassifierProvider.labelNames(Y);
    // each label becomes a binary nominal attribute ("y"/"n"), placed first
    labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
            .forEachOrdered(attributes::add);
    List<String> featureNames = ClassifierProvider.featureNames(X);
    featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, 0);
    // Meka convention: class index = number of labels (labels occupy indices 0..classIndex-1)
    datasetSchema.setClassIndex(labelNames.size());
    // add instances
    // due to the limitation of the interface definition, X, Y should be reorganized
    // (a multimap groups all labels attached to the same feature vector)
    SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
    IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
    Instances trainingInstances = new Instances(datasetSchema, XY.size());
    for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
        Set<String> y = ImmutableSet.copyOf(entry.getValue());
        Map<String, Double> x = entry.getKey();
        SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
        // label block: "y" for labels attached to this vector, "n" otherwise
        for (String labelName : labelNames) {
            instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
        }
        // feature block: one numeric value per named feature
        for (Map.Entry<String, Double> e : x.entrySet()) {
            instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
        }
        trainingInstances.add(instance);
    }
    // training
    try {
        classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // persist both the model and the schema needed to rebuild instances at inference time
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            // NOTE(review): unseeded Random makes cross-validation folds non-reproducible
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}

From source file:elh.eus.absa.CLI.java

License:Open Source License

/**
 * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier
 * (E and A) for E#A aspect categories.
 *
 * Stage 1 trains/predicts entity categories (entCat) one-vs-all; accepted
 * predictions relabel or clone instances. Stage 2 trains/predicts attribute
 * categories (attCat) and emits E#A opinions in SemEval-2015 format.
 *
 * @param inputStream training corpus stream
 * @throws IOException if parameter/corpus files cannot be read
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    // probability thresholds for accepting a one-vs-all prediction (stage 1 and stage 2)
    double threshold = 0.5;
    double threshold2 = 0.5;
    // NOTE(review): hard-coded, user-specific path — should come from configuration
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    // in test-only mode, reload instances from the given test set instead
    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();      
    //WekaWrapper classifyAtts;
    WekaWrapper onevsall;
    try {

        //classify.printMultilabelPredictions(classify.multiLabelPrediction());      */   

        //onevsall
        // Stage 1: one-vs-all over entity categories (entCat); drop the other
        // class-candidate attributes first.
        Instances entdata = new Instances(traindata);
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());
        onevsall = new WekaWrapper(entdata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        // per-instance map: class name -> probability
        HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath,
                paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");
        // inverse of getOpinInst(): instance id -> opinion id
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }

        // reload features for the second stage parameter file
        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        //entdata.setClassIndex(entdata.attribute("entCat").index());

        Attribute insAtt = entdata.attribute("instanceId");
        // highest existing instance id; new cloned instances get ids above it
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold) {
                    //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                    // additional accepted classes: clone the instance under a fresh id
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);

                    }
                    // first accepted class: relabel the existing instance in place
                    else {
                        currentInst.setClassValue(cl);
                        //create and add opinion to the structure
                        //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                        //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId);
                        //reader.addOpinion(op);
                    }
                    oSubId++;
                }
            } //finished updating instances data                                    
        }

        // Stage 2: one-vs-all over attribute categories (attCat)
        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);

        /**
         *  Second classifier (Basque: "Bigarren sailkatzailea")
         * 
         * */
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }

        // NOTE(review): models above are trained under id paramFile + "attCat" but
        // predictions are requested under paramFile + "entAttCat" — confirm intended.
        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");

        insAtt = entdata.attribute("instanceId");
        // NOTE(review): computed differently from stage 1 above
        // (numDistinctValues(insAtt) - 1 there vs insAtt.numValues() here) — verify.
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    // NOTE(review): with threshold == threshold2 this inner check is
                    // redundant — confirm whether two distinct thresholds were intended.
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));                  
                        // additional accepted classes: add a further E#A opinion
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);                     
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        // first accepted class: replace the original opinion
                        else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            //   trgt, offsetFrom, offsetTo, polarity, cat, sId);
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                } //finished updating instances data                                    
            }
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }

    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}

From source file:europarl.PhraseTranslation.java

License:Open Source License

/**
 * Reads phrase translations of {@code targetWord} from a gzipped word-alignment
 * file (triples of lines: score/comment, foreign sentence, English words with
 * "word ({ idx })" alignments), then builds the Weka {@code dataSet} with one
 * boolean attribute per collected word plus a nominal "%class" attribute, one
 * instance per collected {@link PhraseTranslation}.
 *
 * @param fileName path to the gzipped alignment file
 * @param targetWord the English word whose translations are collected
 * @param limit stop after this many translations have been collected
 * @return true on success, false if reading/parsing failed
 */
public boolean getFromGz(String fileName, String targetWord, int limit) {
    String strLine;
    ArrayList<String> line_triple = new ArrayList<String>();

    BufferedReader gzipReader;
    // matches "word ({ 1 2 })" pairs: group 1 = English word, group 2 = alignment indices
    Pattern word_align = Pattern.compile("(\\w+) \\(\\{(.*?)\\}\\) ");

    Bag<String> words_list = new Bag<String>(); //Bag of ALL words: it will be the list of attributes
    ArrayList<PhraseTranslation> translations = new ArrayList<PhraseTranslation>();
    try {
        // NOTE(review): gzipReader is never closed — consider try-with-resources
        gzipReader = new BufferedReader(
                new InputStreamReader(new GZIPInputStream(new FileInputStream(fileName))));

        while ((strLine = gzipReader.readLine()) != null) //read-everything
        {
            line_triple.add(strLine);
            if (line_triple.size() == 3) //triple finished
            {
                //TODO: match only complete words
                //TODO: stem it before doing this

                Matcher matcher = word_align.matcher(line_triple.get(2));
                String[] foreign_words = line_triple.get(1).split(" ");
                line_triple.clear();
                // strLine is still the third line of the triple (English side with
                // alignments), so triples without the target word are skipped here
                if (!strLine.contains(targetWord)) //skip it
                    continue;

                ArrayList<String> e_phrase = new ArrayList<String>();
                String translation = "";
                while (matcher.find()) //each iteration is word +alignment
                {
                    assert matcher.groupCount() == 2;
                    String e_word = matcher.group(1).trim();
                    if (e_word.equals("NULL"))
                        e_word = "";
                    if (stopwordsList.contains(e_word))
                        continue;
                    if (stemmer != null)
                        e_word = stemmer.stem(e_word);

                    e_phrase.add(e_word);
                    words_list.add(e_word);

                    //we don't care about the alignment of non-target words
                    if (!e_word.equals(targetWord))
                        continue;

                    //parse the { x y z } alignment part
                    ArrayList<Integer> f_words = new ArrayList<Integer>();
                    translation = "";
                    //for each number between curly brackets
                    for (String number : matcher.group(2).split(" ")) {
                        if (!number.isEmpty()) {
                            // alignment indices are 1-based; convert to 0-based
                            int n_word = Integer.parseInt(number) - 1;
                            f_words.add(n_word);
                            translation += foreign_words[n_word] + " ";
                        }
                    } // end of curly brackets for

                } //end of word+alignment while
                if (!translation.isEmpty()) {
                    PhraseTranslation trans = new PhraseTranslation(e_phrase, translation);
                    translations.add(trans);
                }
                // NOTE(review): redundant — line_triple was already cleared above
                line_triple.clear();
            } //end of triple-finished if
            if (translations.size() == limit)
                break; //stop collecting!
        } //end of the read-everything while
    } catch (Exception e) {
        log.error("Error: " + e);
        e.printStackTrace();
        return false;
    }

    //what we NOW have: a bag of candidate attribute words in words_list
    //and an ArrayList<PhraseTranslation> translations
    log.info("Collected " + translations.size() + " phrases and " + words_list.size() + " words");

    postProcessData(translations, words_list);

    //now convert the data we collected to Weka data
    //we needed to do "double passing" because we need to initialize
    //the dataset with the complete list of attributes

    //this will convert word to attributes: they are all "boolean"
    ArrayList<Attribute> attrs = new ArrayList<Attribute>();
    HashMap<String, Attribute> attrs_map = new HashMap<String, Attribute>();
    Attribute att;
    for (String word : words_list) {
        att = new Attribute(word);
        attrs.add(att);
        attrs_map.put(word, att);
    }

    //now we need to manage class.
    //each translation is a class, so we need to get all of them
    HashMap<String, Integer> class_map = new HashMap<String, Integer>();
    ArrayList<String> classes = new ArrayList<String>();
    for (PhraseTranslation phraseTranslation : translations) {
        if (!class_map.containsKey(phraseTranslation.getTranslatedWord())) {
            class_map.put(phraseTranslation.getTranslatedWord(), classes.size());
            classes.add(phraseTranslation.getTranslatedWord());
        }
    }

    log.info(targetWord + " has " + classes.size() + " translations:");
    if (log.isInfoEnabled())
        for (String translation : classes)
            System.out.println(translation);
    // the class attribute goes last; "%" prefix avoids clashing with word attributes
    att = new Attribute("%class", classes);
    attrs.add(att);
    attrs_map.put("%class", att);
    dataSet = new Instances("dataset", attrs, 0);
    for (PhraseTranslation phraseTranslation : translations) {
        SparseInstance inst = new SparseInstance(attrs.size());
        //set everything to 0
        for (int i = 0; i < attrs.size(); i++)
            inst.setValue(i, 0);
        //set present word to 1
        for (String word : phraseTranslation.getPhraseWords())
            inst.setValue(attrs_map.get(word), 1);
        //set class of instance
        inst.setValue(attrs_map.get("%class"), class_map.get(phraseTranslation.getTranslatedWord()));
        dataSet.add(inst);
    }

    return true;
}

From source file:eyetracker.ServerCommunicator.java

/**
 * Builds a sparse Weka instance from the comma-separated {@code unifiedData}
 * string, bound to the MLP dataset header; the last (class) attribute is left unset.
 */
public Instance getInput() {
    // Number of attributes expected by the trained model's dataset header.
    int attributeCount = MLPProcessor.inst.firstInstance().numAttributes();
    Instance instance = new SparseInstance(attributeCount);
    instance.setDataset(MLPProcessor.inst);
    // Parse numeric attribute values from the comma-separated reading.
    String[] fields = unifiedData.split(",");
    for (int index = 0; index < attributeCount - 1; index++) {
        instance.setValue(index, Double.parseDouble(fields[index]));
    }
    return instance;
}

From source file:jkamal.ddbmssim.incmine.core.Segment.java

License:Open Source License

/**
 * Adds a new itemset to the segment.
 *
 * @param instance instance whose values are converted into the itemset to add
 */
public void addItemset(Instance instance) {
    // Copy into a sparse representation before converting to an itemset.
    SparseInstance sparseCopy = new SparseInstance(instance);
    context.addItemset(toItemset(sparseCopy));
}

From source file:moa.streams.generators.multilabel.MetaMultilabelGenerator.java

License:Open Source License

/**
 * GenerateMLInstance./*from w  ww .ja  va  2  s  .  c  o  m*/
 *
 * @param   Y   a set of label [indices]
 * @return a multit-labelled example
 */
private Instance generateMLInstance(HashSet<Integer> Y) {

    // create a multi-label instance:
    Instance x_ml = new SparseInstance(this.multilabelStreamTemplate.numAttributes());
    x_ml.setDataset(this.multilabelStreamTemplate);

    // set classes
    for (int j = 0; j < m_L; j++) {
        x_ml.setValue(j, 0.0);
    }
    for (int l : Y) {
        x_ml.setValue(l, 1.0);
    }

    // generate binary instances
    Instance x_0 = getNextWithBinary(0);
    Instance x_1 = getNextWithBinary(1);

    // Loop through each feature attribute @warning: assumes class is last index
    for (int a = 0; a < m_A; a++) {

        // The combination is present: use a positive value
        if (Y.containsAll(m_TopCombinations[a])) {
            x_ml.setValue(m_L + a, x_1.value(a));
            //x_ml.setValue(m_L+a,1.0);
        } // The combination is absent: use a negative value
        else {
            x_ml.setValue(m_L + a, x_0.value(a));
            //x_ml.setValue(m_L+a,0.0);
        }
    }

    return x_ml;
}

From source file:mulan.classifier.transformation.CalibratedLabelRanking.java

License:Open Source License

/**
 * Builds the calibrated label ranking model: binary-relevance "virtual label"
 * models plus one pairwise (one-vs-one) model per unordered label pair, trained
 * only on examples where the two labels disagree.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception propagated from the underlying Weka classifiers/filters
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
    // Virtual label models
    debug("Building calibration label models");
    System.out.println("Building calibration label models");
    virtualLabelModels = new BinaryRelevance(getBaseClassifier());
    virtualLabelModels.setDebug(getDebug());
    virtualLabelModels.build(trainingSet);

    // One-vs-one models: one binary model per unordered label pair
    numModels = ((numLabels) * (numLabels - 1)) / 2;
    oneVsOneModels = AbstractClassifier.makeCopies(getBaseClassifier(), numModels);
    nodata = new boolean[numModels];
    metaDataTest = new Instances[numModels];

    Instances trainingData = trainingSet.getDataSet();

    int counter = 0;
    // Creation of one-vs-one models
    for (int label1 = 0; label1 < numLabels - 1; label1++) {
        // Attribute of label 1
        Attribute attrLabel1 = trainingData.attribute(labelIndices[label1]);
        for (int label2 = label1 + 1; label2 < numLabels; label2++) {
            debug("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            System.out.println("Building one-vs-one model " + (counter + 1) + "/" + numModels);
            // Attribute of label 2
            Attribute attrLabel2 = trainingData.attribute(labelIndices[label2]);

            // initialize training set
            Instances dataOneVsOne = new Instances(trainingData, 0);
            // filter out examples with no preference (same value for both labels)
            for (int i = 0; i < trainingData.numInstances(); i++) {
                Instance tempInstance;
                // copy preserving the instance's representation (sparse or dense)
                if (trainingData.instance(i) instanceof SparseInstance) {
                    tempInstance = new SparseInstance(trainingData.instance(i));
                } else {
                    tempInstance = new DenseInstance(trainingData.instance(i));
                }

                int nominalValueIndex;
                nominalValueIndex = (int) tempInstance.value(labelIndices[label1]);
                String value1 = attrLabel1.value(nominalValueIndex);
                nominalValueIndex = (int) tempInstance.value(labelIndices[label2]);
                String value2 = attrLabel2.value(nominalValueIndex);

                // keep only instances where the two labels disagree; label1's
                // value then serves as the binary target for this pairwise model
                if (!value1.equals(value2)) {
                    tempInstance.setValue(attrLabel1, value1);
                    dataOneVsOne.add(tempInstance);
                }
            }

            // remove all labels apart from label1 and place it at the end
            Reorder filter = new Reorder();
            int numPredictors = trainingData.numAttributes() - numLabels;
            int[] reorderedIndices = new int[numPredictors + 1];
            for (int i = 0; i < numPredictors; i++) {
                reorderedIndices[i] = featureIndices[i];
            }
            reorderedIndices[numPredictors] = labelIndices[label1];
            filter.setAttributeIndicesArray(reorderedIndices);
            filter.setInputFormat(dataOneVsOne);
            dataOneVsOne = Filter.useFilter(dataOneVsOne, filter);
            //System.out.println(dataOneVsOne.toString());
            dataOneVsOne.setClassIndex(numPredictors);

            // build model label1 vs label2
            if (dataOneVsOne.size() > 0) {
                oneVsOneModels[counter].buildClassifier(dataOneVsOne);
            } else {
                nodata[counter] = true;
            }
            // keep only the emptied header as the test-time instance template
            dataOneVsOne.delete();
            metaDataTest[counter] = dataOneVsOne;
            counter++;
        }
    }
}