wedt.project.Common.java Source code

Java tutorial

Introduction

Here is the source code for wedt.project.Common.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package wedt.project;

import cmu.arktweetnlp.POSTagger;
import cmu.arktweetnlp.Token;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;
import weka.core.converters.CSVLoader;

/**
 * Shared helpers that turn tweets into Weka instances for sentiment classification.
 *
 * <p>The attribute layout is one attribute per feature word (in the order the words are
 * stored in {@code featureWords.dat}) followed by a nominal {@code Sentiment} class
 * attribute whose values are {@code positive}, {@code negative}, {@code neutral}.
 *
 * @author Micha
 */
public class Common {

    /** Serialized {@code ArrayList<String>} of feature words, resolved against the working directory. */
    private static final String FEATURE_WORDS_FILE = "featureWords.dat";

    private final ArrayList<Attribute> attributes;
    /** Class labels; the position of a label in this list is its class value. */
    public ArrayList<String> sentiment;
    private final ArrayList<String> featureWords;
    /** Feature word -> first position of that word in {@link #featureWords}. */
    private final Map<String, Integer> featureIndex;
    private final POSTagger tagger;

    /**
     * Builds the attribute layout from the feature words file.
     *
     * <p>If the file cannot be read, the problem is reported on standard output and the
     * feature vocabulary is left empty, so the layout consists of the class attribute only.
     */
    Common() {
        attributes = new ArrayList<>();
        sentiment = new ArrayList<>();
        sentiment.add("positive");
        sentiment.add("negative");
        sentiment.add("neutral");
        tagger = new POSTagger();

        featureWords = loadFeatureWords();
        featureIndex = new HashMap<>();
        for (int i = 0; i < featureWords.size(); i++) {
            String featureWord = featureWords.get(i);
            attributes.add(new Attribute(featureWord));
            // Keep the first position for a repeated word, matching List.indexOf.
            if (!featureIndex.containsKey(featureWord)) {
                featureIndex.put(featureWord, i);
            }
        }
        attributes.add(new Attribute("Sentiment", sentiment));
    }

    /**
     * Reads the serialized feature word list.
     *
     * @return the stored list, or an empty list when the file is missing, unreadable,
     *         or does not contain a list
     */
    private static ArrayList<String> loadFeatureWords() {
        ArrayList<String> words = null;
        // Both streams are declared as resources so the file handle is released even when
        // the ObjectInputStream constructor or readObject fails.
        try (FileInputStream fileIn = new FileInputStream(FEATURE_WORDS_FILE);
                ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
            @SuppressWarnings("unchecked") // element type cannot be checked at runtime
            ArrayList<String> loaded = (ArrayList<String>) objectIn.readObject();
            words = loaded;
        } catch (IOException | ClassNotFoundException | ClassCastException ex) {
            System.out.println("Blad wczytywania pliku z feature words");
            System.out.println(ex.toString());
        }
        return words != null ? words : new ArrayList<String>();
    }

    /**
     * Prints the expected label, the predicted label and the class distribution.
     *
     * @param shouldBe expected class value (index into {@link #sentiment})
     * @param dist     class distribution; entries 0, 1 and 2 are printed as
     *                 positive, negative and neutral
     * @param score    predicted class value (index into {@link #sentiment})
     */
    public void printDetailedResults(double shouldBe, double[] dist, double score) {
        System.out.println(
                "Should be: " + sentiment.get((int) shouldBe) + ", classified as: " + sentiment.get((int) score));
        System.out.println(
                "Distribution: positive[" + dist[0] + "], negative[" + dist[1] + "], neutral[" + dist[2] + "].");
    }

    /**
     * Prints an error breakdown.
     *
     * @param errors seven counters: element 0 is printed as the total, elements 1-6 as the
     *               individual expected/actual label confusions, in the order shown by the
     *               messages below
     */
    public void printDetailedErrors(List<Integer> errors) {
        System.out.println("Bledow ogolem: " + errors.get(0));
        System.out.println("Powinien byc positive, jest negative: " + errors.get(1));
        System.out.println("Powinien byc positive, jest neutral:" + errors.get(2));
        System.out.println("Powinien byc negative, jest positive:" + errors.get(3));
        System.out.println("Powinien byc negative, jest neutral:" + errors.get(4));
        System.out.println("Powinien byc neutral, jest positive:" + errors.get(5));
        System.out.println("Powinien byc neutral, jest negative:" + errors.get(6));
    }

    /**
     * Loads a CSV file and converts every row into a feature instance.
     *
     * <p>Each row is passed to {@link #extractFeature(Instance)}, which reads the text from
     * column 0 and the label from column 1.
     *
     * @param file CSV file to load
     * @return the converted data set, or {@code null} if the file could not be loaded
     *         (the error is reported on standard output)
     */
    public Instances getPrepapredSet(File file) {
        try {
            CSVLoader csvLoader = new CSVLoader();
            csvLoader.setSource(file);
            Instances loadedInstances = csvLoader.getDataSet();
            Instances instances = getEmptyInstances("instances");

            for (Instance currentInstance : loadedInstances) {
                Instance tmpInstance = extractFeature(currentInstance);
                tmpInstance.setDataset(instances);
                instances.add(tmpInstance);
            }

            return instances;
        } catch (IOException e) {
            System.out.println("Blad w przygotowywaniu zbioru");
            System.out.println(e.toString());
        }

        return null;
    }

    /**
     * Echoes a tweet to standard output and wraps it in a one-element data set labelled
     * {@code neutral}.
     *
     * <p>The instance is built directly from the text instead of being written to and read
     * back from a temporary CSV file, so quotes, commas and line breaks in the tweet
     * cannot corrupt it and no file is left behind on failure.
     *
     * @param tweet tweet text; {@code null} is treated as the text {@code "null"}, as the
     *              earlier string concatenation did
     * @return a data set named {@code instances} containing the single converted tweet
     * @throws Exception kept in the signature for existing callers
     */
    public Instances prepareSingle(String tweet) throws Exception {
        System.out.println(tweet);

        Instances instances = getEmptyInstances("instances");
        Instance single = buildInstance(String.valueOf(tweet), "neutral");
        single.setDataset(instances);
        instances.add(single);
        return instances;
    }

    /**
     * Creates an empty data set with this object's attribute layout; the last attribute is
     * set as the class.
     *
     * @param name relation name of the new data set
     * @return the empty data set
     */
    public Instances getEmptyInstances(final String name) {

        Instances instances = new Instances(name, attributes, 0);
        instances.setClassIndex(instances.numAttributes() - 1);
        return instances;
    }

    /**
     * Converts a raw row into a sparse feature instance.
     *
     * @param input row whose attribute 0 holds the text and attribute 1 holds the label
     * @return sparse instance in this object's attribute layout
     */
    public Instance extractFeature(Instance input) {
        return buildInstance(input.stringValue(0), input.stringValue(1));
    }

    /**
     * Tags the text and marks every feature word found among tokens tagged
     * {@code A}, {@code V}, {@code R} or {@code #} with the value 1.0.
     *
     * @param text  text to tag
     * @param label class label; its position in {@link #sentiment} becomes the class value
     *              (-1 when the label is not in the list)
     * @return sparse instance with one slot per feature word plus the class slot
     */
    private Instance buildInstance(String text, String label) {
        // Sorted map: the indices are copied into the sparse instance in ascending order.
        Map<Integer, Double> active = new TreeMap<>();
        List<Token> tokens = tagger.runPOSTagger(text);

        for (Token token : tokens) {
            switch (token.getPOS()) {
            case "A":
            case "V":
            case "R":
            case "#":
                // Hash signs are dropped before the vocabulary lookup.
                Integer position = featureIndex.get(token.getWord().replace("#", ""));
                if (position != null) {
                    active.put(position, 1.0);
                }
                break;
            default:
                break;
            }
        }

        int[] indices = new int[active.size() + 1];
        double[] values = new double[active.size() + 1];
        int i = 0;
        for (Map.Entry<Integer, Double> entry : active.entrySet()) {
            indices[i] = entry.getKey();
            values[i] = entry.getValue();
            i++;
        }
        // The class attribute sits right after the last feature word.
        indices[i] = featureWords.size();
        values[i] = (double) sentiment.indexOf(label);
        return new SparseInstance(1.0, values, indices, featureWords.size() + 1);
    }

}