com.paolodragone.wsn.evaluation.FirstSenseBaseline.java Source code

Java tutorial

Introduction

Here is the source code for com.paolodragone.wsn.evaluation.FirstSenseBaseline.java

Source

/*
 * Copyright Paolo Dragone 2014
 *
 * This file is part of WiktionarySemanticNetwork.
 *
 * WiktionarySemanticNetwork is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * WiktionarySemanticNetwork is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with WiktionarySemanticNetwork.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.paolodragone.wsn.evaluation;

import com.google.common.collect.ListMultimap;
import com.paolodragone.util.DCollections;
import com.paolodragone.util.DStreamSupport;
import com.paolodragone.wsn.WsnConfiguration;
import com.paolodragone.wsn.dataset.SemanticNetworkDataSet;
import com.paolodragone.wsn.dataset.SensesDataSet;
import com.paolodragone.wsn.dataset.TermsDataSet;
import com.paolodragone.wsn.entities.SemanticEdge;
import com.paolodragone.wsn.entities.Sense;
import com.paolodragone.wsn.entities.Term;
import com.paolodragone.wsn.util.Senses;
import com.paolodragone.wsn.util.Terms;

import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Stream;

/**
 * Baseline word-sense disambiguation: links every gloss term to the <em>first</em>
 * listed sense of its lemma, with confidence {@code 1 / candidateCount}, then
 * evaluates the resulting semantic network against the annotated test set.
 *
 * <p>Last recorded run:
 * <pre>
 * Total test set: 211
 * Not predicted: 14
 * Accuracy: 0.7563451776649747
 * Average confidence: 0.5969718759987217
 * </pre>
 *
 * @author Paolo Dragone
 */
public class FirstSenseBaseline {

    public static void main(String[] args) {
        try {
            System.out.println("Loading...");

            WsnConfiguration configuration = WsnConfiguration.getInstance();

            // Load the valid senses into memory (gloss text column excluded).
            // The entity stream is lazy, so it must be fully consumed before the
            // underlying reader is closed; try-with-resources guarantees the
            // reader is released even if parsing fails.
            SensesDataSet sensesDataSet = new SensesDataSet();
            SensesDataSet sensesDataSetView = sensesDataSet
                    .getView(sensesDataSet.excludeColumns(SensesDataSet.SenseColumn.Gloss));
            Collection<Sense> senses;
            try (Reader sensesFileReader = Files.newBufferedReader(configuration.getSensesFilePath())) {
                Stream<Sense> senseStream = sensesDataSetView.getEntityStream(sensesFileReader).parallel();
                senseStream = Senses.filterValidSenses(senseStream);
                senses = DStreamSupport.toList(senseStream);
            }
            Map<Integer, Sense> senseMap =
                    DCollections.collectionToMap(senses, Sense::getId, new HashMap<>());

            // Attach the gloss terms to their owning senses (word column excluded).
            TermsDataSet termsDataSet = new TermsDataSet();
            TermsDataSet termsDataSetView =
                    termsDataSet.getView(termsDataSet.excludeColumns(TermsDataSet.TermColumn.Word));
            try (Reader termsFileReader = Files.newBufferedReader(configuration.getTermsFilePath())) {
                Stream<Term> termStream =
                        Terms.filterValidTerms(termsDataSetView.getEntityStream(termsFileReader));
                TermsDataSet.setTerms(senseMap, termStream);
            }

            // Lemma -> candidate senses, in the order the senses were listed.
            ListMultimap<String, Sense> wordSensesMap = Senses.buildWordSensesMap(senses);

            // First-sense heuristic: for each gloss term pick the first candidate
            // sense of its lemma; confidence is uniform over the candidates.
            List<SemanticEdge> semanticNetwork = new ArrayList<>();
            for (Sense sense : senses) {
                for (Term term : sense.getGlossTerms()) {
                    List<Sense> targetSenses = wordSensesMap.get(term.getLemma());
                    if (!targetSenses.isEmpty()) {
                        SemanticEdge semanticEdge = new SemanticEdge();
                        semanticEdge.setTermId(term.getId());
                        semanticEdge.setTargetSenseId(targetSenses.get(0).getId());
                        semanticEdge.setConfidence(1.0 / targetSenses.size());
                        semanticNetwork.add(semanticEdge);
                    }
                }
            }

            // Load the manually annotated test set used as ground truth.
            Path semanticNetworkTestSetFilePath = configuration.getSemanticNetworkTestSetFilePath();
            SemanticNetworkDataSet semanticNetworkDataSet = new SemanticNetworkDataSet();
            Collection<SemanticEdge> semanticNetworkTestSet;
            try (Reader semanticNetworkTestSetReader =
                    Files.newBufferedReader(semanticNetworkTestSetFilePath)) {
                Stream<SemanticEdge> semanticNetworkTestSetStream =
                        semanticNetworkDataSet.getEntityStream(semanticNetworkTestSetReader);
                semanticNetworkTestSet = DStreamSupport.toList(semanticNetworkTestSetStream);
            }

            System.out.println("Done.");

            // Score the baseline network against the test set and report.
            TestSetEvaluator evaluator = new TestSetEvaluator();
            TestSetEvaluator.EvaluationResult result = evaluator.evaluateTestSet(semanticNetwork,
                    semanticNetworkTestSet);

            System.out.println("Total test set: " + result.getSize());
            System.out.println("Not predicted: " + result.getNotPredicted());
            System.out.println("Accuracy: " + result.getAccuracy());
            System.out.println("Average confidence: " + result.getAverageConfidence());

        } catch (Exception e) {
            // Command-line entry point: report the failure and exit.
            e.printStackTrace();
        }
    }
}