com.paolodragone.wsn.domain.PopulateDatabase.java Source code

Java tutorial

Introduction

Here is the source code for com.paolodragone.wsn.domain.PopulateDatabase.java

Source

package com.paolodragone.wsn.domain;/*
                                    * Copyright Paolo Dragone 2014
                                    *
                                    * This file is part of WiktionarySemanticNetwork.
                                    *
                                    * WiktionarySemanticNetwork is free software: you can redistribute it and/or modify
                                    * it under the terms of the GNU General Public License as published by
                                    * the Free Software Foundation, either version 3 of the License, or
                                    * (at your option) any later version.
                                    *
                                    * WiktionarySemanticNetwork is distributed in the hope that it will be useful,
                                    * but WITHOUT ANY WARRANTY; without even the implied warranty of
                                    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                                    * GNU General Public License for more details.
                                    *
                                    * You should have received a copy of the GNU General Public License
                                    * along with WiktionarySemanticNetwork.  If not, see <http://www.gnu.org/licenses/>.
                                    */

import com.google.common.base.Stopwatch;
import com.paolodragone.util.DCollections;
import com.paolodragone.util.DStreamSupport;
import com.paolodragone.wsn.WsnConfiguration;
import com.paolodragone.wsn.dataset.SemanticNetworkDataSet;
import com.paolodragone.wsn.dataset.SensesDataSet;
import com.paolodragone.wsn.dataset.TermsDataSet;
import com.paolodragone.wsn.domain.entities.DomainSense;
import com.paolodragone.wsn.domain.entities.DomainTerm;
import com.paolodragone.wsn.entities.SemanticEdge;
import com.paolodragone.wsn.entities.Sense;
import com.paolodragone.wsn.entities.Term;
import com.paolodragone.wsn.util.Senses;

import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;

/**
 * Command-line entry point that reads the senses, gloss terms and semantic
 * network files named by {@link WsnConfiguration} and persists them as
 * {@link DomainSense} and {@link DomainTerm} entities through the
 * {@code wsn_unit} JPA persistence unit.
 *
 * @author Paolo Dragone
 */
public class PopulateDatabase {

    /** Name of the JPA persistence unit used to create the entity manager factory. */
    private static final String PERSISTENCE_UNIT = "wsn_unit";

    /**
     * Loads the data sets from disk and writes them to the database.
     * <p>
     * Any exception raised while loading or persisting is caught here and its
     * stack trace is printed; the method never propagates it to the caller.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            WsnConfiguration configuration = WsnConfiguration.getInstance();
            Path semanticNetworkFilePath = configuration.getSemanticNetworkFilePath();

            Collection<Sense> senses;
            Map<Integer, SemanticEdge> semanticNetworkMap;

            // All three readers are opened up front (as before) and are now
            // guaranteed to be closed once the data has been read into memory.
            // NOTE(review): this assumes DStreamSupport.toList and
            // TermsDataSet.setTerms fully consume their streams before
            // returning - confirm against those helpers.
            try (Reader sensesFileReader = Files.newBufferedReader(configuration.getSensesFilePath());
                 Reader termsFileReader = Files.newBufferedReader(configuration.getTermsFilePath());
                 Reader semanticNetworkFileReader = Files.newBufferedReader(semanticNetworkFilePath)) {
                senses = loadSenses(sensesFileReader, termsFileReader);
                semanticNetworkMap = loadSemanticNetwork(semanticNetworkFileReader);
            }

            // EntityManagerFactory / EntityManager are not AutoCloseable in
            // javax.persistence, so they are released with explicit finally blocks.
            EntityManagerFactory entityManagerFactory = Persistence.createEntityManagerFactory(PERSISTENCE_UNIT);
            try {
                EntityManager entityManager = entityManagerFactory.createEntityManager();
                try {
                    populate(entityManager, senses, semanticNetworkMap);
                } finally {
                    entityManager.close();
                }
            } finally {
                entityManagerFactory.close();
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the valid senses (without the gloss column) and attaches their
     * gloss terms.
     *
     * @param sensesFileReader reader over the senses file
     * @param termsFileReader  reader over the terms file
     * @return the valid senses, with terms set by {@link TermsDataSet#setTerms}
     * @throws Exception if the underlying data set readers fail; declared
     *                   broadly because the data set signatures are not
     *                   visible from this file
     */
    private static Collection<Sense> loadSenses(Reader sensesFileReader, Reader termsFileReader)
            throws Exception {
        SensesDataSet sensesDataSet = new SensesDataSet();
        SensesDataSet sensesDataSetView = sensesDataSet
                .getView(sensesDataSet.excludeColumns(SensesDataSet.SenseColumn.Gloss));

        Stream<Sense> senseStream = sensesDataSetView.getEntityStream(sensesFileReader).parallel();
        senseStream = Senses.filterValidSenses(senseStream);
        Collection<Sense> senses = DStreamSupport.toList(senseStream);

        // The id -> sense index is only needed to attach terms; keeping it local
        // lets it become unreachable as soon as this method returns.
        Map<Integer, Sense> senseMap = DCollections.collectionToMap(senses, Sense::getId, new HashMap<>());

        TermsDataSet termsDataSet = new TermsDataSet();
        Stream<Term> termStream = termsDataSet.getEntityStream(termsFileReader);
        TermsDataSet.setTerms(senseMap, termStream);

        return senses;
    }

    /**
     * Reads the semantic network and indexes its edges by term id.
     *
     * @param semanticNetworkFileReader reader over the semantic network file
     * @return map from term id to the semantic edge for that term
     * @throws Exception if the underlying data set reader fails; declared
     *                   broadly because the data set signature is not visible
     *                   from this file
     */
    private static Map<Integer, SemanticEdge> loadSemanticNetwork(Reader semanticNetworkFileReader)
            throws Exception {
        SemanticNetworkDataSet semanticNetworkDataSet = new SemanticNetworkDataSet();
        Stream<SemanticEdge> semanticNetworkStream =
                semanticNetworkDataSet.getEntityStream(semanticNetworkFileReader);
        Collection<SemanticEdge> semanticNetwork = DStreamSupport.toList(semanticNetworkStream);
        Map<Integer, SemanticEdge> semanticNetworkMap = DCollections.collectionToMap(semanticNetwork,
                SemanticEdge::getTermId, new HashMap<>());
        return semanticNetworkMap;
    }

    /**
     * Persists every sense in one transaction, then every gloss term in a
     * second transaction, printing a progress counter to standard output.
     * <p>
     * If a runtime exception interrupts either phase, the active transaction
     * is rolled back before the exception is rethrown.
     *
     * @param entityManager      entity manager to persist through; not closed here
     * @param senses             senses to persist, each carrying its gloss terms
     * @param semanticNetworkMap semantic edges indexed by term id; a term with
     *                           no entry gets no target sense or confidence
     */
    private static void populate(EntityManager entityManager, Collection<Sense> senses,
                                 Map<Integer, SemanticEdge> semanticNetworkMap) {
        // The stopwatch and lastElapsed are shared by both phases so that progress
        // output and flushing happen at most once per elapsed second overall.
        Stopwatch stopwatch = Stopwatch.createStarted();
        long lastElapsed = 0;

        Map<Integer, DomainSense> domainSenseMap = new LinkedHashMap<>();

        try {
            entityManager.getTransaction().begin();

            int senseCount = 0;
            for (Sense sense : senses) {
                DomainSense domainSense = new DomainSense();
                domainSense.setId(sense.getId());
                domainSense.setWord(sense.getWord());
                domainSense.setPos(sense.getPos());
                domainSense.setNumber(sense.getNumber());
                entityManager.persist(domainSense);
                domainSenseMap.put(domainSense.getId(), domainSense);
                senseCount++;
                long elapsed = stopwatch.elapsed(TimeUnit.SECONDS);
                if (elapsed > lastElapsed) {
                    lastElapsed = elapsed;
                    System.out.print("\rSenses: " + senseCount);
                    // Push pending inserts and empty the persistence context.
                    entityManager.flush();
                    entityManager.clear();
                }
            }

            entityManager.getTransaction().commit();
            entityManager.clear();
            entityManager.getTransaction().begin();

            System.out.println("\n");

            int termCount = 0;
            for (Sense sense : senses) {
                // NOTE(review): after clear() the DomainSense instances held in
                // domainSenseMap are detached; linking them below relies on the
                // provider accepting detached references - confirm against the
                // DomainTerm mapping.
                DomainSense domainSense = domainSenseMap.get(sense.getId());
                List<Term> terms = sense.getGlossTerms();
                for (Term term : terms) {
                    DomainTerm domainTerm = new DomainTerm();
                    domainTerm.setId(term.getId());
                    domainTerm.setWord(term.getWord());
                    domainTerm.setLemma(term.getLemma());
                    domainTerm.setPos(term.getPos());
                    domainTerm.setPosition(term.getPosition());
                    domainTerm.setSense(domainSense);
                    SemanticEdge semanticEdge = semanticNetworkMap.get(term.getId());
                    if (semanticEdge != null) {
                        // May be null when the target sense was not loaded.
                        DomainSense targetDomainSense = domainSenseMap.get(semanticEdge.getTargetSenseId());
                        double confidence = semanticEdge.getConfidence();
                        domainTerm.setTargetSense(targetDomainSense);
                        domainTerm.setConfidence(confidence);
                    }
                    entityManager.persist(domainTerm);
                    termCount++;
                    long elapsed = stopwatch.elapsed(TimeUnit.SECONDS);
                    if (elapsed > lastElapsed) {
                        lastElapsed = elapsed;
                        System.out.print("\rTerms: " + termCount);
                        entityManager.flush();
                        entityManager.clear();
                    }
                }
            }

            entityManager.getTransaction().commit();
        } catch (RuntimeException e) {
            rollbackQuietly(entityManager, e);
            throw e;
        }
    }

    /**
     * Rolls back the entity manager's transaction if one is still active,
     * attaching any rollback failure to {@code cause} as a suppressed
     * exception so the original error is not masked.
     *
     * @param entityManager entity manager whose transaction should be rolled back
     * @param cause         the exception that triggered the rollback
     */
    private static void rollbackQuietly(EntityManager entityManager, RuntimeException cause) {
        try {
            if (entityManager.getTransaction().isActive()) {
                entityManager.getTransaction().rollback();
            }
        } catch (RuntimeException rollbackFailure) {
            cause.addSuppressed(rollbackFailure);
        }
    }
}