// Java tutorial
package com.paolodragone.wsn.domain;/* * Copyright Paolo Dragone 2014 * * This file is part of WiktionarySemanticNetwork. * * WiktionarySemanticNetwork is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * WiktionarySemanticNetwork is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with WiktionarySemanticNetwork. If not, see <http://www.gnu.org/licenses/>. */ import com.google.common.base.Stopwatch; import com.paolodragone.util.DCollections; import com.paolodragone.util.DStreamSupport; import com.paolodragone.wsn.WsnConfiguration; import com.paolodragone.wsn.dataset.SemanticNetworkDataSet; import com.paolodragone.wsn.dataset.SensesDataSet; import com.paolodragone.wsn.dataset.TermsDataSet; import com.paolodragone.wsn.domain.entities.DomainSense; import com.paolodragone.wsn.domain.entities.DomainTerm; import com.paolodragone.wsn.entities.SemanticEdge; import com.paolodragone.wsn.entities.Sense; import com.paolodragone.wsn.entities.Term; import com.paolodragone.wsn.util.Senses; import javax.persistence.EntityManager; import javax.persistence.EntityManagerFactory; import javax.persistence.Persistence; import java.io.Reader; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; /** * @author Paolo Dragone */ public class PopulateDatabase { public static void main(String[] args) { try { // Readers WsnConfiguration configuration = WsnConfiguration.getInstance(); Path semanticNetworkFilePath = configuration.getSemanticNetworkFilePath(); Reader 
sensesFileReader = Files.newBufferedReader(configuration.getSensesFilePath()); Reader termsFileReader = Files.newBufferedReader(configuration.getTermsFilePath()); Reader semanticNetworkFileReader = Files.newBufferedReader(semanticNetworkFilePath); // Get sense Stream SensesDataSet sensesDataSet = new SensesDataSet(); SensesDataSet sensesDataSetView; sensesDataSetView = sensesDataSet .getView(sensesDataSet.excludeColumns(SensesDataSet.SenseColumn.Gloss)); Stream<com.paolodragone.wsn.entities.Sense> senseStream = sensesDataSetView .getEntityStream(sensesFileReader).parallel(); senseStream = Senses.filterValidSenses(senseStream); Collection<com.paolodragone.wsn.entities.Sense> senses = DStreamSupport.toList(senseStream); Map<Integer, com.paolodragone.wsn.entities.Sense> senseMap = DCollections.collectionToMap(senses, com.paolodragone.wsn.entities.Sense::getId, new HashMap<>()); // Set terms TermsDataSet termsDataSet = new TermsDataSet(); Stream<com.paolodragone.wsn.entities.Term> termStream = termsDataSet.getEntityStream(termsFileReader); TermsDataSet.setTerms(senseMap, termStream); { } senseMap = null; SemanticNetworkDataSet semanticNetworkDataSet = new SemanticNetworkDataSet(); Stream<SemanticEdge> semanticNetworkStream; semanticNetworkStream = semanticNetworkDataSet.getEntityStream(semanticNetworkFileReader); Collection<SemanticEdge> semanticNetwork = DStreamSupport.toList(semanticNetworkStream); Map<Integer, SemanticEdge> semanticNetworkMap = DCollections.collectionToMap(semanticNetwork, SemanticEdge::getTermId, new HashMap<>()); EntityManagerFactory entityManagerFactory = Persistence.createEntityManagerFactory("wsn_unit"); EntityManager entityManager = entityManagerFactory.createEntityManager(); Stopwatch stopwatch = Stopwatch.createStarted(); long lastElapsed = 0; Map<Integer, DomainSense> domainSenseMap = new LinkedHashMap<>(); entityManager.getTransaction().begin(); int senseCount = 0; for (Sense sense : senses) { DomainSense domainSense = new DomainSense(); 
domainSense.setId(sense.getId()); domainSense.setWord(sense.getWord()); domainSense.setPos(sense.getPos()); domainSense.setNumber(sense.getNumber()); entityManager.persist(domainSense); domainSenseMap.put(domainSense.getId(), domainSense); senseCount++; long elapsed = stopwatch.elapsed(TimeUnit.SECONDS); if (elapsed > lastElapsed) { lastElapsed = elapsed; System.out.print("\rSenses: " + senseCount); entityManager.flush(); entityManager.clear(); } } entityManager.getTransaction().commit(); entityManager.clear(); entityManager.getTransaction().begin(); System.out.println("\n"); int termCount = 0; for (Sense sense : senses) { DomainSense domainSense = domainSenseMap.get(sense.getId()); List<Term> terms = sense.getGlossTerms(); for (Term term : terms) { DomainTerm domainTerm = new DomainTerm(); domainTerm.setId(term.getId()); domainTerm.setWord(term.getWord()); domainTerm.setLemma(term.getLemma()); domainTerm.setPos(term.getPos()); domainTerm.setPosition(term.getPosition()); domainTerm.setSense(domainSense); SemanticEdge semanticEdge = semanticNetworkMap.get(term.getId()); if (semanticEdge != null) { DomainSense targetDomainSense = domainSenseMap.get(semanticEdge.getTargetSenseId()); double confidence = semanticEdge.getConfidence(); domainTerm.setTargetSense(targetDomainSense); domainTerm.setConfidence(confidence); } entityManager.persist(domainTerm); termCount++; long elapsed = stopwatch.elapsed(TimeUnit.SECONDS); if (elapsed > lastElapsed) { lastElapsed = elapsed; System.out.print("\rTerms: " + termCount); entityManager.flush(); entityManager.clear(); } } } entityManager.getTransaction().commit(); } catch (Exception e) { e.printStackTrace(); } } }