Java tutorial
/*
 * (c) 2005 David B. Bracewell
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.davidbracewell.wordnet;

import com.davidbracewell.Language;
import com.davidbracewell.SystemInfo;
import com.davidbracewell.cache.Cache;
import com.davidbracewell.cache.CacheManager;
import com.davidbracewell.cache.CacheSpec;
import com.davidbracewell.cache.impl.GuavaLoadingCache;
import com.davidbracewell.collection.Counter;
import com.davidbracewell.collection.Counters;
import com.davidbracewell.collection.Sorting;
import com.davidbracewell.config.Config;
import com.davidbracewell.conversion.Cast;
import com.davidbracewell.io.Resources;
import com.davidbracewell.reflection.BeanUtils;
import com.davidbracewell.reflection.ReflectionException;
import com.davidbracewell.tuple.Pair;
import com.davidbracewell.wordnet.io.WordNetDB;
import com.davidbracewell.wordnet.io.WordNetLoader;
import com.davidbracewell.wordnet.io.WordNetPropertyLoader;
import com.davidbracewell.wordnet.io.properties.InformationContentLoader;
import com.davidbracewell.wordnet.properties.PropertyName;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.*;

import javax.annotation.Nullable;
import java.util.*;

/**
 * A singleton interface to a WordNet-style lexical network. Provides lookup of lemmas, senses,
 * and synsets, traversal of semantic and lexical relations, and graph measures such as depth,
 * shortest path, distance, and least common subsumer.
 *
 * @author David B. Bracewell
 */
public class WordNet {

  private static volatile WordNet INSTANCE;

  private final double[] maxDepths = {-1, -1, -1, -1, -1};
  private final WordNetDB db;

  // Caches the single-source shortest-path trees so repeated path/distance queries are cheap.
  private final Cache<Synset, ListMultimap<Synset, Synset>> shortestPathCache = CacheManager.getInstance()
      .createCache(new CacheSpec<Synset, ListMultimap<Synset, Synset>>()
          .engine(GuavaLoadingCache.class)
          .maxSize(25000)
          .concurrencyLevel(SystemInfo.NUMBER_OF_PROCESSORS)
          .name("WordNetDistanceCache")
          .expiresAfterAccess("20m")
          .function(new Function<Synset, ListMultimap<Synset, Synset>>() {
            @Nullable
            @Override
            public ListMultimap<Synset, Synset> apply(@Nullable Synset input) {
              return input == null ? null : dijkstra_path(input);
            }
          }));

  private WordNet() {
    db = new WordNetDB();
    for (WordNetLoader loader : Config.get(WordNet.class, "loaders").asList(WordNetLoader.class)) {
      loader.load(db);
    }
    if (Config.hasProperty(WordNet.class, "properties")) {
      for (WordNetPropertyLoader loader : Config.get(WordNet.class, "properties")
          .asList(WordNetPropertyLoader.class)) {
        loader.load(db);
      }
    }
  }

  /**
   * Gets the singleton instance, creating it lazily on first access.
   *
   * @return the singleton WordNet instance
   */
  public static WordNet getInstance() {
    if (INSTANCE == null) {
      synchronized (WordNet.class) {
        if (INSTANCE == null) {
          INSTANCE = new WordNet();
        }
      }
    }
    return INSTANCE;
  }

  /**
   * Computes single-source shortest paths from the given synset over hypernym/hyponym links
   * (all edges have weight 1). The result maps each reachable synset to the ordered list of
   * synsets on the path from the source to it, both endpoints included.
   */
  private ListMultimap<Synset, Synset> dijkstra_path(Synset source) {
    Counter<Synset> dist = Counters.newHashMapCounter();
    Map<Synset, Synset> previous = new HashMap<>();
    Set<Synset> visited = Sets.newHashSet(source);

    for (Synset other : getSynsets()) {
      if (!other.equals(source)) {
        dist.set(other, Integer.MAX_VALUE);
        previous.put(other, null);
      }
    }

    MinMaxPriorityQueue<Pair<Synset, Double>> queue = MinMaxPriorityQueue
        .orderedBy(Cast.<Comparator<? super Pair<Synset, Double>>>as(Sorting.mapEntryComparator(false, true)))
        .create();
    queue.add(Pair.of(source, 0d));

    while (!queue.isEmpty()) {
      Pair<Synset, Double> next = queue.remove();
      Synset synset = next.getFirst();
      visited.add(synset);
      Iterable<Synset> neighbors = Iterables.concat(
          synset.getRelatedSynsets(Relation.HYPERNYM),
          synset.getRelatedSynsets(Relation.HYPERNYM_INSTANCE),
          synset.getRelatedSynsets(Relation.HYPONYM),
          synset.getRelatedSynsets(Relation.HYPONYM_INSTANCE));
      for (Synset neighbor : neighbors) {
        double alt = dist.get(synset);
        if (alt != Integer.MAX_VALUE && (alt + 1) < dist.get(neighbor)) {
          dist.set(neighbor, alt + 1);
          previous.put(neighbor, synset);
        }
        if (!visited.contains(neighbor)) {
          // enqueue with the neighbor's tentative distance
          queue.add(Pair.of(neighbor, alt + 1));
        }
      }
    }

    ListMultimap<Synset, Synset> path = ArrayListMultimap.create();
    for (Synset other : getSynsets()) {
      if (other.equals(source) || dist.get(other) == Integer.MAX_VALUE) {
        continue;
      }
      // Walk the predecessor chain back to the source and reverse it onto the multimap.
      Deque<Synset> stack = Lists.newLinkedList();
      Synset u = other;
      while (u != null && previous.containsKey(u)) {
        stack.push(u);
        u = previous.get(u);
      }
      // Include the source so each path holds both endpoints, as distance() and
      // getLeastCommonSubsumer() expect.
      stack.push(source);
      while (!stack.isEmpty()) {
        path.put(other, stack.pop());
      }
    }
    return path;
  }

  /**
   * Gets the maximum depth of any synset with the given part of speech.
   *
   * @param partOfSpeech the part of speech
   * @return the maximum depth
   */
  public double getMaxDepth(WordNetPOS partOfSpeech) {
    Preconditions.checkNotNull(partOfSpeech);
    if (maxDepths[partOfSpeech.ordinal()] == -1) {
      synchronized (maxDepths) {
        if (maxDepths[partOfSpeech.ordinal()] == -1) {
          double max = 0d;
          for (Synset synset : getSynsets()) {
            if (synset.getPOS() == partOfSpeech) {
              max = Math.max(max, depth(synset) - 1);
            }
          }
          maxDepths[partOfSpeech.ordinal()] = max;
        }
      }
    }
    return maxDepths[partOfSpeech.ordinal()];
  }

  /**
   * Gets the lexical relation, if any, between two senses.
   *
   * @param from the source sense
   * @param to   the target sense
   * @return the relation, or null if either sense is null or no relation exists
   */
  public Relation getRelation(Sense from, Sense to) {
    if (from == null || to == null) {
      return null;
    }
    return db.senseRelations.get(db.toSenseRelationIndex(from), db.toSenseRelationIndex(to));
  }

  /**
   * Checks whether the given lemma is present in the network.
   *
   * @param lemma the lemma
   * @return true if the lemma is known, false otherwise
   */
  public boolean containsLemma(String lemma) {
    return !Strings.isNullOrEmpty(lemma) && db.lemmaToSenseMap.containsKey(lemma.toLowerCase());
  }

  /**
   * Gets the lemmas.
   *
   * @return the lemmas in the network
   */
  public Set<String> getLemmas() {
    return Collections.unmodifiableSet(db.lemmaToSenseMap.keySet());
  }

  /**
   * Gets the senses.
   *
   * @return all senses present in the network
   */
  public Collection<Sense> getSenses() {
    return Collections.unmodifiableCollection(db.lemmaToSenseMap.values());
  }

  /**
   * Gets the synsets.
   *
   * @return all synsets present in the network
   */
  public Collection<Synset> getSynsets() {
    return Collections.unmodifiableCollection(db.idToSynsetMap.values());
  }

  /**
   * Returns the height of the node in the ontology.
   *
   * @param node The node to check
   * @return The height
   */
  public int depth(Synset node) {
    Preconditions.checkNotNull(node);
    // TODO(review): depth semantics assumed here -- a root synset has depth 1 and every other
    // synset is one deeper than its deepest hypernym (HYPERNYM or HYPERNYM_INSTANCE); the
    // computation is a simple, unmemoized recursion.
    Set<Synset> hypernyms = Sets.union(getRelatedSynsets(node, Relation.HYPERNYM),
        getRelatedSynsets(node, Relation.HYPERNYM_INSTANCE));
    if (hypernyms.isEmpty()) {
      return 1;
    }
    int max = 0;
    for (Synset hypernym : hypernyms) {
      max = Math.max(max, depth(hypernym));
    }
    return max + 1;
  }

  /**
   * Gets the hypernyms of the given WordNet node.
   *
   * @param node The WordNet node
   * @return The hypernyms
   */
  public Set<Synset> getHypernyms(Synset node) {
    return getRelatedSynsets(node, Relation.HYPERNYM);
  }

  /**
   * Gets the first hypernym of the given WordNet node.
   *
   * @param node The WordNet node
   * @return The first hypernym, or null if there are none
   */
  public Synset getHypernym(Synset node) {
    return Iterables.getFirst(getHypernyms(node), null);
  }

  /**
   * Gets the hyponyms of the given WordNet node.
   *
   * @param node The WordNet node
   * @return The hyponyms
   */
  public Set<Synset> getHyponyms(Synset node) {
    return getRelatedSynsets(node, Relation.HYPONYM);
  }

  /**
   * Gets the first hyponym of the given WordNet node.
   *
   * @param node The WordNet node
   * @return The first hyponym, or null if there are none
   */
  public Synset getHyponym(Synset node) {
    return Iterables.getFirst(getHyponyms(node), null);
  }

  /**
   * Gets the synsets related to the given node by the given semantic relation.
   *
   * @param node     The WordNet node
   * @param relation The desired relation
   * @return The set of synsets with the given relation to the given node
   */
  public Set<Synset> getRelatedSynsets(Synset node, final Relation relation) {
    if (node == null) {
      return Collections.emptySet();
    }
    Set<Synset> synsets = new HashSet<>();
    for (Map.Entry<String, Relation> entry : db.synsetRelations.row(node.getId()).entrySet()) {
      if (entry.getValue() == relation) {
        synsets.add(db.idToSynsetMap.get(entry.getKey()));
      }
    }
    return synsets;
  }

  /**
   * Gets all semantic relations associated with the given synset.
   *
   * @param synset The WordNet synset
   * @return A multimap from relation to the synsets related to the given synset by that relation
   */
  public HashMultimap<Relation, Synset> getRelatedSynsets(final Synset synset) {
    if (synset == null) {
      return HashMultimap.create();
    }
    HashMultimap<Relation, Synset> map = HashMultimap.create();
    for (Map.Entry<String, Relation> entry : db.synsetRelations.row(synset.getId()).entrySet()) {
      map.put(entry.getValue(), getSynset(entry.getKey()));
    }
    return map;
  }

  /**
   * Gets the senses related to the given sense by the given lexical relation.
   *
   * @param sense    The WordNet sense
   * @param relation The desired relation
   * @return The set of senses with the given relation to the given sense
   */
  public Set<Sense> getRelatedSenses(final Sense sense, final Relation relation) {
    if (sense == null) {
      return Collections.emptySet();
    }
    Set<Sense> senses = new HashSet<>();
    for (Map.Entry<Sense, Relation> entry : db.senseRelations.row(sense).entrySet()) {
      if (entry.getValue() == relation) {
        senses.add(entry.getKey());
      }
    }
    return senses;
  }

  /**
   * Gets all lexical relations associated with the given sense.
   *
   * @param sense The WordNet sense
   * @return A multimap from relation to the senses related to the given sense by that relation
   */
  public HashMultimap<Relation, Sense> getRelatedSenses(final Sense sense) {
    if (sense == null) {
      return HashMultimap.create();
    }
    HashMultimap<Relation, Sense> map = HashMultimap.create();
    for (Map.Entry<Sense, Relation> entry : db.senseRelations.row(sense).entrySet()) {
      map.put(entry.getValue(), entry.getKey());
    }
    return map;
  }

  /**
   * Gets the siblings of the given synset, i.e. the synsets with which the given synset shares a
   * hypernym.
   *
   * @param synset The synset
   * @return A set of siblings
   */
  public Set<Synset> getSiblings(Synset synset) {
    if (synset == null) {
      return Collections.emptySet();
    }
    Set<Synset> siblings = Sets.newHashSet();
    for (Synset hypernym : getHypernyms(synset)) {
      siblings.addAll(getHyponyms(hypernym));
    }
    siblings.remove(synset);
    return siblings;
  }

  /**
   * Gets the synset associated with the given id.
   *
   * @param id The synset id
   * @return The synset, or null if the id is unknown
   */
  public Synset getSynset(String id) {
    return db.idToSynsetMap.get(id);
  }

  /**
   * Gets the senses of the given surface form in the given language, for any part of speech.
   *
   * @param surfaceForm the surface form
   * @param language    the language
   * @return the senses
   */
  public List<Sense> getSenses(String surfaceForm, Language language) {
    return getSenses(surfaceForm, WordNetPOS.ANY, language);
  }

  /**
   * Gets the senses of the given surface form with the given part of speech and language.
   *
   * @param surfaceForm the surface form
   * @param POS         the part of speech tag
   * @param language    the language
   * @return the senses
   */
  public List<Sense> getSenses(String surfaceForm, WordNetPOS POS, Language language) {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(surfaceForm));
    Preconditions.checkNotNull(POS);
    Preconditions.checkNotNull(language);
    List<Sense> senses = Lists.newArrayList();
    for (Sense sense : db.lemmaToSenseMap.get(surfaceForm.toLowerCase())) {
      if ((POS == WordNetPOS.ANY || sense.getPOS() == POS) && sense.getLanguage() == language) {
        senses.add(sense);
      }
    }
    // Fall back to the surface form as given if the lower-cased lookup found nothing.
    if (senses.isEmpty()) {
      for (Sense sense : db.lemmaToSenseMap.get(surfaceForm)) {
        if ((POS == WordNetPOS.ANY || sense.getPOS() == POS) && sense.getLanguage() == language) {
          senses.add(sense);
        }
      }
    }
    Collections.sort(senses);
    return senses;
  }

  /**
   * Gets the senses of the given surface form with the given part of speech and sense number.
   *
   * @param surfaceForm     the surface form
   * @param partOfSpeechTag the part of speech tag
   * @param senseNum        the sense number
   * @return the senses
   */
  public List<Sense> getSenses(String surfaceForm, WordNetPOS partOfSpeechTag, int senseNum) {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(surfaceForm));
    Preconditions.checkNotNull(partOfSpeechTag);
    List<Sense> senses = Lists.newArrayList();
    for (Sense sense : db.lemmaToSenseMap.get(surfaceForm.toLowerCase())) {
      if ((partOfSpeechTag == WordNetPOS.ANY || sense.getPOS() == partOfSpeechTag)
          && sense.getSenseNumber() == senseNum) {
        senses.add(sense);
      }
    }
    if (senses.isEmpty()) {
      for (Sense sense : db.lemmaToSenseMap.get(surfaceForm)) {
        if ((partOfSpeechTag == WordNetPOS.ANY || sense.getPOS() == partOfSpeechTag)
            && sense.getSenseNumber() == senseNum) {
          senses.add(sense);
        }
      }
    }
    Collections.sort(senses);
    return senses;
  }

  /**
   * Gets the sense for the associated information.
   *
   * @param lemma    The lemma
   * @param POS      The part of speech
   * @param senseNum The sense number
   * @param language The language
   * @return The sense, or null if no matching sense exists
   */
  public Sense getSense(String lemma, WordNetPOS POS, int senseNum, Language language) {
    Preconditions.checkArgument(!Strings.isNullOrEmpty(lemma));
    Preconditions.checkNotNull(POS);
    Preconditions.checkNotNull(language);
    for (Sense sense : db.lemmaToSenseMap.get(lemma.toLowerCase())) {
      if ((POS == WordNetPOS.ANY || sense.getPOS() == POS) && sense.getSenseNumber() == senseNum
          && sense.getLanguage() == language) {
        return sense;
      }
    }
    for (Sense sense : db.lemmaToSenseMap.get(lemma)) {
      if ((POS == WordNetPOS.ANY || sense.getPOS() == POS) && sense.getSenseNumber() == senseNum
          && sense.getLanguage() == language) {
        return sense;
      }
    }
    return null;
  }

  /**
   * Gets the least common subsumer of two nodes, i.e. the synset with maximum height that is an
   * ancestor of both nodes.
   *
   * @param synset1 The first node
   * @param synset2 The second node
   * @return The least common subsumer, or null if none exists
   */
  public Synset getLeastCommonSubsumer(Synset synset1, Synset synset2) {
    Preconditions.checkNotNull(synset1);
    Preconditions.checkNotNull(synset2);
    if (synset1.equals(synset2)) {
      return synset1;
    }
    List<Synset> path = shortestPath(synset1, synset2);
    if (path.isEmpty()) {
      return null;
    }
    int node1Height = depth(synset1);
    int node2Height = depth(synset2);
    int minHeight = Math.min(node1Height, node2Height);
    int maxHeight = Integer.MIN_VALUE;
    Synset lcs = null;
    // The subsumer is the deepest synset on the connecting path that lies above both endpoints.
    for (Synset s : path) {
      if (s.equals(synset1) || s.equals(synset2)) {
        continue;
      }
      int height = depth(s);
      if (height < minHeight && height > maxHeight) {
        maxHeight = height;
        lcs = s;
      }
    }
    if (lcs == null) {
      return node1Height < node2Height ? synset1 : synset2;
    }
    return lcs;
  }

  /**
   * Gets the shortest path between two synsets.
   *
   * @param synset1 The first synset
   * @param synset2 The second synset
   * @return The path (empty if the synsets are not connected)
   */
  public List<Synset> shortestPath(Synset synset1, Synset synset2) {
    Preconditions.checkNotNull(synset1);
    Preconditions.checkNotNull(synset2);
    return Collections.unmodifiableList(shortestPathCache.get(synset1).get(synset2));
  }

  /**
   * Calculates the distance between two synsets.
   *
   * @param synset1 Synset 1
   * @param synset2 Synset 2
   * @return The distance (positive infinity if the synsets are not connected)
   */
  public double distance(Synset synset1, Synset synset2) {
    Preconditions.checkNotNull(synset1);
    Preconditions.checkNotNull(synset2);
    if (synset1.equals(synset2)) {
      return 0d;
    }
    List<Synset> path = shortestPath(synset1, synset2);
    return path.isEmpty() ? Double.POSITIVE_INFINITY : path.size() - 1;
  }

  /**
   * Gets the root synsets in the network.
   *
   * @return The set of root synsets
   */
  public Set<Synset> getRoots() {
    return Collections.unmodifiableSet(db.roots);
  }

  /**
   * The entry point of the application.
   *
   * @param args the input arguments
   * @throws Exception the exception
   */
  public static void main(String[] args) throws Exception {
    Config.initialize("WordNet");
    // System.out.println(WordNetLemmatizer.getInstance().getBaseForm("running up"));
    System.out.println(WordNetLemmatizer.getInstance().getBaseForm("hand-washing"));

    InformationContentLoader loader = new InformationContentLoader(
        Resources.fromFile("/data/ic-bnc-add1.dat"), "INFORMATION_CONTENT");
    loader.load(WordNet.getInstance().db);

    Synset cat = WordNet.getInstance().getSense("cat", WordNetPOS.NOUN, 1, Language.ENGLISH).getSynset();
    Synset dog = WordNet.getInstance().getSense("dog", WordNetPOS.NOUN, 1, Language.ENGLISH).getSynset();
    System.out.println(cat + " : " + cat.getProperty(PropertyName.INFO_CONTENT).get("value"));
    System.out.println(dog + " : " + dog.getProperty(PropertyName.INFO_CONTENT).get("value"));

    List<String> beans = Config.get("com.davidbracewell.wordnet.WordNet.properties").asList(String.class);
    System.out.println(beans.size());
    System.out.println(Iterables.transform(beans, new Function<String, Object>() {
      @Nullable
      @Override
      public Object apply(@Nullable String input) {
        try {
          return BeanUtils.getNamedBean(input, WordNetPropertyLoader.class);
        } catch (ReflectionException e) {
          throw Throwables.propagate(e);
        }
      }
    }));
  }

}//END OF WordNet
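
To show how the class above is typically used from client code, here is a minimal sketch of a standalone program. It assumes the same "WordNet" configuration bootstrapped by the main method above (the configured loaders and dictionary data must be available) and that English noun senses for "cat" and "dog" exist in the loaded data; the class name WordNetExample is purely illustrative.

import com.davidbracewell.Language;
import com.davidbracewell.config.Config;
import com.davidbracewell.wordnet.Sense;
import com.davidbracewell.wordnet.Synset;
import com.davidbracewell.wordnet.WordNet;
import com.davidbracewell.wordnet.WordNetPOS;

import java.util.List;

public class WordNetExample {

  public static void main(String[] args) throws Exception {
    // Bootstrap the configuration the same way WordNet.main does; the loaders named in the
    // "WordNet" config determine which dictionary files are read into the database.
    Config.initialize("WordNet");
    WordNet wn = WordNet.getInstance();

    // Look up sense number 1 of the nouns "cat" and "dog" and take their synsets
    // (assumes both lemmas are present in the loaded data).
    Synset cat = wn.getSense("cat", WordNetPOS.NOUN, 1, Language.ENGLISH).getSynset();
    Synset dog = wn.getSense("dog", WordNetPOS.NOUN, 1, Language.ENGLISH).getSynset();

    // Graph queries over the hypernym/hyponym network.
    System.out.println("cat hypernyms : " + wn.getHypernyms(cat));
    System.out.println("path          : " + wn.shortestPath(cat, dog));
    System.out.println("distance      : " + wn.distance(cat, dog));
    System.out.println("LCS           : " + wn.getLeastCommonSubsumer(cat, dog));

    // All English noun senses of "cat", ordered by sense number.
    List<Sense> senses = wn.getSenses("cat", WordNetPOS.NOUN, Language.ENGLISH);
    System.out.println("number of senses: " + senses.size());
  }
}

Because WordNet.getInstance() loads the whole database on first use and caches shortest-path trees per source synset, the first lookup and the first path query are the expensive calls; subsequent queries against the same synsets are served from the cache.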