fr.inrialpes.exmo.ontosim.string.JWNLDistances.java source code

Here is the source code for fr.inrialpes.exmo.ontosim.string.JWNLDistances.java

Source

/*
 * $Id: JWNLDistances.java 111 2011-05-27 09:18:55Z jdavid $
 *
 * Copyright (C) University of Montréal, 2004-2005
 * Copyright (C) INRIA, 2004-2005, 2007-2011
 * This program was originally part of the Alignment API implementation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

package fr.inrialpes.exmo.ontosim.string;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import java.util.Vector;
import java.util.WeakHashMap;

import net.didion.jwnl.JWNL;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.IndexWordSet;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.data.PointerTarget;
import net.didion.jwnl.data.PointerType;
import net.didion.jwnl.data.PointerUtils;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.list.PointerTargetNode;
import net.didion.jwnl.data.list.PointerTargetNodeList;
import net.didion.jwnl.dictionary.Dictionary;

import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

import fr.inrialpes.exmo.ontosim.OntoSimException;

/**
 * Compute a string distance using the JWNL API (WordNet API)
 * and sometimes Lucene
 * @author Jerome Pierson, David Loup, Petko Valtchev, Jerome Euzenat
 * @version $Id: JWNLDistances.java 111 2011-05-27 09:18:55Z jdavid $
 *
 */

public class JWNLDistances {

    public static final double NOUN_WEIGHT = 0.60;
    public static final double ADJ_WEIGHT = 0.25;
    public static final double VERB_WEIGHT = 0.15;
    private static final double MINIMUM_DISTANCE = 0.05;

    private static Dictionary dictionary = null;

    // Uses the standard English stopword list because this is WordNet (it is possible to change this)
    @SuppressWarnings("unchecked") // Lucene pre-1.5 (non-generic) API?
    private static Set<String> stopWords;

    // Results tables
    double[][] nounsResults;
    double[][] verbsResults;
    double[][] adjectivesResults;

    // Weights tables (masks)
    double[][] nounsMasks;
    double[][] verbsMasks;
    double[][] adjectivesMasks;

    // tokens depending on their nature
    // PG: These are now global variables.
    private Hashtable<String, IndexWord> nouns1 = new Hashtable<String, IndexWord>();
    private Hashtable<String, IndexWord> adjectives1 = new Hashtable<String, IndexWord>();
    private Hashtable<String, IndexWord> verbs1 = new Hashtable<String, IndexWord>();
    private Hashtable<String, IndexWord> nouns2 = new Hashtable<String, IndexWord>();
    private Hashtable<String, IndexWord> adjectives2 = new Hashtable<String, IndexWord>();
    private Hashtable<String, IndexWord> verbs2 = new Hashtable<String, IndexWord>();

    @SuppressWarnings("unchecked") // ENGLISH_STOP_WORDS_SET declared as Set
    public JWNLDistances() throws OntoSimException {
        if (stopWords == null) {
            stopWords = (Set<String>) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
        }
    }

    /**
     * Initializes the JWNL API. Must be called once before computing distances.
     * Requires a file_properties.xml file in the current directory
     * (or an explicit WordNet directory given to Initialize(String,String)).
     */
    public void Initialize() throws OntoSimException {
        Initialize((String) null, (String) null);
    }

    public void Initialize(String wordnetdir, String wordnetversion) throws OntoSimException {
        if (!JWNL.isInitialized()) {
            InputStream pptySource = null;
            if (wordnetdir == null) {
                try {
                    pptySource = new FileInputStream("./file_properties.xml");
                } catch (FileNotFoundException e) {
                    throw new OntoSimException("Cannot find WordNet dictionary: use -Dwndict or file_property.xml");
                }
            } else {
                String properties = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
                properties += "<jwnl_properties language=\"en\">";
                properties += "  <resource class=\"PrincetonResource\"/>";
                properties += "  <version publisher=\"Princeton\" number=\"" + wordnetversion
                        + "\" language=\"en\"/>";
                properties += "  <dictionary class=\"net.didion.jwnl.dictionary.FileBackedDictionary\">";
                properties += "     <param name=\"dictionary_element_factory\" value=\"net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory\"/>";
                properties += "     <param name=\"file_manager\" value=\"net.didion.jwnl.dictionary.file_manager.FileManagerImpl\">";
                properties += "       <param name=\"file_type\" value=\"net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile\"/>";
                properties += "       <param name=\"dictionary_path\" value=\"" + wordnetdir + "\"/>";
                properties += "     </param>";
                properties += "  </dictionary>";
                properties += "</jwnl_properties>";
                // JWNL.initialize() expects a stream, so feed it the generated XML
                pptySource = new ByteArrayInputStream(properties.getBytes());
            }

            // Initialize
            try {
                JWNL.initialize(pptySource);
            } catch (JWNLException e) {
                throw new OntoSimException("Cannot initialize JWNL (WordNet)", e);
            }
            dictionary = Dictionary.getInstance();
        }
    }
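
    /**
     * Usage sketch (not part of the original class): initialize JWNL with an
     * explicit WordNet installation. The dictionary path and version strings
     * below are assumptions for illustration only.
     */
    protected static JWNLDistances exampleSetup() throws OntoSimException {
        JWNLDistances d = new JWNLDistances();
        // Point JWNL at the WordNet data files (hypothetical path)
        d.Initialize("/usr/local/WordNet-3.0/dict", "3.0");
        d.initPreCache(); // optional: cache synsets/glosses between calls
        return d;
    }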

    /**
     * Provides the opportunity to cache pretreatments for measures which require them.
     * Call initPreCache() before using the cache and cleanPreCache() afterwards.
     * This can only improve performance.
     */

    protected WeakHashMap<String, Object> cache;

    public void initPreCache() {
        cache = new WeakHashMap<String, Object>();
    }

    public void cleanPreCache() {
        cache = null;
    }

    /**
     * Reads a file containing one stopword per line and
     * returns these stopwords as a set of strings.
     * Also sets the default stopWords to this list.
     */
    public Set<String> loadStopWordsFromFile(String filename) throws IOException, FileNotFoundException {
        stopWords = new HashSet<String>();
        FileReader reader = new FileReader(filename);
        try {
            StreamTokenizer st = new StreamTokenizer(reader);
            st.eolIsSignificant(true);
            st.ordinaryChar(' ');
            st.ordinaryChar('-');
            boolean eof = false;
            String str = "";
            while (!eof) {
                int tk = st.nextToken();
                switch (tk) {
                case StreamTokenizer.TT_EOF:
                    if (str.length() > 0)
                        stopWords.add(str); // keep a last word not followed by a newline
                    eof = true;
                    break;
                case StreamTokenizer.TT_EOL:
                    if (str.length() > 0)
                        stopWords.add(str); // skip blank lines
                    str = "";
                    break;
                //case StreamTokenizer.TT_NUMBER : str += st.nval.toString(); break;
                case StreamTokenizer.TT_WORD:
                    str += st.sval;
                    break;
                }
            }
        } finally {
            reader.close();
        }
        return stopWords;
    }
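
    // Usage sketch (illustrative): given a file "stopwords.txt" (hypothetical
    // name) containing one word per line, e.g.
    //   the
    //   of
    //   and
    // loadStopWordsFromFile("stopwords.txt") replaces the default Lucene
    // English stopword set used when tokenizing glosses.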

    /**
     * Compute a basic distance between 2 strings using WordNet synonyms.
     * @param s1
     * @param s2
     * @return the distance between s1 & s2 (0 if s2 is a synonym of s1, otherwise
     *         the best subStringDistance between s2 and s1 or any synonym of s1)
     */
    public double basicSynonymDistance(String s1, String s2) {
        double Dist = 0.0;
        double Dists1s2;
        int j, k = 0;
        int synonymNb = 0;
        IndexWord index = null;
        Synset Syno[] = null;

        s1 = s1.toLowerCase();
        s2 = s2.toLowerCase();

        Dists1s2 = StringDistances.subStringDistance(s1, s2);

        try {
            // Lookup for first string
            index = dictionary.lookupIndexWord(POS.NOUN, s1);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
        // if found in the dictionary
        if (index != null) {
            try {
                // get the groups of synonyms for each sense
                Syno = index.getSenses();
            } catch (JWNLException e) {
                e.printStackTrace();
            }
            // number of senses for the word s1
            synonymNb = index.getSenseCount();
            // for each sense
            for (k = 0; k < synonymNb; k++) {
                // for each synonym of this sense
                for (j = 0; j < Syno[k].getWordsSize(); j++) {
                    Dist = StringDistances.subStringDistance(Syno[k].getWord(j).getLemma(), s2);
                    if (Dist < Dists1s2) {
                        Dists1s2 = Dist;
                    }
                }
            }
        }

        return Dists1s2;
    }

    /**
     * Retrieve all WordNet senses of a term
     * @param term
     * @return the set of senses of term
     */

    @SuppressWarnings("unchecked") // WordNet non-1.5
    Set<Synset> getAllSenses(String term) throws OntoSimException {
        Set<Synset> res = new HashSet<Synset>();
        IndexWordSet iws = null;
        try {
            iws = dictionary.lookupAllIndexWords(term);
        } catch (JWNLException ex) {
            throw new OntoSimException("Wordnet exception", ex);
        }
        if (iws != null) {
            // not iterable...
            for (IndexWord idx : (Collection<IndexWord>) iws.getIndexWordCollection()) {
                Synset Syno[] = null;
                try {
                    // get the synsets for each sense
                    Syno = idx.getSenses();
                } catch (JWNLException jwnlex) {
                    throw new OntoSimException("Wordnet exception", jwnlex);
                    //jwnlex.printStackTrace();
                }
                // number of senses for the word s1
                int synonymNb = idx.getSenseCount();
                // for each sense
                for (int k = 0; k < synonymNb; k++) {
                    res.add(Syno[k]);
                }
            }
        }
        return res;
    }

    /**
     * Cache method for synsets
     */
    @SuppressWarnings("unchecked") // Only one cache... my bad
    protected Set<Synset> computeSynsets(String s) throws OntoSimException {
        String term = s.toLowerCase();
        if (cache != null && cache.containsKey(term)) {
            return (Set<Synset>) cache.get(term);
        } else {
            Set<Synset> sense = getAllSenses(term);
            if (cache != null)
                cache.put(term, sense);
            return sense;
        }
    }

    /**
     * Compute the proportion of common synsets between two words
     * @param s1 a String
     * @param s2 a String
     * @return the proportion of synsets shared by both terms (Jaccard index)
     */
    public double cosynonymySimilarity(String s1, String s2) throws OntoSimException {
        Set<Synset> sense1 = computeSynsets(s1);
        Set<Synset> sense2 = computeSynsets(s2);
        // if found in the dictionary
        if (sense1 != null && sense2 != null) {
            //System.err.print( "Success : "+s1+" / "+s2 );
            int union = sense1.size();
            int inter = 0;
            // For all senses of s2
            for (Synset s : sense2) {
                if (sense1.contains(s)) {
                    inter++;
                } else {
                    union++;
                }
            }
            if (union == 0)
                return 0.;
            //System.err.println( "= "+inter+" / "+union );
            return ((double) inter) / ((double) union);
        } else {
            //System.err.println( "Failure : "+s1+" / "+s2 );
            return 1. - StringDistances.equalDistance(s1.toLowerCase(), s2.toLowerCase());
        }
    }
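
    /**
     * Sketch (not part of the original class): the counting loop above is a
     * Jaccard index over synsets. The same computation over arbitrary sets,
     * written directly, for clarity; basicGlossOverlap() below applies the
     * same formula to gloss token sets.
     */
    protected static <T> double jaccard(Set<T> s1, Set<T> s2) {
        int inter = 0;
        for (T x : s1) {
            if (s2.contains(x)) inter++;
        }
        int union = s1.size() + s2.size() - inter; // |s1 U s2|
        if (union == 0) return 0.; // both sets empty
        return (double) inter / (double) union;
    }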

    /**
     * Evaluate if two terms can be synonyms
     * @param s1 a String
     * @param s2 a String
     * @return 1 if the strings are equal or s2 belongs to a synset of s1
     *
     * Note: the computation is asymmetric (it looks for s2 among the synsets of s1);
     * since WordNet synonymy is symmetric, the result should be symmetric too
     */

    public double basicSynonymySimilarity(String s1, String s2) throws OntoSimException {
        s1 = s1.toLowerCase();
        s2 = s2.toLowerCase();

        if (s1.equals(s2))
            return 1.;
        else {
            Set<Synset> sense1 = computeSynsets(s1);
            if (sense1 != null) {
                for (Synset s : sense1) {
                    if (s.containsWord(s2))
                        return 1.;
                }
            }
            return 0.;
        }
    }

    /**
     * Compute the overlap between all glosses of two strings
     * @param s1 a String
     * @param s2 a String
     * @return a measure of overlap of their glosses in WordNet based on the following treatments:
     * - take gloss for all senses and add the term name;
     * - suppress quotations ('...');
     * - suppress empty words (or, and, the, a, an, for, of, etc.);
     * [- suppress technical vocabulary, e.g., 'term';]
     * [- suppress empty phrases, e.g., 'usually including';]
     * - keep categories, e.g., law;
     * - stem words.
     * The results are sets (not bags, so there is no repetition) of words and compared with:
     * | g(t1) \cap g(t2) | / | g(t1) \cup g(t2) |
     *
     */
    public double basicGlossOverlap(String s1, String s2) throws OntoSimException {
        Collection<String> st1 = computeGlossValue(s1);
        Collection<String> st2 = computeGlossValue(s2);
        // Compute measure
        if (st1 == null || st2 == null) {
            // JE: no maybe a simple string distance anyway
            // but why this one?
            return 1. - StringDistances.subStringDistance(s1, s2);
        }
        int common = 0;
        for (String s : st1) {
            if (st2.contains(s))
                common++;
        }
        return (double) common / (double) (st1.size() + st2.size() - common);
    }

    /**
     * Cache method for glosses
     */
    @SuppressWarnings("unchecked") // Only one cache... my bad
    protected Collection<String> computeGlossValue(String s) throws OntoSimException {
        // Normalize to lowercase before lookup
        String term = s.toLowerCase();
        if (cache != null && cache.containsKey(term)) {
            return (Collection<String>) cache.get(term);
        } else {
            Collection<String> st = null;
            // Collect gloss
            String gloss = term;
            // if found in the dictionary
            gloss += " " + getGlossForLabel(term);
            try {
                // Clean-up gloss
                // Tokenize gloss
                st = tokenizeGloss(gloss);
            } catch (IOException ioex) {
                ioex.printStackTrace(); // should never occur
            }
            // This uses our home-made tokenizer (rather use Lucene maybe)
            //OLD: st = StringDistances.tokenize( gloss );
            if (cache != null)
                cache.put(term, st);
            return st;
        }
    }

    /**
     * Takes a gloss-like string (text) and returns its set of tokens,
     * after:
     * - stopword removal
     * - lowercasing
     * - Porter stemming
     */
    protected Set<String> tokenizeGloss(String s) throws IOException {
        Set<String> result = new HashSet<String>();
        // I am afraid that I am reimplementing the StandardAnalyzer...
        TokenStream ts = new PorterStemFilter(
                new StopFilter(true, new LowerCaseTokenizer(new StringReader(s)), stopWords, true));
        TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
        while (ts.incrementToken()) {
            result.add(termAtt.term());
        }
        return result;
    }
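
    // Example (illustrative): with the default English stopword set,
    // tokenizeGloss("The running of dogs") should yield {"run", "dog"}:
    // "the" and "of" are dropped as stopwords, the remaining tokens
    // are lowercased and Porter-stemmed.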

    /**
     * Builds a gloss text for a label: the normalized label itself, plus the
     * glosses of the label or, failing that, of its individual tokens.
     */
    protected String getGlossForLabel(String s) {
        String norm = splitStringForWordNet(s);
        String text = norm + "."; // add the label as gloss
        String result = getGlossForLabel1(norm);
        // Could also be done systematically
        if ("".equals(result)) {
            try {
                for (String subterm : tokenizeGloss(norm)) {
                    text += getGlossForLabel1(subterm);
                }
            } catch (IOException ioex) {
                // ignore: fall back to the label alone
            }
        } else {
            text += result;
        }
        return text;
    }

    /**
     * Fetches all the glosses from WordNet for the given term and concatenates them (without quotations).
     */
    protected String getGlossForLabel1(String s) {
        String text = "";
        for (Synset ss : getAllSenses(s)) {
            text += " " + ss.getGloss().replaceAll("\\\"[^\"]*\\\"", "") + ".";
        }
        return text;
    }

    /**
     * Retains only strings made of lowercase/uppercase characters.
     * Suppresses numbers.
     * Splits strings on lowercaseUppercase transitions and on
     * "/" ":" "_" "\" "+" "." "&" "-"
     * But not on "@", which is taken into account by Lucene
     */
    protected String splitStringForWordNet(String s) {
        String result = "";
        int last = 0;
        int pos = 0;
        int len = s.length();
        while (pos < len) { // also covers the empty string
            char c = s.charAt(pos);
            if (c == '/' || c == ':' || c == '_' || c == '\\' || c == '+' || c == '&' || c == '.' || c == '-') {
                if (pos > 0)
                    result += s.substring(last, pos) + " ";
                last = pos + 1;
            } else if (c >= '0' && c <= '9') {
                if (pos > 0)
                    result += s.substring(last, pos);
                last = pos + 1;
            } else if (pos != 0 && c >= 'A' && c <= 'Z' && s.charAt(pos - 1) >= 'a' && s.charAt(pos - 1) <= 'z') {
                result += s.substring(last, pos) + " ";
                last = pos;
            }
            pos++;
        }
        result += s.substring(last, pos);
        //System.err.println( "* "+result+" *" );
        return result;
    }
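
    // Examples (illustrative, given the normalization above):
    //   splitStringForWordNet("hasChild")    -> "has Child"
    //   splitStringForWordNet("has_child")   -> "has child"
    //   splitStringForWordNet("ISO8601date") -> "ISOdate" (digits dropped)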

    /**
     * Compute the Wu-Palmer similarity defined by
     * score = 2*depth(lcs(s1,s2)) / (depth(s1) + depth(s2))
     * @param s1
     * @param s2
     * @return the Wu-Palmer similarity
     * The algorithm returns the best Wu-Palmer similarity among the pairs
     * of synsets corresponding to s1 and s2
     *
     * Assumption: JE**1: a root is a synset with no hypernyms...
     *
     * Sketch:
     * 1) full depth-first search from s1, recording the shortest path distance from s1 and the depth
     * 2) depth-first search from s2 until the lcs is reached, recording the best Wu-Palmer score
     *
     * NOTE: The first phase (on s1) is a preprocessing step.
     * In the case when the user wants to compute a whole Wu-Palmer matrix,
     * this step is made |s2| times: it may be worth caching this step
     */
    public double wuPalmerSimilarity(String s1, String s2) throws OntoSimException {
        // For each encountered node, record:
        // [0] how far is it from s1
        // [1] how far is it from s2
        // [2] how far is it from a root (depth)
        Hashtable<Synset, int[]> depth = new Hashtable<Synset, int[]>();

        // Normalize to lowercase before lookup
        s1 = s1.toLowerCase();
        s2 = s2.toLowerCase();
        if (s1.equals(s2))
            return 1.;

        Set<Synset> sense1 = computeSynsets(s1);
        Set<Synset> sense2 = computeSynsets(s2);
        if (sense1 == null || sense2 == null)
            return 0.;

        // Traverse the graph from s1 and collect distance
        Stack<Synset> queue = new Stack<Synset>();
        for (Synset s : sense1) { // Stack each sense with distance 0
            int[] v = new int[3];
            v[0] = 0;
            v[1] = -1;
            v[2] = -1;//{ 0, -1, -1 };
            depth.put(s, v);
            queue.push(s);
        }
        // Traversal from s1 (marking the distance from start)
        // (introducing distance from top)
        Stack<Synset> passed = new Stack<Synset>();
        while (!queue.empty()) { // Stack non empty
            //System.err.println("QUEUE: "+queue);
            //System.err.println("PASSED: "+passed);
            Synset curnode = queue.pop(); // Unstack
            int[] curval = depth.get(curnode);
            int curdepth = curval[0]; // Retrieve depth
            //System.err.println(">> ["+curdepth+"] "+curnode);
            try {
                PointerTarget[] hyps = curnode.getTargets(PointerType.HYPERNYM);
                if (hyps.length == 0) { // JE**1: Hitting a root
                    //System.err.println("  == ROOT");
                    int level = 0;
                    curval[2] = level;
                    // Mark second queue
                    boolean firstmark = false;
                    for (int i = passed.size() - 1; i >= 0; i--) {
                        Synset current = passed.get(i);
                        if (!firstmark)
                            passed.pop(); // unstack until first mark
                        if (current != null) {
                            level++;
                            //System.err.println("  <== ("+level+") "+current);
                            int[] val = depth.get(current); // record depth
                            if (val[2] == -1 || val[2] > level)
                                val[2] = level;
                        } else {
                            firstmark = true;
                        } // end of popping after first mark
                    }
                } else {
                    passed.push(curnode); // stack me
                    for (PointerTarget s : hyps) {
                        if (s instanceof Synset) {
                            Synset current = (Synset) s;
                            int[] val = depth.get(current);
                            //System.err.println("  -> "+current);
                            if (val == null) { // not encountered yet
                                int[] v = new int[3];
                                v[0] = curdepth + 1;
                                v[1] = -1;
                                v[2] = -1;
                                //int[] v = { curdepth+1, -1, -1 };
                                depth.put(current, v);
                                queue.push(current);
                                passed.push((Synset) null);
                                //System.err.println("  - pushed(1) "+v[0]);
                            } else if (val[0] > curdepth + 1) { // updating shortpath
                                val[0] = curdepth + 1;
                                queue.push(current);
                                passed.push((Synset) null);
                                //System.err.println("  - pushed(2) "+val[0]);
                            } else { // We must unstack here
                                //System.err.println("  == MEET");
                                int level = val[0];
                                // Mark second queue
                                for (int i = passed.size() - 1; i >= 0; i--) {
                                    Synset n = passed.get(i);
                                    if (n != null) {
                                        level++;
                                        //System.err.println("  <== ("+level+") "+n);
                                        int[] v = depth.get(n); // record depth
                                        if (v[2] == -1 || v[2] > level)
                                            v[2] = level;
                                    }
                                }
                            }
                        }
                    }
                    // Either unstack the last mark or s if nothing has been put in queue
                    passed.pop();
                }
            } catch (JWNLException ex) {
                // ignore: treat as having no hypernyms
            }
        }

        // Traverse the graph from s2 and collect distance
        double bestvalue = 0.;
        for (Synset s : sense2) { // Stack each sense with distance 0
            queue.push(s);
            int[] val = depth.get(s);
            if (val == null) {
                int[] v = new int[3];
                v[0] = -1;
                v[1] = 0;
                v[2] = -1;
                depth.put(s, v);
            } else {
                val[1] = 0;
                //System.err.println(val[0]+"/"+val[1]+"/"+val[2]);
                //System.err.println( s );
                double newvalue = (double) (2 * val[2]) / (double) (val[0] + 2 * val[2]);
                if (newvalue > bestvalue) {
                    bestvalue = newvalue;
                }
            }
        }
        while (!queue.empty()) { // Stack non empty
            Synset s = queue.pop(); // Unstack
            int i = (depth.get(s))[1]; // Retrieve depth
            try {
                for (PointerTarget h : s.getTargets(PointerType.HYPERNYM)) {
                    if (h instanceof Synset) {
                        Synset current = (Synset) h;
                        int[] level = depth.get(current);
                        if (level == null) { // not encountered yet
                            //if ( bestvalue == -1 || i < bestvalue ) { // modest branch and bound
                            int[] v = new int[3];
                            v[0] = -1;
                            v[1] = i + 1;
                            v[2] = -1;
                            //int[] v = { -1, i+1, -1 };
                            depth.put(current, v);
                            queue.push(current);
                            //}
                        } else if (level[0] != -1) { // This is a least common subsumer
                            level[1] = i + 1;
                            //System.err.println(level[0]+"/"+level[1]+"/"+level[2]);
                            //System.err.println( current );
                            double newvalue = (double) (2 * level[2]) / (double) (level[0] + i + 1 + 2 * level[2]);
                            if (newvalue > bestvalue) {
                                bestvalue = newvalue;
                            }
                        } else if (level[1] > i + 1) {
                            level[1] = i + 1;
                            queue.push(current);
                        }
                    }
                }
            } catch (JWNLException ex) {
                // ignore: treat as having no hypernyms
            }
        }

        return bestvalue;
    }
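
    /**
     * Sketch (not part of the original class): the Wu-Palmer score computed
     * inline above, written as a standalone formula. dist1 and dist2 are the
     * hypernym distances from each sense to their least common subsumer (lcs)
     * and lcsDepth is the depth of that lcs, so that
     * depth(s1) = dist1 + lcsDepth and depth(s2) = dist2 + lcsDepth.
     * E.g., wuPalmerScore(2, 1, 3) = 2*3 / (2 + 1 + 2*3) = 6/9 = 0.666...
     */
    protected static double wuPalmerScore(int dist1, int dist2, int lcsDepth) {
        return (double) (2 * lcsDepth) / (double) (dist1 + dist2 + 2 * lcsDepth);
    }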

    /**
     * This is an elaborate similarity based on WordNet.
     * It assesses the similarity by decomposing both strings into tokens
     * and comparing the tokens pairwise.
     * 
     */
    public double computeSimilarity(String s1, String s2) {
        double sim = 0.0;
        double dists1s2;
        IndexWord index = null;

        dists1s2 = StringDistances.subStringDistance(s1, s2);
        if (dists1s2 < MINIMUM_DISTANCE)
            return (1 - dists1s2);

        if (s1.equals(s2) || s1.toLowerCase().equals(s2.toLowerCase())) {
            return 1;
        } else {
            if (s1.equals(s1.toUpperCase()) || s1.equals(s1.toLowerCase())) {
                try {
                    // Lookup for first string
                    index = dictionary.lookupIndexWord(POS.NOUN, s1);
                    if (index == null) {
                        index = dictionary.lookupIndexWord(POS.ADJECTIVE, s1);
                    }
                    if (index == null) {
                        index = dictionary.lookupIndexWord(POS.VERB, s1);
                    }
                } catch (Exception ex) {
                    ex.printStackTrace();
                    System.exit(-1);
                }
                // if not found in the dictionary
                if (index == null)
                    return (1 - dists1s2);
                else
                    sim = compareComponentNames(s1, s2);
            } else
                sim = compareComponentNames(s1, s2);
        }
        // return sim;
        return Math.max(sim, 1 - dists1s2);
    }

    public double compareComponentNames(String s1, String s2) {
        Vector s1Tokens;
        Vector s2Tokens;
        IndexWord indexNoun1, indexNoun2;
        IndexWord indexAdj1, indexAdj2;
        IndexWord indexVerb1, indexVerb2;
        Iterator pIt, gIt;
        Vector vg, vp;
        String token1, token2;
        double simAsAdj, simAsNoun, simAsVerb;
        double maxSim;
        double[][] simMatrix;
        int i, j;

        s1Tokens = StringDistances.tokenize(s1);
        s2Tokens = StringDistances.tokenize(s2);

        // tokens storage

        vg = (s1Tokens.size() >= s2Tokens.size()) ? s1Tokens : s2Tokens;
        vp = (s1Tokens.size() >= s2Tokens.size()) ? s2Tokens : s1Tokens;

        // TODO: Don't forget to switch comments.
        // Initializes the tokens hashtables.
        /*this.nouns1        = new Hashtable();
        this.adjectives1   = new Hashtable();
        this.verbs1        = new Hashtable();
        this.nouns2        = new Hashtable();
        this.adjectives2   = new Hashtable();
        this.verbs2        = new Hashtable();
        */

        simMatrix = new double[vg.size()][vp.size()];

        i = 0;
        gIt = vg.iterator();
        try {
            while (gIt.hasNext()) {
                token1 = (String) gIt.next();

                indexNoun1 = dictionary.lookupIndexWord(POS.NOUN, token1);
                indexAdj1 = dictionary.lookupIndexWord(POS.ADJECTIVE, token1);
                indexVerb1 = dictionary.lookupIndexWord(POS.VERB, token1);

                j = 0;
                pIt = vp.iterator();
                while (pIt.hasNext()) {
                    token2 = (String) pIt.next();

                    indexNoun2 = dictionary.lookupIndexWord(POS.NOUN, token2);
                    indexAdj2 = dictionary.lookupIndexWord(POS.ADJECTIVE, token2);
                    indexVerb2 = dictionary.lookupIndexWord(POS.VERB, token2);

                    simAsAdj = this.computeTokenSimilarity(indexAdj1, indexAdj2);
                    maxSim = simAsAdj;
                    simAsNoun = this.computeTokenSimilarity(indexNoun1, indexNoun2);
                    maxSim = (simAsNoun > maxSim) ? simAsNoun : maxSim;
                    simAsVerb = this.computeTokenSimilarity(indexVerb1, indexVerb2);
                    maxSim = (simAsVerb > maxSim) ? simAsVerb : maxSim;

                    simMatrix[i][j] = maxSim;
                    j++;
                }
                i++;
            }
        } catch (JWNLException ex) {
            ex.printStackTrace();
        }

        return bestMatch(simMatrix);

    }

    public double computeTokenSimilarity(IndexWord index1, IndexWord index2) {
        // the max number of common concepts between the two tokens
        double maxCommon = 0;

        // the two lists giving the best match
        PointerTargetNodeList best1 = new PointerTargetNodeList();
        PointerTargetNodeList best2 = new PointerTargetNodeList();

        // the two lists currently compared
        PointerTargetNodeList ptnl1 = new PointerTargetNodeList();
        PointerTargetNodeList ptnl2 = new PointerTargetNodeList();

        if (index1 != null && index2 != null) {
            // The two tokens exist in WordNet, we find the "depth"
            try {
                // Best match between current lists
                int maxBetweenLists = 0;

                // Synsets for each token
                Synset[] Syno1 = index1.getSenses();
                Synset[] Syno2 = index2.getSenses();
                for (int i = 0; i < index1.getSenseCount(); i++) {

                    Synset synset1 = Syno1[i];
                    for (int k = 0; k < index2.getSenseCount(); k++) {

                        Synset synset2 = Syno2[k];

                        List hypernymList1 = PointerUtils.getInstance().getHypernymTree(synset1).toList();
                        List hypernymList2 = PointerUtils.getInstance().getHypernymTree(synset2).toList();

                        Iterator list1It = hypernymList1.iterator();
                        // browse lists
                        while (list1It.hasNext()) {
                            ptnl1 = (PointerTargetNodeList) list1It.next();
                            Iterator list2It = hypernymList2.iterator();
                            while (list2It.hasNext()) {
                                ptnl2 = (PointerTargetNodeList) list2It.next();

                                int cc = getCommonConcepts(ptnl1, ptnl2);
                                if (cc > maxBetweenLists) {
                                    maxBetweenLists = cc;
                                    best1 = ptnl1;
                                    best2 = ptnl2;
                                }
                            }
                        }
                        if (maxBetweenLists > maxCommon) {
                            maxCommon = maxBetweenLists;
                        }
                    }
                }
                // System.err.println("common = " + maxCommon);
                // System.err.println("value = "
                // + ((2 * maxCommon) / (best1.size() + best2.size())));
                // if (best1 != null) best1.print();
                // if (best2 != null) best2.print();
                if (best1.isEmpty() && best2.isEmpty())
                    return 0;
                return (2 * maxCommon / (best1.size() + best2.size()));
            } catch (JWNLException je) {
                je.printStackTrace();
                System.exit(-1);
            }
        }
        return 0;
    }

    public double findMatchForAdj(IndexWord index1, IndexWord index2) {
        // 1 if some sense of index2 appears among the adjective synonyms of index1
        double value = 0;

        if (index1 != null && index2 != null) {
            // The two tokens exist in WordNet, we find the "depth"
            try {
                // Synsets for each token
                Synset[] Syno1 = index1.getSenses();
                Synset[] Syno2 = index2.getSenses();
                for (int i = 0; i < index1.getSenseCount(); i++) {

                    Synset synset1 = Syno1[i];
                    for (int k = 0; k < index2.getSenseCount(); k++) {

                        Synset synset2 = Syno2[k];

                        PointerTargetNodeList adjSynonymList = PointerUtils.getInstance().getSynonyms(synset1);

                        Iterator listIt = adjSynonymList.iterator();
                        // browse lists
                        while (listIt.hasNext()) {
                            PointerTargetNode ptn = (PointerTargetNode) listIt.next();
                            if (ptn.getSynset() == synset2) {
                                value = 1;
                            }
                        }
                    }
                }
                // System.err.println("value = " + value);
                return value;
            } catch (JWNLException je) {
                je.printStackTrace();
                System.exit(-1);
            }
        }
        return 0;
    }

    /**
     * Looks up a word in WordNet and stores its IndexWord entry
     * in each of the noun/adjective/verb tables where it appears.
     * @param word the word to look up
     */
    public void lookUpWord(String word, Hashtable<String, IndexWord> nouns, Hashtable<String, IndexWord> adjectives,
            Hashtable<String, IndexWord> verbs) {
        IndexWord index = null;

        try {
            // Lookup for word in adjectives
            index = dictionary.lookupIndexWord(POS.ADJECTIVE, word);
            if (index != null)
                adjectives.put(word, index);
            // Lookup for word in nouns
            index = dictionary.lookupIndexWord(POS.NOUN, word);
            if (index != null)
                nouns.put(word, index);
            // Lookup for word in verbs
            index = dictionary.lookupIndexWord(POS.VERB, word);
            if (index != null)
                verbs.put(word, index);
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    }

    public void display(Synset syn) {
        String str = "";
        for (int s = 0; s < syn.getWordsSize(); s++) {
            str += syn.getWord(s);
        }
        // System.err.println(str);
    }

    /**
     * Counts the synsets shared at the root end of two hypernym lists
     * (the lists are compared from their tails, i.e., from the roots down).
     */
    public int getCommonConcepts(PointerTargetNodeList list1, PointerTargetNodeList list2) {
        int cc = 0;
        int i = 1;
        while (i <= Math.min(list1.size(), list2.size()) && ((PointerTargetNode) list1.get(list1.size() - i))
                .getSynset() == ((PointerTargetNode) list2.get(list2.size() - i)).getSynset()) {
            cc++;
            i++;
        }
        return cc;

    }
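
    // Example (illustrative): hypernym lists run from a synset up to a root,
    // so comparing them from their tails counts the shared upper segment.
    // With list1 = [dog, canine, ..., animal, entity] and
    //      list2 = [cat, feline, ..., animal, entity],
    // getCommonConcepts returns the number of shared top synsets
    // (here 2 if only animal and entity coincide).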

    /**
     * Greedy best-match aggregation: repeatedly picks the highest cell,
     * discards its line and column, and averages over max(#lines, #columns).
     */
    private double bestMatch(double matrix[][]) {
        int nbrLines = matrix.length;
        if (nbrLines == 0)
            return 0;
        int nbrColumns = matrix[0].length;
        double sim = 0.;
        int minSize = (nbrLines >= nbrColumns) ? nbrColumns : nbrLines;
        if (minSize == 0)
            return 0;
        for (int k = 0; k < minSize; k++) {
            double max_val = 0;
            int max_i = 0;
            int max_j = 0;
            for (int i = 0; i < nbrLines; i++) {
                for (int j = 0; j < nbrColumns; j++) {
                    if (max_val < matrix[i][j]) {
                        /* mods
                        if (matrix[i][j] > 0.3)
                        max_val = matrix[i][j];
                        else
                        max_val = matrix[i][j] * mask[i][j];
                        end mods */
                        max_val = matrix[i][j];
                        max_i = i;
                        max_j = j;
                    }
                }
            }
            for (int i = 0; i < nbrLines; i++) {
                matrix[i][max_j] = 0;
            }
            for (int j = 0; j < nbrColumns; j++) {
                matrix[max_i][j] = 0;
            }
            sim += max_val;
        }
        return sim / (double) (nbrLines + nbrColumns - minSize);
    }
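
    // Example (illustrative): bestMatch greedily pairs tokens. For
    //   matrix = { { 0.9, 0.1 },
    //              { 0.2, 0.8 } }
    // it first takes 0.9 (zeroing line 0 and column 0), then 0.8, and
    // returns (0.9 + 0.8) / 2 = 0.85: the sum of matched scores divided
    // by max(#lines, #columns).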

    /**
     * @param token A token.
     * @param n The number of the ontology (typically 1 or 2).
     * @return the number of occurrences of the token in the hashtables
     * nouns, adjectives and verbs.
     */
    public int getNumberOfOccurences(String token, int n) {
        switch (n) {
        case 1:
            return getNumberOfOccurences(token, this.nouns1, this.adjectives1, this.verbs1);
        case 2:
            return getNumberOfOccurences(token, this.nouns2, this.adjectives2, this.verbs2);
        default:
            return 0;
        }
    }

    // Find the number of occurrences of a word in different categories
    public int getNumberOfOccurences(String token, Hashtable nouns, Hashtable adj, Hashtable verbs) {
        int nb = 0;
        if (nouns.containsKey(token))
            nb++;
        if (adj.containsKey(token))
            nb++;
        if (verbs.containsKey(token))
            nb++;
        return nb;
    }

    public void displayMatrix(double[][] matrix) {
        for (int i = 0; i < matrix.length; i++) {
            for (int j = 0; j < matrix[i].length; j++) {
                System.out.println("[" + matrix[i][j] + "]");
            }
        }
    }

    public void fillWithOnes(double[][] matrix) {
        for (int i = 0; i < matrix.length; i++) {
            for (int j = 0; j < matrix[i].length; j++) {
                matrix[i][j] = 1;
            }
        }
    }

    /* Getters */
    public double[][] getAdjectivesResults() {
        return adjectivesResults;
    }

    public double[][] getNounsResults() {
        return nounsResults;
    }

    public double[][] getVerbsResults() {
        return verbsResults;
    }

}
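
/*
 * Usage sketch (not part of the original file): a minimal driver exercising
 * the measures above. The WordNet path, version and word pairs are
 * assumptions for illustration only.
 */
class JWNLDistancesDemo {
    public static void main(String[] args) throws Exception {
        JWNLDistances d = new JWNLDistances();
        d.Initialize("/usr/local/WordNet-3.0/dict", "3.0"); // hypothetical path
        d.initPreCache(); // reuse synsets across calls
        System.out.println("cosynonymy: " + d.cosynonymySimilarity("car", "automobile"));
        System.out.println("wu-palmer:  " + d.wuPalmerSimilarity("dog", "cat"));
        System.out.println("similarity: " + d.computeSimilarity("creditCard", "cardNumber"));
        d.cleanPreCache();
    }
}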