eu.project.ttc.models.CrossTable.java Source code

Java tutorial

Introduction

Here is the source code for eu.project.ttc.models.CrossTable.java

Source

/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package eu.project.ttc.models;

import java.util.Iterator;
import java.util.Map;

import org.apache.commons.lang3.mutable.MutableDouble;

import com.google.common.collect.Maps;

import eu.project.ttc.metrics.AssociationRate;

/**
 * An index (often called cross table) that helps to compute
 * the association rate of two terms, i.e. their amount of co-occurrence
 * in the corpus.
 *
 * <p>The table is filled once, at construction time, from the context
 * vectors of the terms of the term index. For every context-vector entry
 * <code>(term, coTerm, nbCooccs)</code> it accumulates:
 * <ul>
 * <li>the grand total of co-occurrences,</li>
 * <li>per <code>term</code>, the sum of its co-occurrences (a + b),</li>
 * <li>per <code>coTerm</code>, the sum of the co-occurrences in which it
 * appears as a co-term (a + c).</li>
 * </ul>
 *
 * <p>WARNING : not thread safe ! {@link #computeRate(AssociationRate, Term, Term)}
 * stores its last coefficients in instance fields.
 *
 * @author Damien Cram
 *
 */
public class CrossTable {

    /**
     * The term index of the corpus
     */
    private final TermIndex termIndex;

    /**
     * For each base term, the total number of co-occurrences found in its
     * own context vector (coefficients a + b).
     */
    private final Map<Term, MutableDouble> aPlusB = Maps.newHashMap();

    /**
     * For each co-term, the total number of co-occurrences in which it
     * appears inside the context vector of any term (coefficients a + c).
     */
    private final Map<Term, MutableDouble> aPlusC = Maps.newHashMap();

    /**
     * The sum of all co-occurrences over all context-vector entries
     * (coefficients a + b + c + d).
     */
    private int totalCoOccurrences;

    // Coefficients of the last call to computeRate(), kept for debugging.
    private int lastA;
    private int lastB;
    private int lastC;
    private int lastD;

    /**
     *
     * Constructs this cross table with a term index and inits the table
     * with all co-occurrences of the term index's terms. This init process is
     * based on terms' context vectors, which should be computed before
     * invoking this constructor: terms whose context vector is not computed
     * are skipped and do not contribute to the table.
     *
     * @see Term#getContextVector()
     * @param termIndex
     *          The term index of the corpus
     * @throws NullPointerException
     *          if <code>termIndex</code> is <code>null</code>
     */
    public CrossTable(TermIndex termIndex) {
        super();
        this.termIndex = termIndex;
        init();
    }

    /**
     * Fills the marginal maps and the grand total from the context vectors
     * of all terms of the term index.
     */
    private void init() {
        this.totalCoOccurrences = 0;

        for (Term term : this.termIndex.getTerms()) {
            if (!term.isContextVectorComputed()) {
                // no context vector: nothing to count for this term
                continue;
            }
            ContextVector context = term.getContextVector();
            for (ContextVector.Entry entry : context.getEntries()) {
                final double nbCooccs = entry.getNbCooccs();
                this.totalCoOccurrences += nbCooccs;
                getScoreFromMap(this.aPlusB, term).add(nbCooccs);
                // Both marginals must be expressed in the same unit
                // (number of co-occurrences), otherwise
                // d = total - a - b - c is meaningless.
                getScoreFromMap(this.aPlusC, entry.getCoTerm()).add(nbCooccs);
            }
        }
    }

    /**
     * Returns the accumulator bound to <code>key</code>, creating it and
     * registering it with value 0 when the key is not yet in the map.
     */
    private static MutableDouble getScoreFromMap(Map<Term, MutableDouble> aScoreMap, Term key) {
        MutableDouble score = aScoreMap.get(key);
        if (score == null) {
            score = new MutableDouble(0);
            aScoreMap.put(key, score);
        }
        return score;
    }

    /**
     * Returns <code>marginal - a</code> as an int, or 0 when no marginal has
     * been recorded for the term.
     */
    private static int marginalMinus(MutableDouble marginal, int a) {
        return marginal == null ? 0 : marginal.intValue() - a;
    }

    /**
     *
     * Computes coefficients a, b, c and d and computes the association
     * rate based on these coefficients.
     *
     * These coefficients are made available through the not-thread safe methods <code>#getLastX()</code>.
     *
     * @see #getLastA()
     * @see #getLastB()
     * @see #getLastC()
     * @see #getLastD()
     *
     * @param rate
     *          the association rate measure
     * @param x
     *          the base term
     * @param y
     *          the co term
     * @return
     *          the association rate
     * @throws NullPointerException
     *          if <code>x.getContextVector()</code> returns <code>null</code>
     */
    public double computeRate(AssociationRate rate, Term x, Term y) {

        // A = (x & y)
        final int a = (int) x.getContextVector().getNbCooccs(y);

        // B = (x & not(y))
        final int b = marginalMinus(this.aPlusB.get(x), a);

        // C = (not(x) & y) : y is looked up as a co-term, hence aPlusC
        final int c = marginalMinus(this.aPlusC.get(y), a);

        // D = (not(x) & not(y))
        final int d = this.totalCoOccurrences - a - b - c;

        // for DEBUG purpose only
        lastA = a;
        lastB = b;
        lastC = c;
        lastD = d;
        // END DEBUG

        return rate.getValue(a, b, c, d);
    }

    /**
     * The number of times <code>coTerm</code> is a co-occurrence of <code>term</code>
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient a of the last call to
     *          {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastA() {
        return lastA;
    }

    /**
     * The number of times a co-occurrence of <code>term</code> is not <code>coTerm</code>
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient b of the last call to
     *          {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastB() {
        return lastB;
    }

    /**
     * The number of times <code>coTerm</code> is a co-occurrence of another term
     * than <code>term</code>.
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient c of the last call to
     *          {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastC() {
        return lastC;
    }

    /**
     * The number of co-occurrences that involve neither <code>term</code> as
     * the base term nor <code>coTerm</code> as the co-term.
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient d of the last call to
     *          {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastD() {
        return lastD;
    }
}