MarginalTaggerEvaluator.java :  » Natural-Language-Processing » LingPipe » com » aliasi » tag » Java Open Source

Java Open Source » Natural Language Processing » LingPipe 
LingPipe » com » aliasi » tag » MarginalTaggerEvaluator.java
/*
 * LingPipe v. 3.9
 * Copyright (C) 2003-2010 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.tag;

import com.aliasi.classify.ConditionalClassification;
import com.aliasi.classify.ConditionalClassifierEvaluator;

import com.aliasi.corpus.ObjectHandler;

import com.aliasi.symbol.SymbolTable;

import com.aliasi.util.ObjectToDoubleMap;
import com.aliasi.util.Strings;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * A {@code MarginalTaggerEvaluator} evaluates marginal taggers either
 * directly or by adding their outputs.
 *
 * <p>Test cases may be added directly using the method {@link
 * #addCase(Tagging,TagLattice)}, where the tagging is a reference
 * gold-standard tagging and the tag lattice the system response
 * tagging.  
 *
 * <p>Test cases may be added indirectly using the method {@link
 * #handle(Tagging)}.  With this use, the contained tagger in the
 * evaluator is used to generate the response tag lattice, which is
 * then added along with the reference tagging. 
 *
 * <p>The tagger may be set in the constructor, or may be set using
 * the method {@link #setTagger(MarginalTagger)}.  The tagger must
 * be non-null in order for a call to the handle method to succeed.
 *
 * <p>The token-by-token evaluation is available through the method
 * {@link #perTokenEval()}, which returns a classifier
 * evaluator.  The items for the classifier evaluator are the tokens,
 * and their classification is determined by the marginal probability
 * assignments for those tags by the tag lattice.  If the marginal
 * tagger evaluator stores tokens, they will be available as part of
 * the returned evaluator.
 * 
 * @author  Bob Carpenter
 * @version 4.0.0
 * @since   LingPipe3.9
 * @param <E> Type of tokens in the tagging.
 */
public class MarginalTaggerEvaluator<E> 
    implements ObjectHandler<Tagging<E>> {


    private MarginalTagger<E> mTagger;
    private final Set<String> mTagSet;
    private final boolean mStoreTokens;
    private final ConditionalClassifierEvaluator<E> mEval;
    private Tagging<E> mLastReferenceTagging;

    /**
     * Construct an evaluator for the specified marginal tagger,
     * using the specifed tag set and flag indicating whether or
     * not to store input tokens.  
     * 
     * @param tagger The tagger to use or {@code null} if all cases
     * are added directly or the tagger's specified later.
     * @param tagSet Complete set of tags found in reference and
     * response taggings.
     * @param storeTokens Flag set to {@code true} to store input
     * tokens for return during classification.
     */
    public MarginalTaggerEvaluator(MarginalTagger<E> tagger,
                                   Set<String> tagSet,
                                   boolean storeTokens) {
        mTagger = tagger;
        mTagSet = new HashSet<String>(tagSet);
        mStoreTokens = storeTokens;
        String[] tags = tagSet.toArray(Strings.EMPTY_STRING_ARRAY);
        mEval = new ConditionalClassifierEvaluator<E>(null,tags,storeTokens);
    }


    /**
     * Returns the tagger for this evaluator.  The tagger may be null.
     *
     * @return Tagger for this evaluator.
     */
    public MarginalTagger<E> tagger() {
        return mTagger;
    }

    /**
     * Set the tagger for this tagger evaluated to the specified
     * tagger.
     *
     * @param tagger Tagger to be evaluated.
     */
    public void setTagger(MarginalTagger<E> tagger) {
        mTagger = tagger;
    }

    /**
     * Returns {@code true} if this evaluator is storing input
     * tokens.
     *
     * @return Whether or not this evaluator stores tokens.
     */
    public boolean storeTokens() {
        return mStoreTokens;
    }

    /**
     * Returns an unmodifiable view of the tags used by
     * this evaluator.
     *
     * @return Tags for this evaluator.
     */
    public Set<String> tagSet() {
        return Collections.unmodifiableSet(mTagSet);
    }
    
    /**
     * Add a case for evaluation consisting of the gold-standard
     * reference tagging and a lattice representing the output of
     * a marginal tagger.
     *
     * @param referenceTagging Reference gold-standard tagging.
     * @param responseLattice System response.
     * @throws IllegalArgumentException If the reference tagging
     * is not the same length as the response.
     */
    public void addCase(Tagging<E> referenceTagging,
                        TagLattice<E> responseLattice) {
        mLastReferenceTagging = referenceTagging;
        if (referenceTagging.size() != responseLattice.numTokens()) {
            String msg = "Reference and response must have the same number of tokens."
                + " Found referenceTagging.size()=" + referenceTagging.size()
                + " responseLattice.numTokens()=" + responseLattice.numTokens();
            throw new IllegalArgumentException(msg);
        }
        for (int i = 0; i < referenceTagging.size(); ++i) {
            if (!referenceTagging.token(i).equals(responseLattice.token(i))) {
                String msg = "Reference and response token lists must be the same."
                    + " referenceTagging.token(" + i + ")=|" + referenceTagging.token(i) + "|"
                    + " responseLattice.token(" + i + ")=|" + responseLattice.token(i) + "|";
                throw new IllegalArgumentException(msg);
            }
        }
        for (String tag : referenceTagging.tags()) {
            if (!mTagSet.contains(tag)) {
                String msg = "Unknown tag in reference tagging."
                    + " Unknown tag=" + tag
                    + " reference tagging=" + referenceTagging;
                throw new IllegalArgumentException(msg);
            }
        }
        List<String> responseTags = responseLattice.tagList();
        for (String tag : responseTags) {
            if (!mTagSet.contains(tag)) {
                String msg = "Unknown tag in output lattice."
                    + " Tag=" + tag
                    + " referenceTagging=" + referenceTagging;
                throw new IllegalArgumentException(msg);
            }
        }
        List<E> tokens = referenceTagging.tokens();
        for (int n = 0; n < tokens.size(); ++n) {
            E token = referenceTagging.token(n);
            List<String> tags = responseLattice.tagList();
            String referenceTag = referenceTagging.tag(n);
            ObjectToDoubleMap<String> tagToScore = new ObjectToDoubleMap<String>();
            for (int i = 0; i < tags.size(); ++i) {
                tagToScore.set(responseTags.get(i),
                               Math.exp(responseLattice.logProbability(n,i)));
            }
            List<String> responseTagsList = tagToScore.keysOrderedByValueList();
            String[] responseTagArray = responseTags.toArray(Strings.EMPTY_STRING_ARRAY);
            double[] responseProbs = new double[responseTags.size()];
            for (int i = 0; i < responseTagArray.length; ++i)
                responseProbs[i] = tagToScore.getValue(responseTagArray[i]);
            ConditionalClassification responseClassification
                = ConditionalClassification.createProbs(responseTagArray,responseProbs);
            mEval.addClassification(referenceTag,responseClassification,token);
        }
    }

    /**
     * Add a test case consisting of the specified reference tagging
     * along with the lattice output of the marginal tagger being
     * evaluated on the tokens of the reference tagging.
     *
     * @param referenceTagging Reference tagging to evaluate
     */
    public void handle(Tagging<E> referenceTagging) {
        List<E> tokens = referenceTagging.tokens();
        TagLattice<E> responseLattice = mTagger.tagMarginal(tokens);
        addCase(referenceTagging,responseLattice);
    }

    public String lastCaseToString(int maxTagsPerToken) {
        if (mLastReferenceTagging == null)
            return "No cases seen yet.";
        List<E> tokenList = mLastReferenceTagging.tokens();
        TagLattice<E> lattice = mTagger.tagMarginal(tokenList);
        StringBuilder sb = new StringBuilder();
        sb.append("Index Token  RefTag  (Prob:ResponseTag)*\n");
        for (int tokenIndex = 0; tokenIndex < tokenList.size(); ++tokenIndex) {
            ConditionalClassification tagScores = lattice.tokenClassification(tokenIndex);
            sb.append(TaggerEvaluator.pad(Integer.toString(tokenIndex),4));
            sb.append(TaggerEvaluator.pad(tokenList.get(tokenIndex),15));
            String refTag = mLastReferenceTagging.tag(tokenIndex);
            sb.append(TaggerEvaluator.pad(refTag,6));
            sb.append("  ");
            for (int i = 0; i < maxTagsPerToken; ++i) {
                double conditionalProb = tagScores.score(i);
                String tag = tagScores.category(i);
                sb.append(" " + NBestTaggerEvaluator.format(conditionalProb) 
                          + ":" + TaggerEvaluator.pad(tag,4));
                sb.append(tag.equals(refTag) ? "* " : "  ");
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Returns the tagger evaluation on a per-token basis.
     *
     * @return Evaluation of the tagger as a classifier.
     */
    public ConditionalClassifierEvaluator<E> perTokenEval() {
        return mEval;
    }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.