com.github.steveash.jg2p.align.AlignerInferencer.java Source code

Introduction

Here is the source code for com.github.steveash.jg2p.align.AlignerInferencer.java
Source

/*
 * Copyright 2014 Steve Ash
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.steveash.jg2p.align;

import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.math.DoubleMath;

import com.github.steveash.jg2p.Word;

import java.util.Collections;
import java.util.List;

/**
 * Runs inference on a sequence X to determine the top-k probable alignment(s)
 *
 * @author Steve Ash
 */
public class AlignerInferencer {

    private final GramOptions opts;
    private final ProbTable probs;
    private final ProbTable.Marginals margs;

    public AlignerInferencer(GramOptions opts, ProbTable probs) {
        this.opts = opts;
        this.probs = probs;
        this.margs = probs.calculateMarginals();
    }

    public List<Alignment> bestGraphemes(Word x, int bestPathCount) {
        PathXTable t = new PathXTable(x.unigramCount() + 1, bestPathCount);
        t.offer(0, t.make(0, -1, -1));

        for (int xx = 1; xx < x.unigramCount() + 1; xx++) {
            for (int i = 1; (i <= opts.getMaxXGram()) && (xx - i >= 0); i++) {
                String xGram = x.gram(xx - i, i);
                double margX = margs.probX(xGram);

                double score = DoubleMath.log2(margX) * i;
                t.extendPath(xx, xx - i, PathXTable.Entry.sample(score, i));
            }
        }

        return createAlignments(x, t, bestPathCount);
    }

    private List<Alignment> createAlignments(Word x, PathXTable t, int bestPathCount) {
        List<Alignment> results = Lists.newArrayListWithCapacity(bestPathCount);

        Iterable<PathXTable.Entry> lastEntries = t.get(x.unigramCount());

        for (PathXTable.Entry lastEntry : lastEntries) {
            if (lastEntry.score < ProbTable.minLogProb) {
                continue;
            }

            results.add(decodePathFrom(x, t, lastEntry));
        }
        Collections.sort(results, Ordering.natural().reverse());
        return results;
    }

    private Alignment decodePathFrom(Word x, PathXTable t, PathXTable.Entry entry) {
        int xx = x.unigramCount();
        Alignment a = new Alignment(x, entry.score);

        while (xx > 0) {
            String xGram = x.gram(xx - entry.xBackRef, entry.xBackRef);
            a.append(xGram, "");

            xx -= entry.xBackRef;
            entry = t.get(xx, entry.pathBackRef);
        }
        return a.finish();
    }

}