Java tutorial
/*
 * (c) 2005 David B. Bracewell
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.davidbracewell.ml.sequence.hmm;

import com.davidbracewell.collection.CollectionUtils;
import com.davidbracewell.logging.Logger;
import com.davidbracewell.math.DoubleEntry;
import com.davidbracewell.math.distribution.DiscreteEstimator;
import com.davidbracewell.math.distribution.conditional.DiscreteConditionalEstimator;
import com.davidbracewell.ml.Instance;
import com.davidbracewell.ml.sequence.SequenceLearner;
import com.davidbracewell.ml.sequence.extractor.ContextExtractorSet;
import com.davidbracewell.ml.sequence.indexers.DataIndexer;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
import org.apache.mahout.math.map.OpenIntObjectHashMap;

import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Count-based learner for a {@link FirstOrderHMM}.
 *
 * <p>Training walks each labeled sequence once and feeds three accumulators:
 * <ul>
 * <li>{@code pi} - incremented with the target value of the first instance of every sequence;</li>
 * <li>{@code alpha} - incremented with each (previous target, current target) pair;</li>
 * <li>{@code beta} - one conditional estimator per feature index, incremented with
 * (target, feature value) for every non-zero entry of every instance.</li>
 * </ul>
 * {@link #buildModel()} copies the estimates of these accumulators into the model. As set up in
 * {@link #clear()}, {@code pi} is a Laplace estimator while {@code alpha} and {@code beta} use the
 * MLE supplier.
 *
 * @param <V> the type parameter shared with the {@link SequenceLearner}, {@link ContextExtractorSet},
 *            {@link DataIndexer} and {@link FirstOrderHMM} this learner works with
 * @author David B. Bracewell
 */
public class MLFirstOrderHMMLearner<V> extends SequenceLearner<V, FirstOrderHMM<V>> {

  private static final long serialVersionUID = 1753326911357058904L;
  private static final Logger log = Logger.getLogger(MLFirstOrderHMMLearner.class);

  /** Accumulator fed with the target value of the first instance of each sequence. */
  DiscreteEstimator pi;

  /** Accumulator fed with (previous target, current target) pairs. */
  DiscreteConditionalEstimator alpha;

  /** Per-feature accumulators keyed by feature index, fed with (target, feature value) pairs. */
  Map<Integer, DiscreteConditionalEstimator> beta;

  /**
   * Instantiates a new maximum likelihood first order hidden markov model learner.
   *
   * @param featureExtractors the feature extractors
   * @param dataIndexer       The data indexer
   */
  public MLFirstOrderHMMLearner(ContextExtractorSet<V> featureExtractors, DataIndexer<V> dataIndexer) {
    super(featureExtractors, dataIndexer);
    // NOTE(review): relies on the superclass constructor having initialised `model`
    // (presumably by calling clear()) - confirm against SequenceLearner.
    this.model.setContextExtractors(featureExtractors);
  }

  /**
   * Adds the non-zero features of one instance to the per-feature accumulators.
   *
   * <p>An accumulator is created lazily the first time a feature index is seen.
   *
   * @param instance the instance whose non-zero entries and target value are counted
   */
  private void incBeta(Instance instance) {
    for (DoubleEntry entry : CollectionUtils.asIterable(instance.nonZeroIterator())) {
      // Single lookup; only fall back to put() when the feature index is new.
      DiscreteConditionalEstimator estimator = beta.get(entry.index);
      if (estimator == null) {
        estimator = new DiscreteConditionalEstimator(DiscreteEstimator.mleSupplier());
        beta.put(entry.index, estimator);
      }
      estimator.increment(instance.getTargetValue(), entry.value);
    }
  }

  /**
   * Trains on every sequence in the given list, logging the start and the elapsed time.
   *
   * <p>Each sequence is removed from {@code instances} right after it has been consumed, so the
   * list is empty when this method returns and its iterator must support {@code remove()}.
   *
   * @param instances the training sequences; drained by this call
   */
  @Override
  protected void trainAllInstances(List<List<Instance>> instances) {
    log.info("Beginning training [{0} examples] ...", instances.size());
    Stopwatch sw = Stopwatch.createStarted();
    Iterator<List<Instance>> remaining = instances.iterator();
    while (remaining.hasNext()) {
      trainOneSequence(remaining.next());
      // Drop the consumed sequence from the caller's list (presumably to release memory early).
      remaining.remove();
    }
    sw.stop();
    log.info("Finished training in {0}", sw);
  }

  /**
   * Adds the counts of one labeled sequence to {@code pi}, {@code alpha} and {@code beta}.
   *
   * <p>A {@code null} or empty sequence carries no observations and is ignored, instead of
   * failing on the access to its first element.
   *
   * @param sequence the instances of one sequence, in order
   */
  @Override
  protected void trainOneSequence(List<Instance> sequence) {
    if (sequence == null || sequence.isEmpty()) {
      return;
    }

    // Iterate rather than index so non-random-access lists are walked in linear time.
    Iterator<Instance> instances = sequence.iterator();

    Instance first = instances.next();
    pi.increment(first.getTargetValue());
    incBeta(first);

    double previousTarget = first.getTargetValue();
    while (instances.hasNext()) {
      Instance current = instances.next();
      double currentTarget = current.getTargetValue();
      alpha.increment(previousTarget, currentTarget);
      previousTarget = currentTarget;
      incBeta(current);
    }
  }

  /**
   * Copies the current estimates of the accumulators into the model and returns it.
   *
   * <p>Only feature indices in {@code [0, model.getFeatures().size())} that have an accumulator
   * are written to {@code model.beta}.
   *
   * @return the learner's model with {@code pi}, {@code alpha} and {@code beta} replaced
   */
  @Override
  public FirstOrderHMM<V> buildModel() {
    final int featureCount = model.getFeatures().size();
    model.pi = pi.estimate();
    model.alpha = alpha.estimate();
    model.beta = new OpenIntObjectHashMap<>();
    for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) {
      DiscreteConditionalEstimator estimator = beta.get(featureIndex);
      if (estimator != null) {
        model.beta.put(featureIndex, estimator.estimate());
      }
    }
    return model;
  }

  /**
   * Resets the learner: fresh accumulators and a fresh, empty model.
   *
   * <p>NOTE(review): the replacement model is created without the context extractors that the
   * constructor sets on the initial model - confirm whether they need to be re-applied here.
   */
  @Override
  public void clear() {
    pi = DiscreteEstimator.laplaceEstimator();
    alpha = new DiscreteConditionalEstimator(DiscreteEstimator.mleSupplier());
    beta = Maps.newHashMap();
    model = new FirstOrderHMM<>();
  }

}//END OF MLFirstOrderHMMLearner