X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?p=CreaPhase.git;a=blobdiff_plain;f=octave_packages%2Fstatistics-1.1.3%2Fhmmestimate.m;fp=octave_packages%2Fstatistics-1.1.3%2Fhmmestimate.m;h=dbbff5bf179da2290f3941fa3844888aaaa88f6d;hp=0000000000000000000000000000000000000000;hb=c880e8788dfc484bf23ce13fa2787f2c6bca4863;hpb=1705066eceaaea976f010f669ce8e972f3734b05 diff --git a/octave_packages/statistics-1.1.3/hmmestimate.m b/octave_packages/statistics-1.1.3/hmmestimate.m new file mode 100644 index 0000000..dbbff5b --- /dev/null +++ b/octave_packages/statistics-1.1.3/hmmestimate.m @@ -0,0 +1,338 @@ +## Copyright (C) 2006, 2007 Arno Onken +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Function File} {[@var{transprobest}, @var{outprobest}] =} hmmestimate (@var{sequence}, @var{states}) +## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'statenames', @var{statenames}) +## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'symbols', @var{symbols}) +## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudotransitions', @var{pseudotransitions}) +## @deftypefnx {Function File} {} hmmestimate (@dots{}, 'pseudoemissions', @var{pseudoemissions}) +## Estimate the matrix of transition probabilities and the matrix of output +## probabilities of a given sequence of outputs and states generated by a +## hidden Markov model. The model assumes that the generation starts in +## state @code{1} at step @code{0} but does not include step @code{0} in the +## generated states and sequence. +## +## @subheading Arguments +## +## @itemize @bullet +## @item +## @var{sequence} is a vector of a sequence of given outputs. The outputs +## must be integers ranging from @code{1} to the number of outputs of the +## hidden Markov model. +## +## @item +## @var{states} is a vector of the same length as @var{sequence} of given +## states. The states must be integers ranging from @code{1} to the number +## of states of the hidden Markov model. +## @end itemize +## +## @subheading Return values +## +## @itemize @bullet +## @item +## @var{transprobest} is the matrix of the estimated transition +## probabilities of the states. @code{transprobest(i, j)} is the estimated +## probability of a transition to state @code{j} given state @code{i}. +## +## @item +## @var{outprobest} is the matrix of the estimated output probabilities. +## @code{outprobest(i, j)} is the estimated probability of generating +## output @code{j} given state @code{i}. +## @end itemize +## +## If @code{'symbols'} is specified, then @var{sequence} is expected to be a +## sequence of the elements of @var{symbols} instead of integers. +## @var{symbols} can be a cell array. +## +## If @code{'statenames'} is specified, then @var{states} is expected to be +## a sequence of the elements of @var{statenames} instead of integers. +## @var{statenames} can be a cell array. +## +## If @code{'pseudotransitions'} is specified then the integer matrix +## @var{pseudotransitions} is used as an initial number of counted +## transitions. @code{pseudotransitions(i, j)} is the initial number of +## counted transitions from state @code{i} to state @code{j}. +## @var{transprobest} will have the same size as @var{pseudotransitions}. +## Use this if you have transitions that are very unlikely to occur. +## +## If @code{'pseudoemissions'} is specified then the integer matrix +## @var{pseudoemissions} is used as an initial number of counted outputs. +## @code{pseudoemissions(i, j)} is the initial number of counted outputs +## @code{j} given state @code{i}. If @code{'pseudoemissions'} is also +## specified then the number of rows of @var{pseudoemissions} must be the +## same as the number of rows of @var{pseudotransitions}. @var{outprobest} +## will have the same size as @var{pseudoemissions}. Use this if you have +## outputs or states that are very unlikely to occur. +## +## @subheading Examples +## +## @example +## @group +## transprob = [0.8, 0.2; 0.4, 0.6]; +## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1]; +## [sequence, states] = hmmgenerate (25, transprob, outprob); +## [transprobest, outprobest] = hmmestimate (sequence, states) +## @end group +## +## @group +## symbols = @{'A', 'B', 'C'@}; +## statenames = @{'One', 'Two'@}; +## [sequence, states] = hmmgenerate (25, transprob, outprob, +## 'symbols', symbols, 'statenames', statenames); +## [transprobest, outprobest] = hmmestimate (sequence, states, +## 'symbols', symbols, +## 'statenames', statenames) +## @end group +## +## @group +## pseudotransitions = [8, 2; 4, 6]; +## pseudoemissions = [2, 4, 4; 7, 2, 1]; +## [sequence, states] = hmmgenerate (25, transprob, outprob); +## [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions) +## @end group +## @end example +## +## @subheading References +## +## @enumerate +## @item +## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics +## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC, +## 2001. +## +## @item +## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected +## Applications in Speech Recognition. @cite{Proceedings of the IEEE}, +## 77(2), pages 257-286, February 1989. +## @end enumerate +## @end deftypefn + +## Author: Arno Onken +## Description: Estimation of a hidden Markov model for a given sequence + +function [transprobest, outprobest] = hmmestimate (sequence, states, varargin) + + # Check arguments + if (nargin < 2 || mod (length (varargin), 2) != 0) + print_usage (); + endif + + len = length (sequence); + if (length (states) != len) + error ("hmmestimate: sequence and states must have equal length"); + endif + + # Flag for symbols + usesym = false; + # Flag for statenames + usesn = false; + + # Variables for return values + transprobest = []; + outprobest = []; + + # Process varargin + for i = 1:2:length (varargin) + # There must be an identifier: 'symbols', 'statenames', + # 'pseudotransitions' or 'pseudoemissions' + if (! ischar (varargin{i})) + print_usage (); + endif + # Upper case is also fine + lowerarg = lower (varargin{i}); + if (strcmp (lowerarg, 'symbols')) + usesym = true; + # Use the following argument as symbols + symbols = varargin{i + 1}; + # The same for statenames + elseif (strcmp (lowerarg, 'statenames')) + usesn = true; + # Use the following argument as statenames + statenames = varargin{i + 1}; + elseif (strcmp (lowerarg, 'pseudotransitions')) + # Use the following argument as an initial count for transitions + transprobest = varargin{i + 1}; + if (! ismatrix (transprobest)) + error ("hmmestimate: pseudotransitions must be a non-empty numeric matrix"); + endif + if (rows (transprobest) != columns (transprobest)) + error ("hmmestimate: pseudotransitions must be a square matrix"); + endif + elseif (strcmp (lowerarg, 'pseudoemissions')) + # Use the following argument as an initial count for outputs + outprobest = varargin{i + 1}; + if (! ismatrix (outprobest)) + error ("hmmestimate: pseudoemissions must be a non-empty numeric matrix"); + endif + else + error ("hmmestimate: expected 'symbols', 'statenames', 'pseudotransitions' or 'pseudoemissions' but found '%s'", varargin{i}); + endif + endfor + + # Transform sequence from symbols to integers if necessary + if (usesym) + # sequenceint is used to build the transformed sequence + sequenceint = zeros (1, len); + for i = 1:length (symbols) + # Search for symbols(i) in the sequence, isequal will have 1 at + # corresponding indices; i is the right integer for that symbol + isequal = ismember (sequence, symbols(i)); + # We do not want to change sequenceint if the symbol appears a second + # time in symbols + if (any ((sequenceint == 0) & (isequal == 1))) + isequal *= i; + sequenceint += isequal; + endif + endfor + if (! all (sequenceint)) + index = max ((sequenceint == 0) .* (1:len)); + error (["hmmestimate: sequence(" int2str (index) ") not in symbols"]); + endif + sequence = sequenceint; + else + if (! isvector (sequence)) + error ("hmmestimate: sequence must be a non-empty vector"); + endif + if (! all (ismember (sequence, 1:max (sequence)))) + index = max ((ismember (sequence, 1:max (sequence)) == 0) .* (1:len)); + error (["hmmestimate: sequence(" int2str (index) ") not feasible"]); + endif + endif + + # Transform states from statenames to integers if necessary + if (usesn) + # statesint is used to build the transformed states + statesint = zeros (1, len); + for i = 1:length (statenames) + # Search for statenames(i) in states, isequal will have 1 at + # corresponding indices; i is the right integer for that statename + isequal = ismember (states, statenames(i)); + # We do not want to change statesint if the statename appears a second + # time in statenames + if (any ((statesint == 0) & (isequal == 1))) + isequal *= i; + statesint += isequal; + endif + endfor + if (! all (statesint)) + index = max ((statesint == 0) .* (1:len)); + error (["hmmestimate: states(" int2str (index) ") not in statenames"]); + endif + states = statesint; + else + if (! isvector (states)) + error ("hmmestimate: states must be a non-empty vector"); + endif + if (! all (ismember (states, 1:max (states)))) + index = max ((ismember (states, 1:max (states)) == 0) .* (1:len)); + error (["hmmestimate: states(" int2str (index) ") not feasible"]); + endif + endif + + # Estimate the number of different states as the max of states + nstate = max (states); + # Estimate the number of different outputs as the max of sequence + noutput = max (sequence); + + # transprobest is empty if pseudotransitions is not specified + if (isempty (transprobest)) + # outprobest is not empty if pseudoemissions is specified + if (! isempty (outprobest)) + if (nstate > rows (outprobest)) + error ("hmmestimate: not enough rows in pseudoemissions"); + endif + # The number of states is specified by pseudoemissions + nstate = rows (outprobest); + endif + transprobest = zeros (nstate, nstate); + else + if (nstate > rows (transprobest)) + error ("hmmestimate: not enough rows in pseudotransitions"); + endif + # The number of states is given by pseudotransitions + nstate = rows (transprobest); + endif + + # outprobest is empty if pseudoemissions is not specified + if (isempty (outprobest)) + outprobest = zeros (nstate, noutput); + else + if (noutput > columns (outprobest)) + error ("hmmestimate: not enough columns in pseudoemissions"); + endif + # Number of outputs is specified by pseudoemissions + noutput = columns (outprobest); + if (rows (outprobest) != nstate) + error ("hmmestimate: pseudoemissions must have the same number of rows as pseudotransitions"); + endif + endif + + # Assume that the model started in state 1 + cstate = 1; + for i = 1:len + # Count the number of transitions for each state pair + transprobest(cstate, states(i)) ++; + cstate = states (i); + # Count the number of outputs for each state output pair + outprobest(cstate, sequence(i)) ++; + endfor + + # transprobest and outprobest contain counted numbers + # Each row in transprobest and outprobest should contain estimated + # probabilities + # => scale so that the sum is 1 + # A zero row remains zero + # - for transprobest + s = sum (transprobest, 2); + s(s == 0) = 1; + transprobest = transprobest ./ (s * ones (1, nstate)); + # - for outprobest + s = sum (outprobest, 2); + s(s == 0) = 1; + outprobest = outprobest ./ (s * ones (1, noutput)); + +endfunction + +%!test +%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3]; +%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]; +%! [transprobest, outprobest] = hmmestimate (sequence, states); +%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429]; +%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000]; +%! assert (transprobest, expectedtransprob, 0.001); +%! assert (outprobest, expectedoutprob, 0.001); + +%!test +%! sequence = {'A', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'A', 'A', 'C', 'C', 'B', 'C', 'A', 'C'}; +%! states = {'One', 'One', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One'}; +%! symbols = {'A', 'B', 'C'}; +%! statenames = {'One', 'Two'}; +%! [transprobest, outprobest] = hmmestimate (sequence, states, 'symbols', symbols, 'statenames', statenames); +%! expectedtransprob = [0.88889, 0.11111; 0.28571, 0.71429]; +%! expectedoutprob = [0.16667, 0.33333, 0.50000; 1.00000, 0.00000, 0.00000]; +%! assert (transprobest, expectedtransprob, 0.001); +%! assert (outprobest, expectedoutprob, 0.001); + +%!test +%! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3]; +%! states = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1]; +%! pseudotransitions = [8, 2; 4, 6]; +%! pseudoemissions = [2, 4, 4; 7, 2, 1]; +%! [transprobest, outprobest] = hmmestimate (sequence, states, 'pseudotransitions', pseudotransitions, 'pseudoemissions', pseudoemissions); +%! expectedtransprob = [0.85714, 0.14286; 0.35294, 0.64706]; +%! expectedoutprob = [0.178571, 0.357143, 0.464286; 0.823529, 0.117647, 0.058824]; +%! assert (transprobest, expectedtransprob, 0.001); +%! assert (outprobest, expectedoutprob, 0.001);