1 ## Copyright (C) 2006, 2007 Arno Onken <asnelt@asnelt.org>
3 ## This program is free software; you can redistribute it and/or modify it under
4 ## the terms of the GNU General Public License as published by the Free Software
5 ## Foundation; either version 3 of the License, or (at your option) any later
8 ## This program is distributed in the hope that it will be useful, but WITHOUT
9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 ## You should have received a copy of the GNU General Public License along with
14 ## this program; if not, see <http://www.gnu.org/licenses/>.
17 ## @deftypefn {Function File} {@var{vpath} =} hmmviterbi (@var{sequence}, @var{transprob}, @var{outprob})
18 ## @deftypefnx {Function File} {} hmmviterbi (@dots{}, 'symbols', @var{symbols})
19 ## @deftypefnx {Function File} {} hmmviterbi (@dots{}, 'statenames', @var{statenames})
20 ## Use the Viterbi algorithm to find the Viterbi path of a hidden Markov
21 ## model given a sequence of outputs. The model assumes that the generation
22 ## starts in state @code{1} at step @code{0} but does not include step
23 ## @code{0} in the generated states and sequence.
25 ## @subheading Arguments
29 ## @var{sequence} is the vector of length @var{len} of given outputs. The
30 ## outputs must be integers ranging from @code{1} to
31 ## @code{columns (outprob)}.
34 ## @var{transprob} is the matrix of transition probabilities of the states.
35 ## @code{transprob(i, j)} is the probability of a transition to state
36 ## @code{j} given state @code{i}.
39 ## @var{outprob} is the matrix of output probabilities.
40 ## @code{outprob(i, j)} is the probability of generating output @code{j}
41 ## given state @code{i}.
44 ## @subheading Return values
48 ## @var{vpath} is the vector of the same length as @var{sequence} of the
49 ## estimated hidden states. The states are integers ranging from @code{1} to
50 ## @code{columns (transprob)}.
53 ## If @code{'symbols'} is specified, then @var{sequence} is expected to be a
54 ## sequence of the elements of @var{symbols} instead of integers ranging
55 ## from @code{1} to @code{columns (outprob)}. @var{symbols} can be a cell array.
57 ## If @code{'statenames'} is specified, then the elements of
58 ## @var{statenames} are used for the states in @var{vpath} instead of
59 ## integers ranging from @code{1} to @code{columns (transprob)}.
60 ## @var{statenames} can be a cell array.
62 ## @subheading Examples
66 ## transprob = [0.8, 0.2; 0.4, 0.6];
67 ## outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
68 ## [sequence, states] = hmmgenerate (25, transprob, outprob)
69 ## vpath = hmmviterbi (sequence, transprob, outprob)
73 ## symbols = @{'A', 'B', 'C'@};
74 ## statenames = @{'One', 'Two'@};
75 ## [sequence, states] = hmmgenerate (25, transprob, outprob,
76 ## 'symbols', symbols, 'statenames', statenames)
77 ## vpath = hmmviterbi (sequence, transprob, outprob,
78 ## 'symbols', symbols, 'statenames', statenames)
82 ## @subheading References
86 ## Wendy L. Martinez and Angel R. Martinez. @cite{Computational Statistics
87 ## Handbook with MATLAB}. Appendix E, pages 547-557, Chapman & Hall/CRC,
91 ## Lawrence R. Rabiner. A Tutorial on Hidden Markov Models and Selected
92 ## Applications in Speech Recognition. @cite{Proceedings of the IEEE},
93 ## 77(2), pages 257-286, February 1989.
97 ## Author: Arno Onken <asnelt@asnelt.org>
98 ## Description: Viterbi path of a hidden Markov model
100 function vpath = hmmviterbi (sequence, transprob, outprob, varargin)
103 if (nargin < 3 || mod (length (varargin), 2) != 0)
107 if (! ismatrix (transprob))
108 error ("hmmviterbi: transprob must be a non-empty numeric matrix");
110 if (! ismatrix (outprob))
111 error ("hmmviterbi: outprob must be a non-empty numeric matrix");
114 len = length (sequence);
115 # nstate is the number of states of the hidden Markov model
116 nstate = rows (transprob);
117 # noutput is the number of different outputs that the hidden Markov model
119 noutput = columns (outprob);
121 # Check whether transprob and outprob are feasible for a hidden Markov model
122 if (columns (transprob) != nstate)
123 error ("hmmviterbi: transprob must be a square matrix");
125 if (rows (outprob) != nstate)
126 error ("hmmviterbi: outprob must have the same number of rows as transprob");
131 # Flag for statenames
135 for i = 1:2:length (varargin)
136 # There must be an identifier: 'symbols' or 'statenames'
137 if (! ischar (varargin{i}))
140 # Upper case is also fine
141 lowerarg = lower (varargin{i});
142 if (strcmp (lowerarg, 'symbols'))
143 if (length (varargin{i + 1}) != noutput)
144 error ("hmmviterbi: number of symbols does not match number of possible outputs");
147 # Use the following argument as symbols
148 symbols = varargin{i + 1};
149 # The same for statenames
150 elseif (strcmp (lowerarg, 'statenames'))
151 if (length (varargin{i + 1}) != nstate)
152 error ("hmmviterbi: number of statenames does not match number of states");
155 # Use the following argument as statenames
156 statenames = varargin{i + 1};
158 error ("hmmviterbi: expected 'symbols' or 'statenames' but found '%s'", varargin{i});
162 # Transform sequence from symbols to integers if necessary
164 # sequenceint is used to build the transformed sequence
165 sequenceint = zeros (1, len);
167 # Search for symbols(i) in the sequence, isequal will have 1 at
168 # corresponding indices; i is the right integer for that symbol
169 isequal = ismember (sequence, symbols(i));
170 # We do not want to change sequenceint if the symbol appears a second
172 if (any ((sequenceint == 0) & (isequal == 1)))
174 sequenceint += isequal;
177 if (! all (sequenceint))
178 index = max ((sequenceint == 0) .* (1:len));
179 error (["hmmviterbi: sequence(" int2str (index) ") not in symbols"]);
181 sequence = sequenceint;
183 if (! isvector (sequence) && ! isempty (sequence))
184 error ("hmmviterbi: sequence must be a vector");
186 if (! all (ismember (sequence, 1:noutput)))
187 index = max ((ismember (sequence, 1:noutput) == 0) .* (1:len));
188 error (["hmmviterbi: sequence(" int2str (index) ") out of range"]);
192 # Each row in transprob and outprob should contain probabilities
193 # => scale so that the sum is 1
194 # A zero row remains zero
196 s = sum (transprob, 2);
198 transprob = transprob ./ (s * ones (1, columns (transprob)));
200 s = sum (outprob, 2);
202 outsprob = outprob ./ (s * ones (1, columns (outprob)));
204 # Store the path starting from i in spath(i, :)
205 spath = ones (nstate, len + 1);
206 # Set the first state for each path
207 spath(:, 1) = (1:nstate)';
208 # Store the probability of path i in spathprob(i)
209 spathprob = transprob(1, :);
211 # Find the most likely paths for the given output sequence
213 # Calculate the new probabilities of the continuation with each state
214 nextpathprob = ((spathprob' .* outprob(:, sequence(i))) * ones (1, nstate)) .* transprob;
215 # Find the paths with the highest probabilities
216 [spathprob, mindex] = max (nextpathprob);
217 # Update spath and spathprob with the new paths
218 spath = spath(mindex, :);
219 spath(:, i + 1) = (1:nstate)';
222 # Set vpath to the most likely path
223 # We do not want the last state because we do not have an output for it
224 [m, mindex] = max (spathprob);
225 vpath = spath(mindex, 1:len);
227 # Transform vpath into statenames if requested
229 vpath = reshape (statenames(vpath), 1, len);
235 %! sequence = [1, 2, 1, 1, 1, 2, 2, 1, 2, 3, 3, 3, 3, 2, 3, 1, 1, 1, 1, 3, 3, 2, 3, 1, 3];
236 %! transprob = [0.8, 0.2; 0.4, 0.6];
237 %! outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
238 %! vpath = hmmviterbi (sequence, transprob, outprob);
239 %! expected = [1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1];
240 %! assert (vpath, expected);
243 %! sequence = {'A', 'B', 'A', 'A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'A', 'A', 'C', 'C', 'B', 'C', 'A', 'C'};
244 %! transprob = [0.8, 0.2; 0.4, 0.6];
245 %! outprob = [0.2, 0.4, 0.4; 0.7, 0.2, 0.1];
246 %! symbols = {'A', 'B', 'C'};
247 %! statenames = {'One', 'Two'};
248 %! vpath = hmmviterbi (sequence, transprob, outprob, 'symbols', symbols, 'statenames', statenames);
249 %! expected = {'One', 'One', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'One', 'Two', 'Two', 'Two', 'Two', 'One', 'One', 'One', 'One', 'One', 'One'};
250 %! assert (vpath, expected);