X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?p=CreaPhase.git;a=blobdiff_plain;f=octave_packages%2Fstatistics-1.1.3%2Fkmeans.m;fp=octave_packages%2Fstatistics-1.1.3%2Fkmeans.m;h=5ac5ee4238de0fdef5f0c797d884c6d3dc2f4ed0;hp=0000000000000000000000000000000000000000;hb=f5f7a74bd8a4900f0b797da6783be80e11a68d86;hpb=1705066eceaaea976f010f669ce8e972f3734b05 diff --git a/octave_packages/statistics-1.1.3/kmeans.m b/octave_packages/statistics-1.1.3/kmeans.m new file mode 100644 index 0000000..5ac5ee4 --- /dev/null +++ b/octave_packages/statistics-1.1.3/kmeans.m @@ -0,0 +1,139 @@ +## Copyright (C) 2011 Soren Hauberg +## Copyright (C) 2012 Daniel Ward +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Function File} {[@var{idx}, @var{centers}] =} kmeans (@var{data}, @var{k}, @var{param1}, @var{value1}, @dots{}) +## K-means clustering. +## +## @seealso{linkage} +## @end deftypefn + +function [classes, centers, sumd, D] = kmeans (data, k, varargin) + [reg, prop] = parseparams (varargin); + + ## defaults for options + emptyaction = "error"; + start = "sample"; + + #used for getting the number of samples + nRows = rows (data); + + ## used to hold the distances from each sample to each class + D = zeros (nRows, k); + + #used for convergence of the centroids + err = 1; + + #initial sum of distances + sumd = Inf; + + ## Input checking, validate the matrix and k + if (!isnumeric (data) || !ismatrix (data) || !isreal (data)) + error ("kmeans: first input argument must be a DxN real data matrix"); + elseif (!isscalar (k)) + error ("kmeans: second input argument must be a scalar"); + endif + + if (length (varargin) > 0) + ## check for the 'emptyaction' property + found = find (strcmpi (prop, "emptyaction") == 1); + switch (lower (prop{found+1})) + case "singleton" + emptyaction = "singleton"; + otherwise + error ("kmeans: unsupported empty cluster action parameter"); + endswitch + endif + + ## check for the 'start' property + switch (lower (start)) + case "sample" + idx = randperm (nRows) (1:k); + centers = data (idx, :); + otherwise + error ("kmeans: unsupported initial clustering parameter"); + endswitch + + ## Run the algorithm + while err > .001 + ## Compute distances + for i = 1:k + D (:, i) = sumsq (data - repmat (centers(i, :), nRows, 1), 2); + endfor + + ## Classify + [tmp, classes] = min (D, [], 2); + + ## Calcualte new centroids + for i = 1:k + ## Check for empty clusters + if (sum (classes == i) ==0 || length (mean (data(classes == i, :))) == 0) + + switch emptyaction + ## if 'singleton', then find the point that is the + ## farthest and add it to the empty cluster + case 'singleton' + classes(maxCostSampleIndex (data, centers(i,:))) = i; + ## if 'error' then throw the error + otherwise + error ("kmeans: empty cluster created"); + endswitch + endif ## end check for empty clusters + + ## update the centroids + centers(i, :) = mean (data(classes == i, :)); + endfor + + ## calculate the differnece in the sum of distances + err = sumd - objCost (data, classes, centers); + ## update the current sum of distances + sumd = objCost (data, classes, centers); + endwhile +endfunction + +## calculate the sum of distances +function obj = objCost (data, classes, centers) + obj = 0; + for i=1:rows (data) + obj = obj + sumsq (data(i,:) - centers(classes(i),:)); + endfor +endfunction + +function index = maxCostSampleIndex (data, centers) + cost = 0; + for index = 1:rows (data) + if cost < sumsq (data(index,:) - centers) + cost = sumsq (data(index,:) - centers); + endif + endfor +endfunction + +%!demo +%! ## Generate a two-cluster problem +%! C1 = randn (100, 2) + 1; +%! C2 = randn (100, 2) - 1; +%! data = [C1; C2]; +%! +%! ## Perform clustering +%! [idx, centers] = kmeans (data, 2); +%! +%! ## Plot the result +%! figure +%! plot (data (idx==1, 1), data (idx==1, 2), 'ro'); +%! hold on +%! plot (data (idx==2, 1), data (idx==2, 2), 'bs'); +%! plot (centers (:, 1), centers (:, 2), 'kv', 'markersize', 10); +%! hold off