1 ## Copyright (C) 2002 Alberto Terruzzi <t-albert@libero.it>
2 ## Copyright (C) 2006 Alberto Pose <apose@alu.itba.edu.ar>
3 ## Copyright (C) 2011 Pascal Dupuis <Pascal.Dupuis@worldonline.be>
4 ## Copyright (C) 2012 Juan Pablo Carbajal <carbajal@ifi.uzh.ch>
6 ## This program is free software; you can redistribute it and/or modify it under
7 ## the terms of the GNU General Public License as published by the Free Software
8 ## Foundation; either version 3 of the License, or (at your option) any later
11 ## This program is distributed in the hope that it will be useful, but WITHOUT
12 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 ## You should have received a copy of the GNU General Public License along with
17 ## this program; if not, see <http://www.gnu.org/licenses/>.
20 ## @deftypefn {Function File} {@var{s} =} boxplot (@var{data}, @var{notched}, @
21 ## @var{symbol}, @var{vertical}, @var{maxwhisker}, @dots{})
22 ## @deftypefnx {Function File} {[@dots{} @var{h}]=} boxplot (@dots{})
24 ## Produce a box plot.
26 ## The box plot is a graphical display that simultaneously describes several
27 ## important features of a data set, such as center, spread, departure from
28 ## symmetry, and identification of observations that lie unusually far from
29 ## the bulk of the data.
31 ## @var{data} is a matrix with one column for each data set, or data is a cell
32 ## vector with one cell for each data set.
34 ## @var{notched} = 1 produces a notched-box plot. Notches represent a robust
35 ## estimate of the uncertainty about the median.
37 ## @var{notched} = 0 (default) produces a rectangular box plot.
39 ## @var{notched} in (0,1) produces a notch of the specified depth.
40 ## notched values outside (0,1) are amusing if not exactly practical.
42 ## @var{symbol} sets the symbol for the outlier values, default symbol for
43 ## points that lie outside 3 times the interquartile range is 'o',
44 ## default symbol for points between 1.5 and 3 times the interquartile
47 ## @var{symbol} = '.' points between 1.5 and 3 times the IQR is marked with
48 ## '.' and points outside 3 times IQR with 'o'.
50 ## @var{symbol} = ['x','*'] points between 1.5 and 3 times the IQR is marked with
51 ## 'x' and points outside 3 times IQR with '*'.
53 ## @var{vertical} = 0 makes the boxes horizontal, by default @var{vertical} = 1.
55 ## @var{maxwhisker} defines the length of the whiskers as a function of the IQR
56 ## (default = 1.5). If @var{maxwhisker} = 0 then @code{boxplot} displays all data
57 ## values outside the box using the plotting symbol for points that lie
58 ## outside 3 times the IQR.
60 ## Supplemental arguments are concatenated and passed to plot.
62 ## The returned matrix @var{s} has one column for each data set as follows:
64 ## @multitable @columnfractions .1 .8
65 ## @item 1 @tab Minimum
66 ## @item 2 @tab 1st quartile
67 ## @item 3 @tab 2nd quartile (median)
68 ## @item 4 @tab 3rd quartile
69 ## @item 5 @tab Maximum
70 ## @item 6 @tab Lower confidence limit for median
71 ## @item 7 @tab Upper confidence limit for median
74 ## The returned structure @var{h} has hanldes to the plot elements, allowing
75 ## customization of the visualization using set/get functions.
80 ## title ("Grade 3 heights");
82 ## tics ("x", 1:2, @{"girls"; "boys"@});
83 ## boxplot (@{randn(10,1)*5+140, randn(13,1)*8+135@});
88 ## Author: Alberto Terruzzi <t-albert@libero.it>
90 ## Created: 6 January 2002
93 ## Author: Alberto Pose <apose@alu.itba.edu.ar>
94 ## Updated: 3 September 2006
95 ## - Replaced deprecated is_nan_or_na(X) with (isnan(X) | isna(X))
96 ## (now works with this software 2.9.7 and foward)
99 ## Author: Pascal Dupuis <Pascal.Dupuis@worldonline.be>
100 ## Updated: 14 October 2011
101 ## - Added support for named arguments
104 ## Author: Juan Pablo Carbajal <carbajal@ifi.uzh.ch>
105 ## Updated: 01 March 2012
106 ## - Returns structure with handles to plot elements
107 ## - Added example as demo
109 %# function s = boxplot (data,notched,symbol,vertical,maxwhisker)
110 function [s hs] = boxplot (data, varargin)
112 ## assign parameter defaults
124 %# Optional arguments analysis
126 option_args = ['Notch'; 'Symbol'; 'Vertical'; 'Maxwhisker'];
129 dummy = varargin{indopt++};
131 %# old way: positional argument
140 error("No positional argument allowed at position %d", --indopt);
144 if (3 == indopt && length (dummy) <= 2)
145 symbol = dummy; numarg--; continue;
147 tt = strmatch(dummy, option_args);
150 notched = varargin{indopt};
152 symbol = varargin{indopt};
154 vertical = varargin{indopt};
156 maxwhisker = varargin{indopt};
158 %# take two args and append them to plot_opts
159 plot_opts(1, end+1:end+2) = {dummy, varargin{indopt}};
162 indopt++; numarg -= 2;
166 if (1 == length (symbol)) symbol(2) = symbol(1); endif
168 if (1 == notched) notched = 0.25; endif
171 ## figure out how many data sets we have
175 if (isvector (data)) data = data(:); endif
179 ## compute statistics
182 ## 2,3,4 1st, 2nd and 3rd quartile
183 ## 6,7 lower and upper confidence intervals for median
186 whisker_x = ones (2,1)*[1:nc,1:nc];
187 whisker_y = zeros (2,2*nc);
194 ## Get the next data set from the array or cell array
201 col(isnan (col) | isna (col)) = [];
202 ## Remember the data length
206 ## min,max and quartiles
207 s(1:5, indi) = statistics (col)(1:5);
208 ## confidence interval for the median
209 est = 1.57*(s(4, indi)-s(2, indi))/sqrt (nd);
210 s(6, indi) = max ([s(3, indi)-est, s(2, indi)]);
211 s(7, indi) = min ([s(3, indi)+est, s(4, indi)]);
212 ## whiskers out to the last point within the desired inter-quartile range
213 IQR = maxwhisker*(s(4, indi)-s(2, indi));
214 whisker_y(:, indi) = [min(col(col >= s(2, indi)-IQR)); s(2, indi)];
215 whisker_y(:,nc+indi) = [max(col(col <= s(4, indi)+IQR)); s(4, indi)];
216 ## outliers beyond 1 and 2 inter-quartile ranges
217 outliers = col((col < s(2, indi)-IQR & col >= s(2, indi)-2*IQR) | (col > s(4, indi)+IQR & col <= s(4, indi)+2*IQR));
218 outliers2 = col(col < s(2, indi)-2*IQR | col > s(4, indi)+2*IQR);
219 outliers_x = [outliers_x; indi*ones(size(outliers))];
220 outliers_y = [outliers_y; outliers];
221 outliers2_x = [outliers2_x; indi*ones(size(outliers2))];
222 outliers2_y = [outliers2_y; outliers2];
224 ## all statistics collapse to the value of the point
226 ## single point data sets are plotted as outliers.
227 outliers_x = [outliers_x; indi];
228 outliers_y = [outliers_y; col];
230 ## no statistics if no points
235 ## Note which boxes don't have enough stats
236 chop = find (box <= 1);
238 ## Draw a box around the quartiles, with width proportional to the number of
239 ## items in the box. Draw notches if desired.
240 box *= 0.4/max (box);
241 quartile_x = ones (11,1)*[1:nc] + [-a;-1;-1;1;1;a;1;1;-1;-1;-a]*box;
242 quartile_y = s([3,7,4,4,7,3,6,2,2,6,3],:);
244 ## Draw a line through the median
245 median_x = ones (2,1)*[1:nc] + [-a;+a]*box;
246 median_y = s([3,3],:);
248 ## Chop all boxes which don't have enough stats
249 quartile_x(:, chop) = [];
250 quartile_y(:, chop) = [];
251 whisker_x(:,[chop, chop+nc]) = [];
252 whisker_y(:,[chop, chop+nc]) = [];
253 median_x(:, chop) = [];
254 median_y(:, chop) = [];
256 ## Add caps to the remaining whiskers
260 cap_y = whisker_y([1, 1], :);
262 #quartile_x,quartile_y
269 if (isempty (plot_opts))
270 h = plot (quartile_x, quartile_y, "b;;",
271 whisker_x, whisker_y, "b;;",
273 median_x, median_y, "r;;",
274 outliers_x, outliers_y, [symbol(1), "r;;"],
275 outliers2_x, outliers2_y, [symbol(2), "r;;"]);
277 h = plot (quartile_x, quartile_y, "b;;",
278 whisker_x, whisker_y, "b;;",
280 median_x, median_y, "r;;",
281 outliers_x, outliers_y, [symbol(1), "r;;"],
282 outliers2_x, outliers2_y, [symbol(2), "r;;"], plot_opts{:});
285 if (isempty (plot_opts))
286 h = plot (quartile_y, quartile_x, "b;;",
287 whisker_y, whisker_x, "b;;",
289 median_y, median_x, "r;;",
290 outliers_y, outliers_x, [symbol(1), "r;;"],
291 outliers2_y, outliers2_x, [symbol(2), "r;;"]);
293 h = plot (quartile_y, quartile_x, "b;;",
294 whisker_y, whisker_x, "b;;",
296 median_y, median_x, "r;;",
297 outliers_y, outliers_x, [symbol(1), "r;;"],
298 outliers2_y, outliers2_x, [symbol(2), "r;;"], plot_opts{:});
303 nq = 1:size(quartile_x,2);
305 nw = nq(end) + [1:2*size(whisker_x,2)];
307 nm = nw(end)+ [1:size(median_x,2)];
310 if ~isempty (outliers_y)
311 no = nm(end) + [1:size(outliers_y,2)];
314 if ~isempty (outliers2_y)
315 no2 = no(end) + [1:size(outliers2_y,2)];
316 hs.outliers2 = h(no2);
322 %! title ("Grade 3 heights");
324 %! tics ("x", 1:2, {"girls"; "boys"});
325 %! boxplot ({randn(10,1)*5+140, randn(13,1)*8+135});