octave_packages/dataframe-0.9.1/@dataframe/summary.m

   function resu = summary(df)
     %# function resu = summary(df)
     %# Build a text summary of a dataframe on a column-by-column basis and
     %# return it as a character matrix, one panel per column, with the
     %# panels placed side by side.
     %#
     %# Columns whose type is 'char' or 'factor' yield the list of their
     %# unique values, each followed by ':' and its occurrence count. Every
     %# other column yields six statistics labelled Min., 1st Qu., Median,
     %# Mean, 3rd Qu. and Max. In both cases the column name is the first
     %# row of the panel.
     %#
     %# Input:  df   - dataframe object (its _type, _data and _name fields
     %#                are read).
     %# Output: resu - char matrix holding all panels; [] when df has no
     %#                columns.

     %% Copyright (C) 2009-2012 Pascal Dupuis <Pascal.Dupuis@uclouvain.be>
     %%
     %% This file is part of Octave.
     %%
     %% Octave is free software; you can redistribute it and/or
     %% modify it under the terms of the GNU General Public
     %% License as published by the Free Software Foundation;
     %% either version 2, or (at your option) any later version.
     %%
     %% Octave is distributed in the hope that it will be useful,
     %% but WITHOUT ANY WARRANTY; without even the implied
     %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
     %% PURPOSE.  See the GNU General Public License for more
     %% details.
     %%
     %% You should have received a copy of the GNU General Public
     %% License along with Octave; see the file COPYING.  If not,
     %% write to the Free Software Foundation, 51 Franklin Street -
     %% Fifth Floor, Boston, MA 02110-1301, USA.

     %#
     %# $Id: summary.m 9585 2012-02-05 15:32:46Z cdemills $
     %#

     col_types = df._type;
     resu = [];

     for indi = 1:numel(col_types)
       %# Column title, fetched the same way for every column type. Brace
       %# indexing is required so that a char row (not a 1x1 cell) is
       %# stacked on top of the panel below.
       col_name = deblank(df._name{1, 2}{indi, :});

       switch col_types{indi}
         case {'char' 'factor'}
           [sval, sidxi, sidxj] = unique(df._data{:, indi});
           %# compute the occurrence count of each unique value
           counts = hist(sidxj, min(sidxj):max(sidxj));
           %# one right-justified row per unique value
           val_txt = strjust(char(regexp(disp(sval), '\S.*', 'match', ...
                                         'dotexceptnewline')), 'right');
           %# one right-justified row per count
           cnt_txt = strjust(char(regexp(disp(counts.'), '\b.*', 'match', ...
                                         'dotexceptnewline')), 'right');
           panel = horzcat(val_txt, repmat(':', size(val_txt, 1), 1), cnt_txt);
           %# now put the name above all; rows of unequal width rely on
           %# Octave padding char rows during vertical concatenation
           panel = strjust([col_name; panel], 'right');
           %# trailing blank column separates this panel from the next
           panel = horzcat(panel, repmat(' ', size(panel, 1), 1));

         otherwise
           s = statistics(df._data{:, indi});
           %# move element 6 between elements 3 and 4 so the order follows
           %# the labels below (min, Q1, median, mean, Q3, max)
           s = s([1:3 6 4:5]);
           %# first text column: the name followed by the field labels
           labels = strjust([col_name; ...
                             "Min.   :"; "1st Qu.:"; ...
                             "Median :"; "Mean   :"; ...
                             "3rd Qu.:"; "Max.   :"], 'right');
           %# second text column: a blank row facing the name, then the values
           values = strjust(char(' ', regexp(disp(s), '\S.*', 'match', ...
                                             'dotexceptnewline')), 'right');
           spacer = repmat(' ', size(labels, 1), 1);
           panel = horzcat(labels, spacer, values, spacer);

       endswitch

       %# append the panel, padding the shorter side with blank rows
       resu = horzcat_pad(resu, panel);
     endfor

   endfunction
  71
  72
  function resu = horzcat_pad(A, B)
    %# Concatenate two text tables side by side. When their heights differ,
    %# rows of blanks are first appended below the shorter one so that both
    %# reach the same number of rows.
    rows_a = size(A, 1);
    rows_b = size(B, 1);

    if rows_a < rows_b
      %# A is the shorter table: extend it with blank rows of its own width
      filler = repmat(' ', rows_b - rows_a, size(A, 2));
      A = strvcat(A, filler);
    elseif rows_a > rows_b
      %# B is the shorter table: extend it with blank rows of its own width
      filler = repmat(' ', rows_a - rows_b, size(B, 2));
      B = strvcat(B, filler);
    endif

    resu = horzcat(A, B);

  endfunction