octave_packages/dataframe-0.9.1/@dataframe/private/df_matassign.m

   1 function df = df_matassign(df, S, indc, ncol, RHS)
   2   %# auxiliary function: assign the dataframe as if it was a matrix
   3
   4   %% Copyright (C) 2009-2012 Pascal Dupuis <Pascal.Dupuis@uclouvain.be>
   5   %%
   6   %% This file is part of Octave.
   7   %%
   8   %% Octave is free software; you can redistribute it and/or
   9   %% modify it under the terms of the GNU General Public
  10   %% License as published by the Free Software Foundation;
  11   %% either version 2, or (at your option) any later version.
  12   %%
  13   %% Octave is distributed in the hope that it will be useful,
  14   %% but WITHOUT ANY WARRANTY; without even the implied
  15   %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  16   %% PURPOSE.  See the GNU General Public License for more
  17   %% details.
  18   %%
  19   %% You should have received a copy of the GNU General Public
  20   %% License along with Octave; see the file COPYING.  If not,
  21   %% write to the Free Software Foundation, 51 Franklin Street -
  22   %% Fifth Floor, Boston, MA 02110-1301, USA.
  23
  24   %#
  25   %# $Id: df_matassign.m 9615 2012-02-10 15:22:57Z cdemills $
  26   %#
  27
  28   if (isnull (RHS))
  29     if (1 == ncol)
  30       if (sum (~strcmp (S.subs, ':')) > 2)
  31         error("A null assignment can only have one non-colon index.");
  32       endif
  33     elseif (sum (~strcmp (S.subs, ':')) > 1)
  34       error("A null assignment can only have one non-colon index.");
  35     endif
  36
  37     if (strcmp (S.subs(1), ':'))  %# removing column/matrix
  38       RHS = S; RHS.subs(2) = [];
  39       for indi = (indc)
  40         unfolded  = df._data{indi}(:, df._rep{indi});
  41         unfolded  = feval (@subsasgn, unfolded, RHS, []);
  42         df._data{indi} = unfolded;
  43         if (~isempty (unfolded))
  44           df._rep(indi) = 1:size (unfolded, 2);
  45         endif
  46       endfor
  47       %# remove empty elements
  48       indi = cellfun ('isempty', df._data);
  49       if (any (indi)) %# nothing left, remove this column
  50         df._cnt(2) = df._cnt(2) - sum (indi);
  51         indi = ~indi; %# vector of kept data
  52         df._name{2} = df._name{2}(indi);
  53         df._over{2} = df._over{2}(indi);
  54         df._type = df._type(indi);
  55         df._data = df._data(indi);
  56         df._rep = df._rep(indi);
  57       endif
  58       if (size (df._ridx, 3) > 1)
  59         df._ridx(:, indc, :) = [];
  60       endif
  61     elseif (strcmp (S.subs(2), ':'))  %# removing rows
  62       indr = S.subs{1};
  63       if (~isempty (df._name{1}))
  64         df._name{1}(indr, :) = [];
  65         df._over{1}(indr) = [];
  66       endif
  67       df._ridx(indr, :, :) = [];
  68       %# to remove a line, iterate on each column
  69       df._data = cellfun (@(x) feval(@subsasgn, x, S, []), \
  70                          df._data, "UniformOutPut", false);
  71       if (isa (indr, 'char'))
  72          df._cnt(1) = 0;
  73        else
  74          df._cnt(1) = df._cnt(1) - length (indr);
  75        endif
  76     endif
  77     df = df_thirddim (df);
  78     return;
  79   endif
  80
  81   indc_was_set = ~isempty (indc);
  82   if (~indc_was_set) %# initial dataframe was empty
  83     ncol = size (RHS, 2); indc = 1:ncol;
  84   endif
  85
  86   indr = S.subs{1, 1};
  87   indr_was_set = ~isempty (indr);
  88   %# initial dataframe was empty ?
  89   if (~indr_was_set || strcmp (indr, ':'))
  90     if (iscell (RHS))
  91       nrow = max (sum (cellfun ('size', RHS, 1)));
  92     else
  93       if (isvector (RHS))
  94         if (0 == df._cnt(1))
  95           nrow = size (RHS, 1);
  96         else
  97           nrow = df._cnt(1);  %# limit to df numbner of rows
  98         endif
  99       else
 100         %# deduce limit from RHS
 101         nrow = size (RHS, 1);
 102       endif
 103     endif
 104     indr = 1:nrow;
 105   elseif (~isempty (indr))
 106     if (~isnumeric (indr))
 107       %# translate row names to row index
 108       [indr, nrow] = df_name2idx (df._name{1}, indr, df._cnt(1), 'row');
 109       S.subs{1, 1} = indr;
 110     else
 111       nrow = length (indr);
 112     endif
 113   endif
 114   if (length (S.subs) > 2)
 115     inds = S.subs{1, 3};
 116   else
 117     inds = [];
 118   endif
 119
 120   rname = cell(0, 0); rname_width = max (1, size (df._name{2}, 2));
 121   ridx = []; cname = rname; ctype = rname;
 122
 123   if (iscell (RHS))
 124     if ((length (indc) == df._cnt(2) && size (RHS, 2) >=  df._cnt(2)) \
 125         || 0 == df._cnt(2) || isempty (S.subs{1}) || isempty (S.subs{2}))
 126       %# providing too much information -- remove extra content
 127       if (size (RHS, 1) > 1)
 128         %# at this stage, verify that the first line doesn't contain
 129         %# chars only; use them for column names
 130         dummy = cellfun ('class', \
 131                          RHS(1, ~cellfun ('isempty', RHS(1, :))), \
 132                          'UniformOutput', false);
 133         dummy = strcmp (dummy, 'char');
 134         if (all (dummy))
 135           if (length (df._over{2}) >= max (indc) \
 136                 && ~all (df._over{2}(indc)) && ~isempty (S.subs{2}))
 137             warning("Trying to overwrite colum names");
 138           endif
 139
 140           cname = RHS(1, :).'; RHS = RHS(2:end, :);
 141           if (~indr_was_set)
 142             nrow = nrow - 1; indr = 1:nrow;
 143           else
 144             %# we know indr, there is no reason that RHS(:, 1) contains
 145             %# row names.
 146             if (isempty (S.subs{2}))
 147               %# extract columns position from columns names
 148               [indc, ncol,  S.subs{2}, dummy] = ...
 149                   df_name2idx (df._name{2}, cname, df._cnt(2), 'column');
 150               if (length (dummy) ~= sum (dummy))
 151                 warning ("Not all RHS column names used");
 152                 cname = cname(dummy); RHS = RHS(:, dummy);
 153               endif
 154             endif
 155           endif
 156         endif
 157         %# at this stage, verify that the first line doesn't contain
 158         %# chars only; use them for column types
 159         dummy = cellfun ('class', \
 160                         RHS(1, ~cellfun ('isempty', RHS(1, :))), \
 161                         'UniformOutput', false);
 162         dummy = strcmp (dummy, 'char');
 163         if (all (dummy))
 164           if (length (df._over{2}) >= max (indc) \
 165                 && ~all (df._over{2}(indc)))
 166             warning ("Trying to overwrite colum names");
 167           endif
 168
 169           ctype = RHS(1, :); RHS = RHS(2:end, :);
 170           if (~indr_was_set)
 171             nrow = nrow - 1; indr = 1:nrow;
 172           endif
 173         endif
 174       endif
 175
 176       %# more elements than df width -- try to use the first two as
 177       %# row index and/or row name
 178       if (size (RHS, 1) > 1)
 179         dummy = all (cellfun ('isnumeric', \
 180                             RHS(~cellfun ('isempty', RHS(:, 1)), 1)));
 181       else
 182         dummy =  isnumeric(RHS{1, 1});
 183       endif
 184       dummy = dummy && (~isempty (cname) && size (cname{1}, 2) < 1);
 185       if (dummy)
 186         ridx = cell2mat (RHS(:, 1));
 187         %# can it be converted to a list of unique numbers ?
 188         if (length (unique (ridx)) == length (ridx))
 189           ridx = RHS(:, 1); RHS = RHS(:, 2:end);
 190           if (length (df._name{2}) == df._cnt(2) + ncol)
 191             %# columns name were pre-filled with too much values
 192             df._name{2}(end) = [];
 193             df._over{2}(end) = [];
 194             if (size (RHS, 2) < ncol)
 195               ncol = size (RHS, 2); indc = 1:ncol;
 196             endif
 197           elseif (~indc_was_set)
 198             ncol = ncol - 1;  indc = 1:ncol;
 199           endif
 200           if (~isempty (cname)) cname = cname(2:end); endif
 201           if (~isempty (ctype)) ctype = ctype(2:end); endif
 202         else
 203           ridx = [];
 204         endif
 205       endif
 206
 207       if (size (RHS, 2) >  df._cnt(2))
 208         %# verify the the first row doesn't contain chars only, use them
 209         %# for row names
 210         dummy = cellfun ('class', \
 211                         RHS(~cellfun ('isempty', RHS(:, 1)), 1), \
 212                         'UniformOutput', false);
 213         dummy = strcmp (dummy, 'char') \
 214             && (~isempty (cname) && size (cname{1}, 2) < 1);
 215         if (all (dummy))
 216           if (length (df._over{1}) >= max (indr) \
 217                 && ~all (df._over{1}(indr)))
 218             warning("Trying to overwrite row names");
 219           else
 220             rname = RHS(:, 1);
 221           endif
 222           rname_width = max ([1; cellfun('size', rname, 2)]);
 223           RHS = RHS(:, 2:end);
 224           if (length (df._name{2}) == df._cnt(2) + ncol)
 225             %# columns name were pre-filled with too much values
 226             df._name{2}(end) = [];
 227             df._over{2}(end) = [];
 228             if (size (RHS, 2) < ncol)
 229               ncol = size (RHS, 2); indc = 1:ncol;
 230             endif
 231           elseif (~indc_was_set)
 232             ncol = ncol - 1;  indc = 1:ncol;
 233           endif
 234           if (~isempty (cname)) cname = cname(2:end); endif
 235           if (~isempty (ctype)) ctype = ctype(2:end); endif
 236         endif
 237       endif
 238     endif
 239   endif
 240
 241   %# perform row resizing if columns are already filled
 242   if (~isempty (indr) && isnumeric(indr))
 243     if (max (indr) > df._cnt(1) && size (df._data, 2) == df._cnt(2))
 244       df = df_pad (df, 1, max (indr)-df._cnt(1), rname_width);
 245     endif
 246   endif
 247
 248   if (iscell(RHS)) %# we must pad on a column-by-column basis
 249     %# verify that each cell contains a non-empty vector, and that sizes
 250     %# are compatible
 251     %# dummy = cellfun ('size', RHS(:), 2);
 252     %# if any (dummy < 1),
 253     %#   error("cells content may not be empty");
 254     %# endif
 255
 256     %# dummy = cellfun ('size', RHS, 1);
 257     %# if any (dummy < 1),
 258     %#   error("cells content may not be empty");
 259     %# endif
 260     %# if any (diff(dummy) > 0),
 261     %#   error("cells content with unequal length");
 262     %# endif
 263     %# if 1 < size (RHS, 1) && any (dummy > 1),
 264     %#   error("cells may only contain scalar");
 265     %# endif
 266
 267     if (size(RHS, 2) > indc)
 268       keyboard
 269     endif
 270
 271     %# the real assignement
 272     if (1 == size (RHS, 1)) %# each cell contains one vector
 273       fillfunc = @(x) RHS{x};
 274       idxOK = logical(indr);
 275     else %# use cell2mat to pad on a column-by-column basis
 276       fillfunc = @(x) cell2mat (RHS(:, x));
 277     endif
 278
 279     indj = 1;
 280     for indi = (1:ncol)
 281       if (indc(indi) > df._cnt(2))
 282         %# perform dynamic resizing one-by-one, to get type right
 283         if (isempty (ctype) || length (ctype) < indc(indi))
 284           df = df_pad(df, 2, indc(indi)-df._cnt(2), class(RHS{1, indj}));
 285         else
 286           df = df_pad(df, 2, indc(indi)-df._cnt(2), ctype{indj});
 287         endif
 288       endif
 289       if (nrow == df._cnt(1))
 290         %# whole assignement
 291         try
 292           if (size (RHS, 1) <= 1)
 293             switch df._type{indc(indi)}
 294               case {'char' } %# use a cell array to hold strings
 295                 dummy = RHS(:, indj);
 296               case {'double' }
 297                 dummy = fillfunc (indj);
 298               otherwise
 299                 dummy = cast(fillfunc (indj), df._type{indc(indi)});
 300             endswitch
 301           else
 302             %# keeps indexes in sync as cell elements may be empty
 303             idxOK = ~cellfun ('isempty', RHS(:, indj));
 304             %# intialise dummy so that it can receive "anything"
 305             dummy = [];
 306             switch (df._type{indc(indi)})
 307               case {'char' } %# use a cell array to hold strings
 308                 dummy = RHS(:, indj);
 309               case {'double' }
 310                 dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA;
 311               otherwise
 312                 dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA;
 313                 dummy = cast(dummy, df._type{indc(indi)});
 314             endswitch
 315           endif
 316         catch
 317           dummy = \
 318               sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \
 319                        indj, df._type{indc(indi)}, length (indr), disp (RHS(:, indj)));
 320           error (dummy);
 321         end_try_catch
 322         if (size (dummy, 1) < df._cnt(1))
 323           dummy(end+1:df._cnt(1), :) = NA;
 324         endif
 325       else
 326         %# partial assignement -- extract actual data and update
 327         dummy = df._data{indc(indi)};
 328         try
 329           switch (df._type{indc(indi)})
 330             case {'char' } %# use a cell array to hold strings
 331               dummy(indr, 1) = RHS(:, indj);
 332             case {'double' }
 333               dummy(indr, :) = fillfunc (indj);
 334             otherwise
 335               dummy(indr, :) = cast(fillfunc (indj), df._type{indc(indi)});
 336           endswitch
 337         catch
 338           dummy = \
 339               sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \
 340                        indj, df._type{indc(indi)}, length (indr), disp(RHS(:, indj)));
 341           error (dummy);
 342         end_try_catch
 343       endif
 344       df._data{indc(indi)} = dummy; df._rep{indc(indi)} = 1:size (dummy, 2);
 345       indj = indj + 1;
 346     endfor
 347
 348   else
 349     %# RHS is either a numeric, either a df
 350     if (any (indc > min (size (df._data, 2), df._cnt(2))))
 351       df = df_pad(df, 2, max (indc-min (size (df._data, 2), df._cnt(2))),\
 352                    class(RHS));
 353     endif
 354     if (~isempty (inds) && isnumeric(inds) && any (inds > 1))
 355       for indi = (1:length (indc))
 356         if (max (inds) > length (df._rep{indc(indi)}))
 357           df = df_pad(df, 3, max (inds)-length (df._rep{indc(indi)}), \
 358                       indc(indi));
 359         endif
 360       endfor
 361     endif
 362
 363     if (isa (RHS, 'dataframe'))
 364       %# block-copy index
 365       S.subs(2) = 1;
 366       if (any (~isna(RHS._ridx)))
 367         df._ridx = feval(@subsasgn,  df._ridx, S,  RHS._ridx);
 368       endif
 369       %# skip second dim and copy data
 370       S.subs(2) = []; Sorig = S;
 371       for indi = (1:length (indc))
 372         [df, S] = df_cow(df, S, indc(indi));
 373         if (strcmp (df._type(indc(indi)), RHS._type(indi)))
 374           try
 375             df._data{indc(indi)} = feval(@subsasgn, df._data{indc(indi)}, S, \
 376                                          RHS._data{indi}(:, RHS._rep{indi}));
 377           catch
 378             disp(lasterr()); disp('line 516 ???'); keyboard
 379           end_try_catch
 380         else
 381           df._data{indc(indi)} = feval(@subsasgn, df._data{indc(indi)}, S, \
 382                                        cast(RHS._data{indi}(:, RHS._rep{indi}),\
 383                                             df._type(indc(indi))));
 384         endif
 385         S = Sorig;
 386       endfor
 387       if (~isempty (RHS._name{1}))
 388         df._name{1}(indr) = genvarname(RHS._name{1}(indr));
 389         df._over{1}(indr) = RHS._over{1}(indr);
 390       endif
 391       if (~isempty (RHS._src))
 392         if (~any (strcmp (cellstr(df._src), cellstr(RHS._src))))
 393           df._src = vertcat(df._src, RHS._src);
 394         endif
 395       endif
 396       if (~isempty (RHS._cmt))
 397         if (~any (strcmp (cellstr(df._cmt), cellstr(RHS._cmt))))
 398           df._cmt = vertcat(df._cmt, RHS._cmt);
 399         endif
 400       endif
 401
 402     else
 403       %# RHS is homogenous, pad at once
 404       if (isvector (RHS)) %# scalar - vector
 405         if (isempty (S.subs))
 406           fillfunc = @(x, y) RHS;
 407         else
 408           %# ignore 'column' dimension -- force colum vectors -- use a
 409           %# third dim just in case
 410           if (isempty (S.subs{1})) S.subs{1} = ':'; endif
 411           S.subs(2) = [];
 412           if (length (S.subs) < 2)
 413             S.subs{2} = 1;
 414           endif
 415           if (length (indc) > 1 && length (RHS) > 1)
 416             %# set a row from a vector
 417             fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS(y));
 418           else
 419             fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS);
 420           endif
 421         endif
 422         Sorig = S;
 423         for indi = (1:length (indc))
 424           try
 425             [df, S] = df_cow(df, S, indc(indi));
 426             df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi);
 427             S = Sorig;
 428           catch
 429             disp(lasterr)
 430             disp('line 470 '); keyboard
 431           end_try_catch
 432           # catch
 433           #   if ndims(df._data{indc(indi)}) > 2,
 434           #     %# upstream forgot to give the third dim
 435           #     dummy = S; dummy.subs(3) = 1;
 436           #     df._data{indc(indi)} = fillfunc(df._data{indc(indi)}, \
 437           #                                   dummy, indi);
 438           #   else
 439           #     rethrow(lasterr());
 440           #   endif
 441           # end_try_catch
 442         endfor
 443       else %# 2D - 3D matrix
 444         S.subs(2) = []; %# ignore 'column' dimension
 445         if (isempty (S.subs{1}))
 446           S.subs{1} = indr;
 447         endif
 448         %# rotate slices in dim 1-3 to slices in dim 1-2
 449         fillfunc = @(x, S, y) feval(@subsasgn, x, S, squeeze(RHS(:, y, :)));
 450         Sorig = S;
 451         for indi = (1:length (indc))
 452           [df, S] = df_cow(df, S, indc(indi));
 453           df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi);
 454           S = Sorig;
 455         endfor
 456       endif
 457       if (indi < size (RHS, 2) && ~isa (RHS, 'char'))
 458         warning (' not all columns of RHS used');
 459       endif
 460     endif
 461   endif
 462
 463   %# delayed row padding -- column padding occured before
 464   if (~isempty (indr) && isnumeric (indr))
 465     if (max (indr) > df._cnt(1) && size (df._data, 2) < df._cnt(2))
 466       df = df_pad(df, 1, max (indr)-df._cnt(1), rname_width);
 467     endif
 468   endif
 469
 470   %# adjust ridx and rnames, if required
 471   if (~isempty (ridx))
 472     dummy = df._ridx;
 473     if (1 == size (RHS, 1))
 474       dummy(indr) = ridx{1};
 475     else
 476       dummy(indr) = vertcat(ridx{indr});
 477     endif
 478     if (length (unique (dummy)) ~= length (dummy)) %# || \
 479           %# any (diff(dummy) <= 0),
 480       error("row indexes are not unique or not ordered");
 481     endif
 482     df._ridx = dummy;
 483   endif
 484
 485   if (~isempty (rname) && (length (df._over{1}) < max (indr) || \
 486         all (df._over{1}(indr))))
 487     df._name{1}(indr, 1) = genvarname(rname);
 488     df._over{1}(1, indr) = false;
 489   endif
 490   if (~isempty (cname) && (length (df._over{2}) < max (indc) || \
 491         all (df._over{2}(indc))))
 492     try
 493       df._name{2}(indc, 1) = genvarname (cname);
 494     catch
 495       disp('line 472 '); keyboard
 496     end_try_catch
 497     df._over{2}(1, indc) = false;
 498   endif
 499
 500   df = df_thirddim (df);
 501
 502 endfunction