X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?a=blobdiff_plain;ds=sidebyside;f=octave_packages%2Fdataframe-0.9.1%2F%40dataframe%2Fprivate%2Fdf_matassign.m;fp=octave_packages%2Fdataframe-0.9.1%2F%40dataframe%2Fprivate%2Fdf_matassign.m;h=ae639d76e2aae34bc0ba9230a20dd44e4aaf2e88;hb=f5f7a74bd8a4900f0b797da6783be80e11a68d86;hp=0000000000000000000000000000000000000000;hpb=1705066eceaaea976f010f669ce8e972f3734b05;p=CreaPhase.git diff --git a/octave_packages/dataframe-0.9.1/@dataframe/private/df_matassign.m b/octave_packages/dataframe-0.9.1/@dataframe/private/df_matassign.m new file mode 100644 index 0000000..ae639d7 --- /dev/null +++ b/octave_packages/dataframe-0.9.1/@dataframe/private/df_matassign.m @@ -0,0 +1,502 @@ +function df = df_matassign(df, S, indc, ncol, RHS) + %# auxiliary function: assign the dataframe as if it was a matrix + + %% Copyright (C) 2009-2012 Pascal Dupuis + %% + %% This file is part of Octave. + %% + %% Octave is free software; you can redistribute it and/or + %% modify it under the terms of the GNU General Public + %% License as published by the Free Software Foundation; + %% either version 2, or (at your option) any later version. + %% + %% Octave is distributed in the hope that it will be useful, + %% but WITHOUT ANY WARRANTY; without even the implied + %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + %% PURPOSE. See the GNU General Public License for more + %% details. + %% + %% You should have received a copy of the GNU General Public + %% License along with Octave; see the file COPYING. If not, + %% write to the Free Software Foundation, 51 Franklin Street - + %% Fifth Floor, Boston, MA 02110-1301, USA. + + %# + %# $Id: df_matassign.m 9615 2012-02-10 15:22:57Z cdemills $ + %# + + if (isnull (RHS)) + if (1 == ncol) + if (sum (~strcmp (S.subs, ':')) > 2) + error("A null assignment can only have one non-colon index."); + endif + elseif (sum (~strcmp (S.subs, ':')) > 1) + error("A null assignment can only have one non-colon index."); + endif + + if (strcmp (S.subs(1), ':')) %# removing column/matrix + RHS = S; RHS.subs(2) = []; + for indi = (indc) + unfolded = df._data{indi}(:, df._rep{indi}); + unfolded = feval (@subsasgn, unfolded, RHS, []); + df._data{indi} = unfolded; + if (~isempty (unfolded)) + df._rep(indi) = 1:size (unfolded, 2); + endif + endfor + %# remove empty elements + indi = cellfun ('isempty', df._data); + if (any (indi)) %# nothing left, remove this column + df._cnt(2) = df._cnt(2) - sum (indi); + indi = ~indi; %# vector of kept data + df._name{2} = df._name{2}(indi); + df._over{2} = df._over{2}(indi); + df._type = df._type(indi); + df._data = df._data(indi); + df._rep = df._rep(indi); + endif + if (size (df._ridx, 3) > 1) + df._ridx(:, indc, :) = []; + endif + elseif (strcmp (S.subs(2), ':')) %# removing rows + indr = S.subs{1}; + if (~isempty (df._name{1})) + df._name{1}(indr, :) = []; + df._over{1}(indr) = []; + endif + df._ridx(indr, :, :) = []; + %# to remove a line, iterate on each column + df._data = cellfun (@(x) feval(@subsasgn, x, S, []), \ + df._data, "UniformOutPut", false); + if (isa (indr, 'char')) + df._cnt(1) = 0; + else + df._cnt(1) = df._cnt(1) - length (indr); + endif + endif + df = df_thirddim (df); + return; + endif + + indc_was_set = ~isempty (indc); + if (~indc_was_set) %# initial dataframe was empty + ncol = size (RHS, 2); indc = 1:ncol; + endif + + indr = S.subs{1, 1}; + indr_was_set = ~isempty (indr); + %# initial dataframe was empty ? + if (~indr_was_set || strcmp (indr, ':')) + if (iscell (RHS)) + nrow = max (sum (cellfun ('size', RHS, 1))); + else + if (isvector (RHS)) + if (0 == df._cnt(1)) + nrow = size (RHS, 1); + else + nrow = df._cnt(1); %# limit to df numbner of rows + endif + else + %# deduce limit from RHS + nrow = size (RHS, 1); + endif + endif + indr = 1:nrow; + elseif (~isempty (indr)) + if (~isnumeric (indr)) + %# translate row names to row index + [indr, nrow] = df_name2idx (df._name{1}, indr, df._cnt(1), 'row'); + S.subs{1, 1} = indr; + else + nrow = length (indr); + endif + endif + if (length (S.subs) > 2) + inds = S.subs{1, 3}; + else + inds = []; + endif + + rname = cell(0, 0); rname_width = max (1, size (df._name{2}, 2)); + ridx = []; cname = rname; ctype = rname; + + if (iscell (RHS)) + if ((length (indc) == df._cnt(2) && size (RHS, 2) >= df._cnt(2)) \ + || 0 == df._cnt(2) || isempty (S.subs{1}) || isempty (S.subs{2})) + %# providing too much information -- remove extra content + if (size (RHS, 1) > 1) + %# at this stage, verify that the first line doesn't contain + %# chars only; use them for column names + dummy = cellfun ('class', \ + RHS(1, ~cellfun ('isempty', RHS(1, :))), \ + 'UniformOutput', false); + dummy = strcmp (dummy, 'char'); + if (all (dummy)) + if (length (df._over{2}) >= max (indc) \ + && ~all (df._over{2}(indc)) && ~isempty (S.subs{2})) + warning("Trying to overwrite colum names"); + endif + + cname = RHS(1, :).'; RHS = RHS(2:end, :); + if (~indr_was_set) + nrow = nrow - 1; indr = 1:nrow; + else + %# we know indr, there is no reason that RHS(:, 1) contains + %# row names. + if (isempty (S.subs{2})) + %# extract columns position from columns names + [indc, ncol, S.subs{2}, dummy] = ... + df_name2idx (df._name{2}, cname, df._cnt(2), 'column'); + if (length (dummy) ~= sum (dummy)) + warning ("Not all RHS column names used"); + cname = cname(dummy); RHS = RHS(:, dummy); + endif + endif + endif + endif + %# at this stage, verify that the first line doesn't contain + %# chars only; use them for column types + dummy = cellfun ('class', \ + RHS(1, ~cellfun ('isempty', RHS(1, :))), \ + 'UniformOutput', false); + dummy = strcmp (dummy, 'char'); + if (all (dummy)) + if (length (df._over{2}) >= max (indc) \ + && ~all (df._over{2}(indc))) + warning ("Trying to overwrite colum names"); + endif + + ctype = RHS(1, :); RHS = RHS(2:end, :); + if (~indr_was_set) + nrow = nrow - 1; indr = 1:nrow; + endif + endif + endif + + %# more elements than df width -- try to use the first two as + %# row index and/or row name + if (size (RHS, 1) > 1) + dummy = all (cellfun ('isnumeric', \ + RHS(~cellfun ('isempty', RHS(:, 1)), 1))); + else + dummy = isnumeric(RHS{1, 1}); + endif + dummy = dummy && (~isempty (cname) && size (cname{1}, 2) < 1); + if (dummy) + ridx = cell2mat (RHS(:, 1)); + %# can it be converted to a list of unique numbers ? + if (length (unique (ridx)) == length (ridx)) + ridx = RHS(:, 1); RHS = RHS(:, 2:end); + if (length (df._name{2}) == df._cnt(2) + ncol) + %# columns name were pre-filled with too much values + df._name{2}(end) = []; + df._over{2}(end) = []; + if (size (RHS, 2) < ncol) + ncol = size (RHS, 2); indc = 1:ncol; + endif + elseif (~indc_was_set) + ncol = ncol - 1; indc = 1:ncol; + endif + if (~isempty (cname)) cname = cname(2:end); endif + if (~isempty (ctype)) ctype = ctype(2:end); endif + else + ridx = []; + endif + endif + + if (size (RHS, 2) > df._cnt(2)) + %# verify the the first row doesn't contain chars only, use them + %# for row names + dummy = cellfun ('class', \ + RHS(~cellfun ('isempty', RHS(:, 1)), 1), \ + 'UniformOutput', false); + dummy = strcmp (dummy, 'char') \ + && (~isempty (cname) && size (cname{1}, 2) < 1); + if (all (dummy)) + if (length (df._over{1}) >= max (indr) \ + && ~all (df._over{1}(indr))) + warning("Trying to overwrite row names"); + else + rname = RHS(:, 1); + endif + rname_width = max ([1; cellfun('size', rname, 2)]); + RHS = RHS(:, 2:end); + if (length (df._name{2}) == df._cnt(2) + ncol) + %# columns name were pre-filled with too much values + df._name{2}(end) = []; + df._over{2}(end) = []; + if (size (RHS, 2) < ncol) + ncol = size (RHS, 2); indc = 1:ncol; + endif + elseif (~indc_was_set) + ncol = ncol - 1; indc = 1:ncol; + endif + if (~isempty (cname)) cname = cname(2:end); endif + if (~isempty (ctype)) ctype = ctype(2:end); endif + endif + endif + endif + endif + + %# perform row resizing if columns are already filled + if (~isempty (indr) && isnumeric(indr)) + if (max (indr) > df._cnt(1) && size (df._data, 2) == df._cnt(2)) + df = df_pad (df, 1, max (indr)-df._cnt(1), rname_width); + endif + endif + + if (iscell(RHS)) %# we must pad on a column-by-column basis + %# verify that each cell contains a non-empty vector, and that sizes + %# are compatible + %# dummy = cellfun ('size', RHS(:), 2); + %# if any (dummy < 1), + %# error("cells content may not be empty"); + %# endif + + %# dummy = cellfun ('size', RHS, 1); + %# if any (dummy < 1), + %# error("cells content may not be empty"); + %# endif + %# if any (diff(dummy) > 0), + %# error("cells content with unequal length"); + %# endif + %# if 1 < size (RHS, 1) && any (dummy > 1), + %# error("cells may only contain scalar"); + %# endif + + if (size(RHS, 2) > indc) + keyboard + endif + + %# the real assignement + if (1 == size (RHS, 1)) %# each cell contains one vector + fillfunc = @(x) RHS{x}; + idxOK = logical(indr); + else %# use cell2mat to pad on a column-by-column basis + fillfunc = @(x) cell2mat (RHS(:, x)); + endif + + indj = 1; + for indi = (1:ncol) + if (indc(indi) > df._cnt(2)) + %# perform dynamic resizing one-by-one, to get type right + if (isempty (ctype) || length (ctype) < indc(indi)) + df = df_pad(df, 2, indc(indi)-df._cnt(2), class(RHS{1, indj})); + else + df = df_pad(df, 2, indc(indi)-df._cnt(2), ctype{indj}); + endif + endif + if (nrow == df._cnt(1)) + %# whole assignement + try + if (size (RHS, 1) <= 1) + switch df._type{indc(indi)} + case {'char' } %# use a cell array to hold strings + dummy = RHS(:, indj); + case {'double' } + dummy = fillfunc (indj); + otherwise + dummy = cast(fillfunc (indj), df._type{indc(indi)}); + endswitch + else + %# keeps indexes in sync as cell elements may be empty + idxOK = ~cellfun ('isempty', RHS(:, indj)); + %# intialise dummy so that it can receive "anything" + dummy = []; + switch (df._type{indc(indi)}) + case {'char' } %# use a cell array to hold strings + dummy = RHS(:, indj); + case {'double' } + dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA; + otherwise + dummy(idxOK, :) = fillfunc (indj); dummy(~idxOK, :) = NA; + dummy = cast(dummy, df._type{indc(indi)}); + endswitch + endif + catch + dummy = \ + sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \ + indj, df._type{indc(indi)}, length (indr), disp (RHS(:, indj))); + error (dummy); + end_try_catch + if (size (dummy, 1) < df._cnt(1)) + dummy(end+1:df._cnt(1), :) = NA; + endif + else + %# partial assignement -- extract actual data and update + dummy = df._data{indc(indi)}; + try + switch (df._type{indc(indi)}) + case {'char' } %# use a cell array to hold strings + dummy(indr, 1) = RHS(:, indj); + case {'double' } + dummy(indr, :) = fillfunc (indj); + otherwise + dummy(indr, :) = cast(fillfunc (indj), df._type{indc(indi)}); + endswitch + catch + dummy = \ + sprintf ("Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s", \ + indj, df._type{indc(indi)}, length (indr), disp(RHS(:, indj))); + error (dummy); + end_try_catch + endif + df._data{indc(indi)} = dummy; df._rep{indc(indi)} = 1:size (dummy, 2); + indj = indj + 1; + endfor + + else + %# RHS is either a numeric, either a df + if (any (indc > min (size (df._data, 2), df._cnt(2)))) + df = df_pad(df, 2, max (indc-min (size (df._data, 2), df._cnt(2))),\ + class(RHS)); + endif + if (~isempty (inds) && isnumeric(inds) && any (inds > 1)) + for indi = (1:length (indc)) + if (max (inds) > length (df._rep{indc(indi)})) + df = df_pad(df, 3, max (inds)-length (df._rep{indc(indi)}), \ + indc(indi)); + endif + endfor + endif + + if (isa (RHS, 'dataframe')) + %# block-copy index + S.subs(2) = 1; + if (any (~isna(RHS._ridx))) + df._ridx = feval(@subsasgn, df._ridx, S, RHS._ridx); + endif + %# skip second dim and copy data + S.subs(2) = []; Sorig = S; + for indi = (1:length (indc)) + [df, S] = df_cow(df, S, indc(indi)); + if (strcmp (df._type(indc(indi)), RHS._type(indi))) + try + df._data{indc(indi)} = feval(@subsasgn, df._data{indc(indi)}, S, \ + RHS._data{indi}(:, RHS._rep{indi})); + catch + disp(lasterr()); disp('line 516 ???'); keyboard + end_try_catch + else + df._data{indc(indi)} = feval(@subsasgn, df._data{indc(indi)}, S, \ + cast(RHS._data{indi}(:, RHS._rep{indi}),\ + df._type(indc(indi)))); + endif + S = Sorig; + endfor + if (~isempty (RHS._name{1})) + df._name{1}(indr) = genvarname(RHS._name{1}(indr)); + df._over{1}(indr) = RHS._over{1}(indr); + endif + if (~isempty (RHS._src)) + if (~any (strcmp (cellstr(df._src), cellstr(RHS._src)))) + df._src = vertcat(df._src, RHS._src); + endif + endif + if (~isempty (RHS._cmt)) + if (~any (strcmp (cellstr(df._cmt), cellstr(RHS._cmt)))) + df._cmt = vertcat(df._cmt, RHS._cmt); + endif + endif + + else + %# RHS is homogenous, pad at once + if (isvector (RHS)) %# scalar - vector + if (isempty (S.subs)) + fillfunc = @(x, y) RHS; + else + %# ignore 'column' dimension -- force colum vectors -- use a + %# third dim just in case + if (isempty (S.subs{1})) S.subs{1} = ':'; endif + S.subs(2) = []; + if (length (S.subs) < 2) + S.subs{2} = 1; + endif + if (length (indc) > 1 && length (RHS) > 1) + %# set a row from a vector + fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS(y)); + else + fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS); + endif + endif + Sorig = S; + for indi = (1:length (indc)) + try + [df, S] = df_cow(df, S, indc(indi)); + df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi); + S = Sorig; + catch + disp(lasterr) + disp('line 470 '); keyboard + end_try_catch + # catch + # if ndims(df._data{indc(indi)}) > 2, + # %# upstream forgot to give the third dim + # dummy = S; dummy.subs(3) = 1; + # df._data{indc(indi)} = fillfunc(df._data{indc(indi)}, \ + # dummy, indi); + # else + # rethrow(lasterr()); + # endif + # end_try_catch + endfor + else %# 2D - 3D matrix + S.subs(2) = []; %# ignore 'column' dimension + if (isempty (S.subs{1})) + S.subs{1} = indr; + endif + %# rotate slices in dim 1-3 to slices in dim 1-2 + fillfunc = @(x, S, y) feval(@subsasgn, x, S, squeeze(RHS(:, y, :))); + Sorig = S; + for indi = (1:length (indc)) + [df, S] = df_cow(df, S, indc(indi)); + df._data{indc(indi)} = fillfunc (df._data{indc(indi)}, S, indi); + S = Sorig; + endfor + endif + if (indi < size (RHS, 2) && ~isa (RHS, 'char')) + warning (' not all columns of RHS used'); + endif + endif + endif + + %# delayed row padding -- column padding occured before + if (~isempty (indr) && isnumeric (indr)) + if (max (indr) > df._cnt(1) && size (df._data, 2) < df._cnt(2)) + df = df_pad(df, 1, max (indr)-df._cnt(1), rname_width); + endif + endif + + %# adjust ridx and rnames, if required + if (~isempty (ridx)) + dummy = df._ridx; + if (1 == size (RHS, 1)) + dummy(indr) = ridx{1}; + else + dummy(indr) = vertcat(ridx{indr}); + endif + if (length (unique (dummy)) ~= length (dummy)) %# || \ + %# any (diff(dummy) <= 0), + error("row indexes are not unique or not ordered"); + endif + df._ridx = dummy; + endif + + if (~isempty (rname) && (length (df._over{1}) < max (indr) || \ + all (df._over{1}(indr)))) + df._name{1}(indr, 1) = genvarname(rname); + df._over{1}(1, indr) = false; + endif + if (~isempty (cname) && (length (df._over{2}) < max (indc) || \ + all (df._over{2}(indc)))) + try + df._name{2}(indc, 1) = genvarname (cname); + catch + disp('line 472 '); keyboard + end_try_catch + df._over{2}(1, indc) = false; + endif + + df = df_thirddim (df); + +endfunction