1 function resu = subsref(df, S)
2 %# function resu = subsref(df, S)
3 %# This function returns a subpart of a dataframe. It is invoked when
4 %# calling df.field, df(value), or df{value}. In the case of fields, it
5 %# returns either the content of the container with the same name
6 %# or the column with the same name, priority being given to the
7 %# container. In the case of a range, selection may occur on name or order
8 %# (not rowidx for rows). If the result is homogeneous, it is
9 %# downclassed. If an extra field is given, it is used to
10 %# determine the class of the return value. For instance,
11 %# df(1, 2, 'dataframe')
12 %# does not return a scalar but a dataframe, keeping all the meta-information
14 %% Copyright (C) 2009-2012 Pascal Dupuis <Pascal.Dupuis@uclouvain.be>
16 %% This file is part of Octave.
18 %% Octave is free software; you can redistribute it and/or
19 %% modify it under the terms of the GNU General Public
20 %% License as published by the Free Software Foundation;
21 %% either version 2, or (at your option) any later version.
23 %% Octave is distributed in the hope that it will be useful,
24 %% but WITHOUT ANY WARRANTY; without even the implied
25 %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
26 %% PURPOSE. See the GNU General Public License for more
29 %% You should have received a copy of the GNU General Public
30 %% License along with Octave; see the file COPYING. If not,
31 %% write to the Free Software Foundation, 51 Franklin Street -
32 %% Fifth Floor, Boston, MA 02110-1301, USA.
35 %# $Id: subsref.m 9585 2012-02-05 15:32:46Z cdemills $
38 %# what kind of object should we return ?
39 asked_output_type = ''; asked_output_format = [];
41 if (strcmp (S(1).type, '.')) %# struct access
42 indi = strmatch (S(1).subs, 'as');
44 if (length (S) < 2 || ~strcmp (S(2).type, '.'))
45 error ("The output format qualifier 'as' must be followed by a type");
47 asked_output_type = "array";
48 asked_output_format = S(2).subs; S = S(3:end);
50 indi = strmatch(S(1).subs, 'array');
52 asked_output_type = "array";
55 indi = strmatch (S(1).subs, char ('df', class (df)));
57 %# requiring a dataframe
58 if (1 == indi) %# 'df' = short for 'dataframe'
59 asked_output_type = 'dataframe';
61 asked_output_type = S(1).subs;
64 if (isempty (S) && strcmp (asked_output_type, class (df)))
68 indi = strmatch(S(1).subs, 'cell');
70 asked_output_type = S(1).subs;
73 %# access as a pseudo-struct
74 resu = struct(df); %# avoid recursive calls
75 if (1 == strfind(S(1).subs, '_')) %# its an internal field name
76 %# FIXME: this should only be called from class members and friends
77 %# FIXME -- in case many columns are asked, horzcat them
78 resu = horzcat (feval (@subsref, resu, S));
80 %# direct access through the exact column name
81 indi = strmatch(S(1).subs, resu._name{2}, "exact");
83 resu = df._data{indi}; %# extract colum;
84 if (strcmp (df._type{indi}, 'char') \
85 && 1 == size (df._data{indi}, 2))
89 dummy = S(2:end); S = S(1);
92 if (isa(dummy(1).subs{1}, "char"))
93 [indr, nrow, dummy(1).subs{1}] = \
94 df_name2idx(df._name{1}, dummy(1).subs{1}, df._cnt(1), 'row');
96 resu = feval(@subsref, resu, dummy);
98 error ("Invalid column access");
101 else %# access of an attribute
102 dummy = S(2:end); S = S(1);
103 postop = ''; further_deref = false;
104 %# translate the external to internal name
108 S(2).type = "{}"; S(2).subs{1}= 1;
109 postop = @(x) char (x);
112 S(2).type = "{}"; S(2).subs{1}= 2;
113 postop = @(x) char (x); further_deref = true;
116 S(2).type = "()"; S(2).subs{1}= 1;
119 S(2).type = "()"; S(2).subs{1}= 2;
121 S(1).subs = "_ridx"; further_deref = true;
122 case "types" %# this one should be accessed as a matrix
123 S(1).subs = "_type"; further_deref = true;
126 further_deref = true;
129 further_deref = true;
132 resu = dataframe([]);
134 if (!strcmp (dummy(1).type, "()"))
135 error ("Bogus constructor call");
137 resu = dataframe(dummy(1).subs{:});
139 if (length (dummy) > 1)
140 resu = subsref(resu, dummy(2:end));
144 error ("Unknown column name: %s", S(1).subs);
146 if (!isempty (dummy))
148 error ("Invalid sub-dereferencing");
150 if (isa(dummy(1).subs{1}, "char"))
151 [indc, ncol, dummy(1).subs{1}] = \
152 df_name2idx(df._name{2}, dummy(1).subs{1}, \
153 df._cnt(2), 'column');
155 %# should already be caught inside df_name2idx
156 error ("Unknown column name: %s", dummy(1).subs{1});
159 if (!strcmp (dummy(1).type, '()'))
160 error ("Invalid internal field name sub-access, use () instead");
163 %# workaround for bug 30921, fixed in hg changeset 10937
164 %# if !isempty (dummy)
167 resu = feval(@subsref, resu, S);
168 if (!isempty (postop))
180 %# disp('line 103 '); keyboard
183 while 1, %# avoid recursive calls on dataframe sub-accesses
185 %# a priori, performs whole accesses
186 nrow = df._cnt(1); indr = 1:nrow;
187 ncol = df._cnt(2); indc = 1:ncol;
189 [fullindr, fullindc, fullinds, onedimidx] = deal([]);
191 %# iterate over S, sort out strange constructs such as x()()(1:10, 1:4)
192 while length (S) > 0,
193 if (strcmp (S(1).type, '{}'))
194 if (!IsFirst || !isempty (asked_output_format))
195 error ("Illegal dataframe dereferencing");
197 [asked_output_type, asked_output_format] = deal('cell');
198 elseif (!strcmp (S(1).type, '()'))
200 error ("Illegal dataframe dereferencing");
202 if (isempty (S(1).subs)) %# process calls like x()
203 if (isempty (asked_output_type))
204 asked_output_type = class (df);
207 if (strcmp (asked_output_type, class (df)))
208 %# whole access without conversion
211 break; %# no dimension specified -- select all, the
212 %# asked_output_type was set in a previous iteration
214 %# avoid recursive calls
216 IsFirst = false; continue;
220 if (isempty (S(1).subs{1}))
221 error ('subsref: first dimension empty ???');
223 if (length (S(1).subs) > 1)
224 if (isempty (S(1).subs{2}))
225 error ('subsref: second dimension empty ???');
227 [indr, nrow, S(1).subs{1}] = \
228 df_name2idx(df._name{1}, S(1).subs{1}, df._cnt(1), 'row');
229 if (!isa(indr, 'char') && max (indr) > df._cnt(1))
230 error ("Accessing dataframe past end of lines");
232 [indc, ncol, S(1).subs{2}] = \
233 df_name2idx(df._name{2}, S(1).subs{2}, df._cnt(2), 'column');
234 if (max (indc) > df._cnt(2))
235 %# is it a two index access of a 3D structure ?
236 if (length (df._cnt) > 2)
237 [fullindc, fullinds] = ind2sub (df._cnt(2:3), indc);
238 if (fullindc <= df._cnt(2))
239 indc = fullindc; inds = fullinds;
243 if (max (indc) > df._cnt(2))
244 error ("Accessing dataframe past end of columns");
248 %# one single dim -- probably something like df(:), df(A), ...
249 fullindr = 1; onedimidx = S(1).subs{1};
250 switch class (S(1).subs{1})
251 case {'char'} %# one dimensional access, disallow it if not ':'
252 if (strcmp (S(1).subs{1}, ':'))
253 fullindr = []; fullindc = []; asked_output_type = "array";
255 error (["Accessing through single dimension and name " \
256 S(1).subs{1} " not allowed\n-- use variable(:, 'name') instead"]);
259 S(1).subs{1} = find(S(1).subs{1});
261 S(1).subs{1} = subsindex(S(1).subs{1}, 1);
264 if (isempty (S(1).subs{1}))
265 resu = df_colmeta(df);
269 if (!isempty (fullindr))
270 %# convert linear index to subscripts
271 if (length (df._cnt) <= 2)
272 [fullindr, fullindc] = ind2sub (df._cnt, S(1).subs{1});
273 fullinds = ones (size (fullindr));
275 dummy = max (cellfun(@length, df._rep));
276 [fullindr, fullindc, fullinds] = ind2sub\
277 ([df._cnt dummy], S(1).subs{1});
280 indr = unique (fullindr); nrow = length (indr);
281 %# determine on which columns we'll iterate
282 indc = unique (fullindc)(:).'; ncol = length (indc);
283 if (!isempty (asked_output_type) && ncol > 1)
284 %# verify that the extracted values form a rectangular block
285 dummy = zeros(indr(end), indc(end));
287 indj = find (fullindc == indc(indi));
288 dummy(fullindr(indj), indc(indi)) = 1;
290 dummy = dummy(indr(1):indr(end), indc(1):indc(end));
291 if (any (any (dummy!= 1)))
292 error ("Vector-like selection is not rectangular for the asked output type");
294 fullindr = []; fullindc = [];
299 %# at this point, S is either empty or contains further dereferencing
303 %# we're ready to extract data
304 %# disp('line 211 '); keyboard
306 if (isempty (asked_output_type))
307 output_type = class (df); %# force df output
309 if (!strcmp (asked_output_type, "array") \
310 || !isempty (asked_output_format))
311 %# override the class of the return value
312 output_type = asked_output_type;
314 %# can the data be merged ?
315 output_type = df._data{indc(1)}(1);
316 dummy = isnumeric(df._data{indc(1)});
318 dummy = dummy & isnumeric (df._data{indc(indi)});
319 if (~strcmp (class (output_type), df._type{indc(indi)}))
321 %# let downclassing occur
322 output_type = horzcat (output_type, df._data{indc(indi)}(1));
325 %# unmixable args -- falls back to type of parent container
326 error ("Selected columns %s not compatible with cat() -- use 'cell' as output format", mat2str (indc));
327 %# dead code -- suppress previous line for switching automagically the output format to df
328 output_type = class (df);
332 asked_output_format = class (output_type);
333 output_type = "array";
337 if (any(strcmp ({output_type, asked_output_type}, class (df))))
338 if (!isempty (S) && (1 == length (S(1).subs)))
339 %# is the selection index vector-like ?
340 if ((isnumeric(S(1).subs{1}) && isvector(S(1).subs{1}) &&
341 df._cnt(1) > 1) && isempty (asked_output_type))
342 %# in the case of vector input, favor array output
343 [asked_output_type, output_type] = deal("array");
348 indt = {}; %# in case we have to mix matrix of different width
349 if (!isempty (fullinds))
350 inds = unique (fullinds); nseq = length (inds);
351 indt(1, 1:df._cnt(2)) = inds;
353 inds = 1; indt(1, 1:df._cnt(2)) = inds; nseq = 1;
354 if (isempty (S) || all(cellfun('isclass', S(1).subs, 'char')))
355 inds = ':'; indt(1, 1:df._cnt(2)) = inds;
356 nseq = max (cellfun(@length, df._rep(indc)));
358 if (length (S(1).subs) > 1) %# access-as-matrix
359 if (length (S(1).subs) > 2)
361 if (isa(inds, 'char'))
362 nseq = max (cellfun(@length, df._rep(indc)));
363 indt(1, 1:df._cnt(2)) = inds;
365 %# generate a specific index for each column
366 nseq = length (inds);
367 dummy = cellfun(@length, df._rep(indc));
368 indt(1, 1:df._cnt(2)) = inds;
376 if (strcmp (output_type, class (df)))
377 %# disp('line 295 '); keyboard
378 %# export the result as a dataframe
379 resu = dataframe ([]);
380 resu._cnt(1) = nrow; resu._cnt(2) = ncol;
381 if (isempty (fullindr))
383 resu._data{indi} = df._data{indc(indi)}\
384 (indr, df._rep{indc(indi)}(indt{indc(indi)}));
385 resu._rep{indi} = 1:size (resu._data{indi}, 2);
386 resu._name{2}(indi, 1) = df._name{2}(indc(indi));
387 resu._over{2}(1, indi) = df._over{2}(indc(indi));
388 resu._type{indi} = df._type{indc(indi)};
390 if (!isempty (df._ridx) && size (df._ridx, 2) >= inds)
391 resu._ridx = df._ridx(indr, inds);
393 if (length (df._name{1}) >= max (indr))
394 resu._name{1}(1:nrow, 1) = df._name{1}(indr);
395 resu._over{1}(1, 1:nrow) = df._over{1}(indr);
398 dummy = df_whole(df);
399 dummy = dummy(onedimidx);
400 for indi = (1:resu._cnt(2))
401 indc = unique (fullindc(:, indi));
402 if (1 == length (indc))
403 resu._name{2}(indi)= df._name{2}(indc);
404 resu._over{2}(indi)= df._over{2}(indc);
405 unfolded = df._data{indc}(:, df._rep{indc});
406 indj = sub2ind (size (unfolded), fullindr(:, indi), \
408 resu._data{indi} = unfolded(indj);
409 resu._type{indi} = df._type{indc};
410 resu._rep{indi} = 1:size (resu._data{indi}, 2);
412 resu._name{2}(indi)= ["X" num2str(indi)];
413 resu._over{2}(indi)= true;
414 resu._data{indi} = squeeze(dummy(:, indi, :));
415 resu._type{indi} = class (dummy(1, indi, 1));
416 resu._rep{indi} = 1:size (resu._data{indi}, 2);
419 if (1 == size (df._ridx, 2))
420 resu._ridx = repmat (df._ridx, [1 ncol 1]);
422 resu._ridx = df._ridx;
424 if (!isempty (resu._ridx))
425 if (size (resu._ridx, 2) > 1)
426 resu._ridx = resu._ridx(indr, indc);
428 resu._ridx = resu._ridx(indr);
432 %# to be verified : keyboard
435 resu = df_thirddim(resu);
436 if (length (S) > 1) %# perform further access, if required
438 S = S(2:end); %# avoid recursive calls
439 continue; %# restart the loop around line 150
443 elseif (strcmp (output_type, 'cell'))
444 %# export the result as a cell array
445 if (isempty (asked_output_format))
446 resu = cell (2+nrow, 2+ncol); resu(1:end, 1:2) = {''};
447 resu(2, 3:end) = df._type(indc); %column type
448 row_offs = 2; col_offs = 2;
450 resu{1, 2+indi} = df._name{2}{indc(indi)}; % column name
452 resu(3:end, 1) = mat2cell (df._ridx(indr), ones (nrow, 1), 1);
453 if (length (df._name{1}) >= max (indr))
454 resu(3:end, 2) = df._name{1}{indr};
457 resu = cell (nrow, ncol);
458 row_offs = 0; col_offs = 0;
461 switch df._type{indc(indi)} % cell content
463 %# dummy = cellstr(df._data{indc(indi)}(indr, :));
464 dummy = df._data{indc(indi)}(indr, :);
465 resu(1+row_offs:end, indi+col_offs) = dummy;
467 dummy = df._data{indc(indi)}(indr, :);
468 resu(1+row_offs:end, indi+col_offs) = \
469 mat2cell (dummy, ones (nrow, 1), size (dummy, 2));
473 %# did we arrive here by x.cell ?
474 if (0 == length (S)) return; endif
476 %# perform the selection on the content, keeping the header
477 if (length (S) > 1) %# perform further access, if required
478 if (~strcmp (S(2).type, '()'))
479 error ("Illegal dataframe-as-cell sub-dereferencing");
481 if (!isempty (asked_output_format))
482 resu = feval(@subsref, resu, S(2:end));
484 if (length (S(2).subs) != 1)
485 %# normal two-dimensional access: apply the selection to the
486 %# zone containing the data
488 if (!isempty (dummy(2).subs))
489 dummy(2).subs{2} = ':';
493 feval (@subsref, resu(3:end, 1),
496 feval (@subsref, resu(3:end, 2),
498 %# extract - reorder - whatever
499 feval (@subsref, resu(3:end, 3:end), S(2:end))
503 if (!isempty (dummy(2).subs))
504 dummy(2).subs{1} = [1 2];
507 %# reselect column names and types
508 [cell(2, 2) feval(@subsref, resu(1:2,
513 resuf(1:2, 1:2) = {''}; resu = resuf;
515 %# one-dimensional access of the whole 2D cell array -- you
516 %# asked for it, you got it
517 resu = feval(@subsref, resu(:), S(2:end));
518 if (!isa(S(2).subs{1}, 'char') \
519 && size (S(2).subs{1}, 2) > 1)
524 elseif (1 == length (S(1).subs))
526 if (!isa(S(1).subs{1}, 'char') \
527 && size (S(1).subs{1}, 2) > 1)
531 return; %# no more iteration required
534 %# export the result as a vector/matrix. Rules:
535 %# * x(:, :, :) returns a 3D matrix
536 %# * x(:, n:m, :) returns a 3D matrix
537 %# * x(:, :) returns a horzcat of the third dimension
538 %# * x(:, n:m) select only the first sequence
539 %# * x(:) returns a vertcat of the columns of x(:, :)
540 %# disp('line 403 '); keyboard
541 if (isempty (S) || isempty (S(1).subs) || \
542 length (S(1).subs) > 1 || \
543 (isnumeric(S(1).subs{1}) && !isvector(S(1).subs{1})))
545 df = struct(df); %# remove the magic, avoid recursive calls
546 if (isempty (fullindr)) %# two index access
547 if (~isempty (asked_output_format)) %# force a conversion
548 if (strmatch(asked_output_format, 'cell'))
549 extractfunc = @(x) mat2cell\
550 (df._data{indc(x)}(indr, df._rep{indc(x)}(inds)), \
553 extractfunc = @(x) cast ( df._data{indc(x)}\
554 (indr, df._rep{indc(x)}(inds)),\
555 asked_output_format);
557 else %# let the usual downclassing occur
558 extractfunc = @(x) df._data{indc(x)}(indr, df._rep{indc(x)}(inds));
562 dummy = reshape (extractfunc (1), nrow, 1, []);
563 if (size (dummy, 3) < nseq)
564 dummy = repmat (dummy, [1 1 nseq]);
567 dummy = extractfunc (1);
570 error ("Column %d format (%s) can't be converted to %s", \
571 indc(1), df._type{indc(1)}, asked_output_format);
574 %# dynamic allocation with the final type
575 resu = repmat (dummy, [1 ncol]);
579 dummy = reshape (extractfunc (indi), nrow, 1, []);
580 if (size (dummy, 3) < nseq)
581 dummy = repmat (dummy, [1 1 nseq]);
584 dummy = extractfunc (indi);
587 error ("Column %d format (%s) can't be converted to %s", \
588 indc(indi), df._type{indc(indi)}, asked_output_format);
590 resu(:, indi, :) = dummy;
593 if (strcmp (df._type{indc(1)}, 'char'))
599 if (!isempty (S) && 2 == length (S(1).subs) \
600 && all(cellfun('isclass', S(1).subs, 'char')))
601 resu = reshape (resu, nrow, ncol*nseq);
603 else %# one index access
604 %# disp('line 557'); keyboard
605 if (~isempty (asked_output_format)) %# force a conversion
606 if (strmatch (asked_output_format, 'cell'))
607 extractfunc = @(x, y) mat2cell (df._data{x}(:, df._rep{x}(y)), \
608 ones (length (y), 1));
610 extractfunc = @(x, y) cast (df._data{x}(:, df._rep{x})(y), \
611 asked_output_format);
613 else %# let the usual downclassing occur
614 extractfunc = @(x, y) df._data{x}(:, df._rep{x})(y);
617 resu = zeros(0, class (sum (cellfun (@(x) zeros (1, class (x(1))),\
620 dummy = find (indi == fullindc); %# linear global index
621 %# linear index for this matrix
622 idx = sub2ind (size (df._data{indi}), fullindr(dummy), \
624 resu(dummy) = extractfunc (indi, idx);
628 error ("Column %d format (%s) can't be converted to %s", \
629 indi, df._type{indi}, asked_output_format);
631 resu = reshape (resu, size (onedimidx));
633 else %# access-as-vector
634 %# disp('line 548 '); keyboard
635 if (!isempty (fullindr))
636 switch df._type{indc(1)}
638 resu = df._data{indc(1)}(fullindr(1), \
639 df._rep{indc(1)}(fullinds(1)));
640 for indi = (2:length (fullindr))
641 resu = char (resu, df._data{indc(indi)}\
642 (fullindr(indi), df._rep{indc(indi)}(fullinds(indi))));
645 if (isempty (asked_output_format))
646 resu = df._data{fullindc(1)}\
647 (fullindr(1), df._rep{fullindc(1)}(fullinds(1)));
648 else %# this type will propagate with subsequent cat
649 resu = cast (df._data{fullindc(1)}\
650 (fullindr(1), df._rep{fullindc(1)}(fullinds(1))),\
651 asked_output_format);
653 for indi = (2:length (fullindr))
654 resu = cat(1, resu, df._data{fullindc(indi)}\
656 df._rep{fullindc(indi)}(fullinds(indi))));
659 else %# using the (:) operator
660 resu = df_whole(df)(:);
662 if (!isa(S(1).subs{1}, 'char') \
663 && size (S(1).subs{1}, 2) > 1)
667 if (length (S) > 1) %# perform further access, if required
668 %# disp('line 442 '); keyboard
669 resu = feval(@subsref, resu, S(2:end));
672 return; %# no more iteration required
675 %# disp("line 343 !?!"); %# keyboard