1 ## Copyright (C) 2009,2010,2011,2012 Philip Nienhuis <pr.nienhuis at users.sf.net>
3 ## This program is free software; you can redistribute it and/or modify it under
4 ## the terms of the GNU General Public License as published by the Free Software
5 ## Foundation; either version 3 of the License, or (at your option) any later
8 ## This program is distributed in the hope that it will be useful, but WITHOUT
9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 ## You should have received a copy of the GNU General Public License along with
14 ## this program; if not, see <http://www.gnu.org/licenses/>.
17 ## @deftypefn {Function File} [ @var{rawarr}, @var{ods}, @var{rstatus} ] = ods2oct (@var{ods})
18 ## @deftypefnx {Function File} [ @var{rawarr}, @var{ods}, @var{rstatus} ] = ods2oct (@var{ods}, @var{wsh})
19 ## @deftypefnx {Function File} [ @var{rawarr}, @var{ods}, @var{rstatus} ] = ods2oct (@var{ods}, @var{wsh}, @var{range})
20 ## @deftypefnx {Function File} [ @var{rawarr}, @var{ods}, @var{rstatus} ] = ods2oct (@var{ods}, @var{wsh}, @var{range}, @var{options})
22 ## Read data contained within cell range @var{range} from worksheet @var{wsh}
23 ## in an OpenOffice_org Calc spreadsheet file pointed to in struct @var{ods}.
25 ## @var{ods} is supposed to have been created earlier by odsopen in the
26 ## same octave session.
28 ## @var{wsh} is either numerical or text, in the latter case it is
30 ## Note that in case of a numerical @var{wsh} this number refers to the
31 ## position in the worksheet stack, counted from the left in a Calc
32 ## window. The default is numerical 1, i.e. the leftmost worksheet
35 ## @var{range} is expected to be a regular spreadsheet range format,
36 ## or "" (empty string, indicating all data in a worksheet).
37 ## If no range is specified the occupied cell range will have to be
38 ## determined behind the scenes first; this can take some time.
40 ## Optional argument @var{options}, a structure, can be used to
41 ## specify various read modes by setting option fields in the struct
42 ## to true (1) or false (0). Currently recognized option fields are:
45 ## @item "formulas_as_text"
46 ## If set to TRUE or 1, spreadsheet formulas (if at all present)
47 ## are read as formula strings rather than the evaluated formula
48 ## result values. This only works for the OTK and UNO interfaces.
49 ## The default value is 0 (FALSE).
51 ## @item 'strip_array'
52 ## Set the value of this field set to TRUE or 1 to strip the returned
53 ## output array @var{rawarr} from empty outer columns and rows. The
54 ## spreadsheet cell rectangle limits from where the data actually
55 ## came will be updated. The default value is FALSE or 0 (no cropping).
58 ## If only the first argument @var{ods} is specified, ods2oct will
59 ## try to read all contents from the first = leftmost (or the only)
60 ## worksheet (as if a range of @'' (empty string) was specified).
62 ## If only two arguments are specified, ods2oct assumes the second
63 ## argument to be @var{wsh}. In that case ods2oct will try to read
64 ## all data contained in that worksheet.
66 ## Return argument @var{rawarr} contains the raw spreadsheet cell data.
67 ## Use parsecell() to separate numeric and text values from @var{rawarr}.
69 ## Optional return argument @var{ods} contains the pointer struct. Field
70 ## @var{ods}.limits contains the outermost column and row numbers of the
71 ## actually read cell range.
73 ## Optional return argument @var{rstatus} will be set to 1 if the
74 ## requested data have been read successfully, 0 otherwise.
76 ## Erroneous data and empty cells turn up empty in @var{rawarr}.
77 ## Date/time values in OpenOffice.org are returned as numerical values
78 ## with base 1-1-0000 (same as octave). But beware that Excel spreadsheets
79 ## rewritten by OpenOffice.org into .ods format may have numerical date
80 ## cells with base 01-01-1900 (same as MS-Excel).
82 ## When reading from merged cells, all array elements NOT corresponding
83 ## to the leftmost or upper OpenOffice.org cell will be treated as if the
84 ## "corresponding" cells are empty.
89 ## A = ods2oct (ods1, '2nd_sheet', 'C3:ABS40000');
90 ## (which returns the numeric contents in range C3:ABS40000 in worksheet
91 ## '2nd_sheet' from a spreadsheet file pointed to in pointer struct ods1,
92 ## into numeric array A)
96 ## [An, ods2, status] = ods2oct (ods2, 'Third_sheet');
99 ## @seealso {odsopen, odsclose, parsecell, odsread, odsfinfo, oct2ods, odswrite}
103 ## Author: Philip Nienhuis
104 ## Created: 2009-12-13
106 ## 2009-12-30 First working version
107 ## 2010-03-19 Added check for odfdom version (should be 0.7.5 until further notice)
108 ## 2010-03-20 Works for odfdom v 0.8 too. Added subfunction ods3jotk2oct for that
109 ## 2010-04-06 Benchmarked odfdom versions. v0.7.5 is up to 7 times faster than v0.8!
110 ## So I added a warning for users using odfdom 0.8.
111 ## 2010-04-11 Removed support for odfdom-0.8 - it's painfully slow and unreliable
112 ## 2010-05-31 Updated help text (delay i.c.o. empty range due to getusedrange call)
113 ## 2010-08-03 Added support for reading back formulas (works only in OTK)
114 ## 2010-08-12 Added explicit support for jOpenDocument v 1.2b3+
115 ## 2010-08-25 Improved helptext (moved some text around)
116 ## 2010-08-27 Added ods3jotk2oct - internal function for odfdom-0.8.6.jar
117 ## '' Extended check on spsh_opts (must be a struct)
118 ## 2010-10-27 Moved cropping rawarr from empty outer rows & columns to here
119 ## 2011-05-06 Experimental UNO support
120 ## 2011-09-18 Set rstatus var here
121 ## 2012-01-26 Fixed "seealso" help string
122 ## 2012-02-25 Added 0.8.7 to supported odfdom versions in L.155
123 ## 2012-02-26 Updated texinfo header help text
124 ## 2012-06-08 Support for odfdom-incubator 0.8.8
125 ## '' Replaced tabs by double space
127 ## (Latest update of subfunctions below: 2012-06-08)
129 function [ rawarr, ods, rstatus ] = ods2oct (ods, wsh=1, datrange=[], spsh_opts=[])
131 # Check if ods struct pointer seems valid
132 if (~isstruct (ods)), error ("File ptr struct expected for arg @ 1"); endif
133 test1 = ~isfield (ods, "xtype");
134 test1 = test1 || ~isfield (ods, "workbook");
135 test1 = test1 || isempty (ods.workbook);
136 test1 = test1 || isempty (ods.app);
138 error ("Arg #1 is an invalid ods file struct");
140 # Check worksheet ptr
141 if (~(ischar (wsh) || isnumeric (wsh))), error ("Integer (index) or text (wsh name) expected for arg # 2"); endif
143 if (~(isempty (datrange) || ischar (datrange))), error ("Character string (range) expected for arg # 3"); endif
144 # Check & setup options struct
145 if (nargin < 4 || isempty (spsh_opts))
146 spsh_opts.formulas_as_text = 0;
147 spsh_opts.strip_array = 1;
149 elseif (~isstruct (spsh_opts))
150 error ("struct expected for OPTIONS argument (# 4)");
152 if (~isfield (spsh_opts, 'formulas_as_text')), spsh_opts.formulas_as_text = 0; endif
153 if (~isfield (spsh_opts, 'strip_array')), spsh_opts.strip_array = 1; endif
157 # Select the proper interfaces
158 if (strcmp (ods.xtype, 'OTK'))
159 # Read ods file tru Java & ODF toolkit
162 [rawarr, ods] = ods2jotk2oct (ods, wsh, datrange, spsh_opts);
163 case {'0.8.6', '0.8.7', '0.8.8'}
164 [rawarr, ods] = ods3jotk2oct (ods, wsh, datrange, spsh_opts);
166 error ("Unsupported odfdom version or invalid ods file pointer.");
168 elseif (strcmp (ods.xtype, 'JOD'))
169 # Read ods file tru Java & jOpenDocument. JOD doesn't know about formulas :-(
170 [rawarr, ods] = ods2jod2oct (ods, wsh, datrange);
171 elseif (strcmp (ods.xtype, 'UNO'))
172 # Read ods file tru Java & UNO
173 [rawarr, ods] = ods2uno2oct (ods, wsh, datrange, spsh_opts);
175 # ---- < Other interfaces here >
177 error (sprintf ("ods2oct: unknown OpenOffice.org .ods interface - %s.", ods.xtype));
180 rstatus = ~isempty (rawarr);
182 # Optionally strip empty outer rows and columns & keep track of original data location
183 if (spsh_opts.strip_array && rstatus)
184 emptr = cellfun ('isempty', rawarr);
185 if (all (all (emptr)))
189 nrows = size (rawarr, 1); ncols = size (rawarr, 2);
191 while (all (emptr(irowt, :))), irowt++; endwhile
193 while (all (emptr(irowb, :))), irowb--; endwhile
195 while (all (emptr(:, icoll))), icoll++; endwhile
197 while (all (emptr(:, icolr))), icolr--; endwhile
199 # Crop outer rows and columns and update limits
200 rawarr = rawarr(irowt:irowb, icoll:icolr);
201 ods.limits = ods.limits + [icoll-1, icolr-ncols; irowt-1, irowb-nrows];
208 #=====================================================================
210 ## Copyright (C) 2009,2010,2011,2012 Philip Nienhuis <prnienhuis _at- users.sf.net>
212 ## This program is free software; you can redistribute it and/or modify it under
213 ## the terms of the GNU General Public License as published by the Free Software
214 ## Foundation; either version 3 of the License, or (at your option) any later
217 ## This program is distributed in the hope that it will be useful, but WITHOUT
218 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
219 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
222 ## You should have received a copy of the GNU General Public License along with
223 ## this program; if not, see <http://www.gnu.org/licenses/>.
225 ## odf2jotk2oct - read ODS spreadsheet data using Java & odftoolkit
226 ## You need proper java-for-octave & odfdom.jar + xercesImpl.jar
227 ## in your javaclasspath.
229 ## Author: Philip Nenhuis <pr.nienhuis at users.sf.net>
230 ## Created: 2009-12-24
232 ## 2010-01-08 First working version
233 ## 2010-03-18 Fixed many bugs with wrong row references in case of empty upper rows
234 ## "" Fixed reference to upper row in case of nr-rows-repeated top tablerow
235 ## "" Tamed down memory usage for rawarr when desired data range is given
236 ## "" Added call to getusedrange() for cases when no range was specified
237 ## 2010-03-19 More code cleanup & fixes for bugs introduced 18/3/2010 8-()
238 ## 2010-08-03 Added preliminary support for reading back formulas as text strings
239 ## 2010-10-27 Moved cropping rawarr from empty outer rows & columns to caller
240 ## 2011-09-18 Remove rstatus var (now set in caller)
242 function [ rawarr, ods ] = ods2jotk2oct (ods, wsh, crange, spsh_opts)
244 # Parts after user gfterry in
245 # http://www.oooforum.org/forum/viewtopic.phtml?t=69060
247 # Get contents and table stuff from the workbook
248 odfcont = ods.workbook; # Use a local copy just to be sure. octave
249 # makes physical copies only when needed (?)
250 xpath = ods.app.getXPath;
252 # AFAICS ODS spreadsheets have the following hierarchy (after Xpath processing):
253 # <table:table> - table nodes, the actual worksheets;
254 # <table:table-row> - row nodes, the rows in a worksheet;
255 # <table:table-cell> - cell nodes, the cells in a row;
256 # Styles (formatting) are defined in a section "settings" outside the
257 # contents proper but are referenced in the nodes.
259 # Create an instance of type NODESET for use in subsequent statement
260 NODESET = java_get ('javax.xml.xpath.XPathConstants', 'NODESET');
261 # Parse sheets ("tables") from ODS file
262 sheets = xpath.evaluate ("//table:table", odfcont, NODESET);
263 nr_of_sheets = sheets.getLength ();
265 # Check user input & find sheet pointer (1-based), using ugly hacks
266 if (~isnumeric (wsh))
267 # Search in sheet names, match sheet name to sheet number
269 while (++ii <= nr_of_sheets && ischar (wsh))
270 # Look in first part of the sheet nodeset
271 sh_name = sheets.item(ii-1).getTableNameAttribute ();
272 if (strcmp (sh_name, wsh))
273 # Convert local copy of wsh into a number (pointer)
278 error (sprintf ("No worksheet '%s' found in file %s", wsh, ods.filename));
280 elseif (wsh > nr_of_sheets || wsh < 1)
281 # We already have a numeric sheet pointer. If it's not in range:
282 error (sprintf ("Worksheet no. %d out of range (1 - %d)", wsh, nr_of_sheets));
285 # Get table-rows in sheet no. wsh. Sheet count = 1-based (!)
286 str = sprintf ("//table:table[%d]/table:table-row", wsh);
287 sh = xpath.evaluate (str, odfcont, NODESET);
288 nr_of_rows = sh.getLength ();
290 # Either parse (given cell range) or prepare (unknown range) help variables
291 if (isempty (crange))
292 [ trow, brow, lcol, rcol ] = getusedrange (ods, wsh);
293 nrows = brow - trow + 1; # Number of rows to be read
294 ncols = rcol - lcol + 1; # Number of columns to be read
296 [dummy, nrows, ncols, trow, lcol] = parse_sp_range (crange);
297 brow = min (trow + nrows - 1, nr_of_rows);
298 # Check ODS column limits
299 if (lcol > 1024 || trow > 65536)
300 error ("ods2oct: invalid range; max 1024 columns & 65536 rows.");
302 # Truncate range silently if needed
303 rcol = min (lcol + ncols - 1, 1024);
304 ncols = min (ncols, 1024 - lcol + 1);
305 nrows = min (nrows, 65536 - trow + 1);
307 # Create storage for data content
308 rawarr = cell (nrows, ncols);
310 # Prepare reading sheet row by row
311 rightmcol = 0; # Used to find actual rightmost column
312 ii = trow - 1; # Spreadsheet row counter
314 # Find uppermost requested *tablerow*. It may be influenced by nr-rows-repeated
317 while (tfillrows < ii)
318 row = sh.item(tfillrows);
319 extrarows = row.getTableNumberRowsRepeatedAttribute ();
320 tfillrows = tfillrows + extrarows;
323 # Desired top row may be in a nr-rows-repeated tablerow....
324 if (tfillrows > ii); ii = tfillrows; endif
327 # Read from worksheet row by row. Row numbers are 0-based
329 row = sh.item(rowcnt++);
330 nr_of_cells = min (row.getLength (), rcol);
331 rightmcol = max (rightmcol, nr_of_cells); # Keep track of max row length
332 # Read column (cell, "table-cell" in ODS speak) by column
335 tcell = row.getCellAt(jj-1);
337 if (~isempty (tcell)) # If empty it's possibly in columns-repeated/spanned
338 if (spsh_opts.formulas_as_text) # Get spreadsheet formula rather than value
339 # Check for formula attribute
340 tmp = tcell.getTableFormulaAttribute ();
344 if (strcmp (tolower (tmp(1:3)), 'of:'))
345 tmp (1:end-3) = tmp(4:end);
347 rawarr(ii-trow+2, jj-lcol+1) = tmp;
351 if ~(form || index (char(tcell), 'text:p>Err:') || index (char(tcell), 'text:p>#DIV'))
353 ctype = tcell.getOfficeValueTypeAttribute ();
354 cvalue = tcell.getOfficeValueAttribute ();
355 switch deblank (ctype)
356 case {'float', 'currency', 'percentage'}
357 rawarr(ii-trow+2, jj-lcol+1) = cvalue;
359 cvalue = tcell.getOfficeDateValueAttribute ();
360 # Dates are returned as octave datenums, i.e. 0-0-0000 based
361 yr = str2num (cvalue(1:4));
362 mo = str2num (cvalue(6:7));
363 dy = str2num (cvalue(9:10));
364 if (index (cvalue, 'T'))
365 hh = str2num (cvalue(12:13));
366 mm = str2num (cvalue(15:16));
367 ss = str2num (cvalue(18:19));
368 rawarr(ii-trow+2, jj-lcol+1) = datenum (yr, mo, dy, hh, mm, ss);
370 rawarr(ii-trow+2, jj-lcol+1) = datenum (yr, mo, dy);
373 cvalue = tcell.getOfficeTimeValueAttribute ();
374 if (index (cvalue, 'PT'))
375 hh = str2num (cvalue(3:4));
376 mm = str2num (cvalue(6:7));
377 ss = str2num (cvalue(9:10));
378 rawarr(ii-trow+2, jj-lcol+1) = datenum (0, 0, 0, hh, mm, ss);
381 cvalue = tcell.getOfficeBooleanValueAttribute ();
382 rawarr(ii-trow+2, jj-lcol+1) = cvalue;
384 cvalue = tcell.getOfficeStringValueAttribute ();
385 if (isempty (cvalue)) # Happens with e.g., hyperlinks
387 # Hack string value from between <text:p|r> </text:p|r> tags
388 ist = findstr (tmp, '<text:');
390 ist = ist (length (ist));
392 ien = index (tmp(ist:end), '</text') + ist - 2;
394 cvalue = tmp(ist:ien);
397 rawarr(ii-trow+2, jj-lcol+1)= cvalue;
406 # Check for repeated rows (i.e. condensed in one table-row)
407 extrarows = row.getTableNumberRowsRepeatedAttribute () - 1;
408 if (extrarows > 0 && (ii + extrarows) < 65535)
409 # Expand rawarr cf. table-row
410 nr_of_rows = nr_of_rows + extrarows;
416 # Keep track of data rectangle limits
417 ods.limits = [lcol, rcol; trow, brow];
422 #===========================================================================
424 ## Copyright (C) 2010,2011,2012 Philip Nienhuis <prnienhuis@users.sf.net>
426 ## This program is free software; you can redistribute it and/or modify it under
427 ## the terms of the GNU General Public License as published by the Free Software
428 ## Foundation; either version 3 of the License, or (at your option) any later
431 ## This program is distributed in the hope that it will be useful, but WITHOUT
432 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
433 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
436 ## You should have received a copy of the GNU General Public License along with
437 ## this program; if not, see <http://www.gnu.org/licenses/>.
439 ## ods3jotk2oct: internal function for reading odf files using odfdom-0.8.6
441 ## Author: Philip Nienhuis <Philip@DESKPRN>
442 ## Created: 2010-08-24. First workable version Aug 27, 2010
444 ## 2010-10-27 Moved cropping rawarr from empty outer rows & columns to caller
445 ## 2010-11-13 Added workaround for reading text cells in files made by jOpenDocument 1.2bx
446 ## 2011-09-18 Comment out workaround for jOpenDocument bug (no OfficeValueAttr set)
447 ## because this casts all numeric cells to string type for properly written ODS1.2
448 ## '' Remove rstatus var (now set in caller)
450 function [ rawarr, ods ] = ods3jotk2oct (ods, wsh, crange, spsh_opts)
452 # Get contents and table stuff from the workbook
453 odfcont = ods.workbook; # Use a local copy just to be sure. octave
454 # makes physical copies only when needed (?)
456 # Parse sheets ("tables") from ODS file
457 sheets = ods.app.getTableList();
458 nr_of_sheets = sheets.size ();
460 # Check user input & find sheet pointer (1-based)
461 if (~isnumeric (wsh))
463 sh = ods.app.getTableByName (wsh);
464 sh_err = isempty (sh);
469 error (sprintf ("Sheet %s not found in file %s\n", wsh, ods.filename));
471 elseif (wsh > nr_of_sheets || wsh < 1)
472 # We already have a numeric sheet pointer. If it's not in range:
473 error (sprintf ("Worksheet no. %d out of range (1 - %d)", wsh, nr_of_sheets));
475 sh = sheets.get (wsh - 1);
478 # Either parse (given cell range) or prepare (unknown range) help variables
479 if (isempty (crange))
484 if (strcmp (wsh, sheets.get(jj).getTableName()) == 1)
490 [ trow, brow, lcol, rcol ] = getusedrange (ods, wsh);
491 nrows = brow - trow + 1; # Number of rows to be read
492 ncols = rcol - lcol + 1; # Number of columns to be read
494 [dummy, nrows, ncols, trow, lcol] = parse_sp_range (crange);
495 # Check ODS row/column limits
496 if (lcol > 1024 || trow > 65536)
497 error ("ods2oct: invalid range; max 1024 columns & 65536 rows.");
499 # Truncate range silently if needed
500 rcol = min (lcol + ncols - 1, 1024);
501 ncols = min (ncols, 1024 - lcol + 1);
502 nrows = min (nrows, 65536 - trow + 1);
503 brow = trow + nrows - 1;
506 # Create storage for data content
507 rawarr = cell (nrows, ncols);
509 # Read from worksheet row by row. Row numbers are 0-based
510 for ii=trow:nrows+trow-1
511 row = sh.getRowByIndex (ii-1);
512 for jj=lcol:ncols+lcol-1;
513 ocell = row.getCellByIndex (jj-1);
515 otype = deblank (tolower (ocell.getValueType ()));
516 if (spsh_opts.formulas_as_text)
517 if ~isempty (ocell.getFormula ())
521 # # Provisions for catching jOpenDocument 1.2b bug where text cells
522 # # haven't been assigned an <office:value-type='string'> attribute
523 # if (~isempty (ocell))
524 # if (findstr ('<text:', char (ocell.getOdfElement ()))), otype = 'string'; endif
526 # At last, read the data
528 case {'float', 'currency', 'percentage'}
529 rawarr(ii-trow+1, jj-lcol+1) = ocell.getDoubleValue ();
531 # Dive into TableTable API
532 tvalue = ocell.getOdfElement ().getOfficeDateValueAttribute ();
533 # Dates are returned as octave datenums, i.e. 0-0-0000 based
534 yr = str2num (tvalue(1:4));
535 mo = str2num (tvalue(6:7));
536 dy = str2num (tvalue(9:10));
537 if (index (tvalue, 'T'))
538 hh = str2num (tvalue(12:13));
539 mm = str2num (tvalue(15:16));
540 ss = str2num (tvalue(18:19));
541 rawarr(ii-trow+1, jj-lcol+1) = datenum (yr, mo, dy, hh, mm, ss);
543 rawarr(ii-trow+1, jj-lcol+1) = datenum (yr, mo, dy);
546 # Dive into TableTable API
547 tvalue = ocell.getOdfElement ().getOfficeTimeValueAttribute ();
548 if (index (tvalue, 'PT'))
549 hh = str2num (tvalue(3:4));
550 mm = str2num (tvalue(6:7));
551 ss = str2num (tvalue(9:10));
552 rawarr(ii-trow+1, jj-lcol+1) = datenum (0, 0, 0, hh, mm, ss);
555 rawarr(ii-trow+1, jj-lcol+1) = ocell.getBooleanValue ();
557 rawarr(ii-trow+1, jj-lcol+1) = ocell.getStringValue ();
558 # # Code left in for in case odfdom 0.8.6+ has similar bug
560 # cvalue = tcell.getOfficeStringValueAttribute ();
561 # if (isempty (cvalue)) # Happens with e.g., hyperlinks
562 # tmp = char (tcell);
563 # # Hack string value from between <text:p|r> </text:p|r> tags
564 # ist = findstr (tmp, '<text:');
566 # ist = ist (length (ist));
568 # ien = index (tmp(ist:end), '</text') + ist - 2;
570 # cvalue = tmp(ist:ien);
573 # rawarr(ii-trow+1, jj-lcol+1)= cvalue;
575 rawarr(ii-trow+1, jj-lcol+1) = ocell.getFormula ();
583 # Keep track of data rectangle limits
584 ods.limits = [lcol, rcol; trow, brow];
589 #===========================================================================
591 ## Copyright (C) 2009,2010,2011,2012 Philip Nienhuis <pr.nienhuis at users.sf.net>
593 ## This program is free software; you can redistribute it and/or modify it under
594 ## the terms of the GNU General Public License as published by the Free Software
595 ## Foundation; either version 3 of the License, or (at your option) any later
598 ## This program is distributed in the hope that it will be useful, but WITHOUT
599 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
600 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
603 ## You should have received a copy of the GNU General Public License along with
604 ## this program; if not, see <http://www.gnu.org/licenses/>.
606 ## ods2oct - get data out of an ODS spreadsheet into octave using jOpenDocument.
607 ## Watch out, no error checks, and spreadsheet formula error results
608 ## are conveyed as 0 (zero).
610 ## Author: Philip Nienhuis
611 ## Created: 2009-12-13
614 ## 2010-08-12 Added separate stanzas for jOpenDocument v 1.2b3 and up. This version
615 ## allows better cell type parsing and is therefore more reliable
616 ## 2010-10-27 Moved cropping rawarr from empty outer rows & columns to here
617 ## 2010-11-13 Added workaround for reading text cells in files made by jOpenDocument 1.2bx
618 ## 2011-09-18 Comment out workaround for jOpenDocument bug (no OfficeValueAttr set)
619 ## because this casts all numeric cells to string type for properly written ODS1.2
620 ## '' Remove rstatus var (now set in caller)
621 ## 2012-02-25 Fix reading string values written by JOD itself (no text attribue!!). But
622 ## the cntents could be BOOLEAN as well (JOD doesn't write OffVal attr either)
623 ## 2012-02-26 Further workaround for reading strings (actually: cells w/o OfficeValueAttr)
625 function [ rawarr, ods] = ods2jod2oct (ods, wsh, crange)
628 months = {"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"};
630 # Check jOpenDocument version
631 sh = ods.workbook.getSheet (0);
632 cl = sh.getCellAt (0, 0);
634 # 1.2b3+ has public getValueType ()
637 BOOLEAN = char (java_get ('org.jopendocument.dom.ODValueType', 'BOOLEAN'));
638 CURRENCY = char (java_get ('org.jopendocument.dom.ODValueType', 'CURRENCY'));
639 DATE = char (java_get ('org.jopendocument.dom.ODValueType', 'DATE'));
640 FLOAT = char (java_get ('org.jopendocument.dom.ODValueType', 'FLOAT'));
641 PERCENTAGE = char (java_get ('org.jopendocument.dom.ODValueType', 'PERCENTAGE'));
642 STRING = char (java_get ('org.jopendocument.dom.ODValueType', 'STRING'));
643 TIME = char (java_get ('org.jopendocument.dom.ODValueType', 'TIME'));
650 if (isnumeric (wsh)) wsh = wsh - 1; endif # Sheet INDEX starts at 0
651 # Check if sheet exists. If wsh = numeric, nonexistent sheets throw errors.
653 sh = ods.workbook.getSheet (wsh);
655 error ("Illegal sheet number (%d) requested for file %s\n", wsh+1, ods.filename);
657 # If wsh = string, nonexistent sheets yield empty results
659 error ("No sheet called '%s' present in file %s\n", wsh, ods.filename);
662 # Either parse (given cell range) or prepare (unknown range) help variables
663 if (isempty (crange))
665 error ("No empty read range allowed in jOpenDocument version 1.2b2")
667 if (isnumeric (wsh)) wsh = wsh + 1; endif
668 [ trow, brow, lcol, rcol ] = getusedrange (ods, wsh);
669 nrows = brow - trow + 1; # Number of rows to be read
670 ncols = rcol - lcol + 1; # Number of columns to be read
673 [dummy, nrows, ncols, trow, lcol] = parse_sp_range (crange);
674 # Check ODS column limits
675 if (lcol > 1024 || trow > 65536)
676 error ("ods2oct: invalid range; max 1024 columns & 65536 rows.");
678 # Truncate range silently if needed
679 rcol = min (lcol + ncols - 1, 1024);
680 ncols = min (ncols, 1024 - lcol + 1);
681 nrows = min (nrows, 65536 - trow + 1);
682 brow= trow + nrows - 1;
684 # Create storage for data content
685 rawarr = cell (nrows, ncols);
692 scell = sh.getCellAt (lcol+jj-2, trow+ii-2);
693 sctype = char (scell.getValueType ());
695 case { FLOAT, CURRENCY, PERCENTAGE }
696 rawarr{ii, jj} = scell.getValue ().doubleValue ();
698 rawarr {ii, jj} = scell.getValue () == 1;
700 rawarr{ii, jj} = scell.getValue();
702 tmp = strsplit (char (scell.getValue ()), ' ');
703 yy = str2num (tmp{6});
704 mo = find (ismember (months, toupper (tmp{2})) == 1);
705 dd = str2num (tmp{3});
706 hh = str2num (tmp{4}(1:2));
707 mi = str2num (tmp{4}(4:5));
708 ss = str2num (tmp{4}(7:8));
709 rawarr{ii, jj} = datenum (yy, mo, dd, hh, mi, ss);
711 tmp = strsplit (char (scell.getValue ().getTime ()), ' ');
712 hh = str2num (tmp{4}(1:2)) / 24.0;
713 mi = str2num (tmp{4}(4:5)) / 1440.0;
714 ss = str2num (tmp{4}(7:8)) / 86600.0;
715 rawarr {ii, jj} = hh + mi + ss;
717 # Workaround for sheets written by jOpenDocument (no value-type attrb):
718 if (~isempty (scell.getValue) )
719 # FIXME Assume cell contains string if there's a text attr. But it could be BOOLEAN too...
720 if (findstr ('<text:', char (scell))), sctype = STRING; endif
721 rawarr{ii, jj} = scell.getValue();
726 # Probably a merged cell, just skip
727 # printf ("Error in row %d, col %d (addr. %s)\n", ii, jj, calccelladdress (lcol+jj-2, trow+ii-2));
731 else # ods.odfvsn == 3
735 celladdress = calccelladdress (trow+ii-1, lcol+jj-1);
737 val = sh.getCellAt (celladdress).getValue ();
739 # No panic, probably a merged cell
745 rawarr(ii, jj) = val;
746 elseif (isnumeric (val))
748 if (val) rawarr(ii, jj) = true; else; rawarr(ii, jj) = false; endif
751 val = sh.getCellAt (celladdress).getValue ().doubleValue ();
752 rawarr(ii, jj) = val;
756 # Probably empty Cell
758 # Maybe date / time value. Dirty hack to get values:
759 mo = strmatch (toupper (val(5:7)), months);
760 dd = str2num (val(9:10));
761 yy = str2num (val(25:end));
762 hh = str2num (val(12:13));
763 mm = str2num (val(15:16));
764 ss = str2num (val(18:19));
765 rawarr(ii, jj) = datenum (yy, mo, dd, hh, mm,ss);
775 # Keep track of data rectangle limits
776 ods.limits = [lcol, rcol; trow, brow];
780 ## Copyright (C) 2011,2012 Philip Nienhuis <prnienhuis@users.sf.net>
782 ## This program is free software; you can redistribute it and/or modify it under
783 ## the terms of the GNU General Public License as published by the Free Software
784 ## Foundation; either version 3 of the License, or (at your option) any later
787 ## This program is distributed in the hope that it will be useful, but WITHOUT
788 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
789 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
792 ## You should have received a copy of the GNU General Public License along with
793 ## this program; if not, see <http://www.gnu.org/licenses/>.
797 ## Author: Philip Nienhuis <prnienhuis@users.sf.net>
798 ## Created: 2011-05-05
800 ## 2011-09-18 Adapted sh_names type to LO 3.4.1
801 ## '' Remove default 2 last sheets (LibreOffice 3.4.+)
802 ## '' Remove rstatus var (now set in caller)
803 ## 2011-09-19 Try to decipher if formulas return numeric or string values
805 function [rawarr, ods] = ods2uno2oct (ods, wsh, datrange, spsh_opts)
807 sheets = ods.workbook.getSheets ();
808 sh_names = sheets.getElementNames ();
809 if (! iscell (sh_names))
810 # Java array (LibreOffice 3.4.+); convert to cellstr
811 sh_names = char (sh_names);
813 sh_names = {sh_names};
816 # Check sheet pointer
818 if (wsh < 1 || wsh > numel (sh_names))
819 error ("Sheet index %d out of range 1-%d", wsh, numel (sh_names));
822 ii = strmatch (wsh, sh_names);
823 if (isempty (ii)), error ("Sheet '%s' not found", wsh); endif
826 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.sheet.XSpreadsheet');
827 sh = sheets.getByName (sh_names{wsh}).getObject.queryInterface (unotmp);
829 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.sheet.XCellRangesQuery');
830 xRQ = sh.queryInterface (unotmp);
831 # Get cell ranges of all rectangles containing data. Type values:
832 #java_get ('com.sun.star.sheet.CellFlags', 'VALUE') ans = 1
833 #java_get ('com.sun.star.sheet.CellFlags', 'DATETIME') ans = 2
834 #java_get ('com.sun.star.sheet.CellFlags', 'STRING') ans = 4
835 #java_get ('com.sun.star.sheet.CellFlags', 'FORMULA') ans = 16
836 # Yep, boolean is lacking...
837 Cellflgs = javaObject ("java.lang.Short", "23");
838 ccells = xRQ.queryContentCells (Cellflgs);
839 addrs = ccells.getRangeAddressesAsString ();
841 # Strip sheet name from addresses
842 adrblks = strsplit (addrs, ',');
843 if (isempty (adrblks))
844 warning ('Sheet %s contains no data', sh_names{wsh});
848 # Either parse (given cell range) or prepare (unknown range) help variables.
849 # As OpenOffice knows the occupied range, we need the limits anyway to avoid
850 # out-of-range errors
851 [ trow, brow, lcol, rcol ] = getusedrange (ods, wsh);
852 if (isempty (datrange))
853 nrows = brow - trow + 1; # Number of rows to be read
854 ncols = rcol - lcol + 1; # Number of columns to be read
856 [dummy, nrows, ncols, srow, scol] = parse_sp_range (datrange);
857 # Truncate range silently if needed
858 brow = min (srow + nrows - 1, brow);
859 rcol = min (scol + ncols - 1, rcol);
860 trow = max (trow, srow);
861 lcol = max (lcol, scol);
862 nrows = min (brow - trow + 1, nrows); # Number of rows to be read
863 ncols = min (rcol - lcol + 1, ncols); # Number of columns to be read
865 # Create storage for data at Octave side
866 rawarr = cell (nrows, ncols);
868 # Get data. Apparently row & column indices are 0-based in UNO
871 XCell = sh.getCellByPosition (jj, ii);
872 cType = XCell.getType().getValue ();
875 rawarr{ii-trow+2, jj-lcol+2} = XCell.getValue ();
877 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.text.XText');
878 rawarr{ii-trow+2, jj-lcol+2} = XCell.queryInterface (unotmp).getString ();
880 if (spsh_opts.formulas_as_text)
881 rawarr{ii-trow+2, jj-lcol+2} = XCell.getFormula ();
883 # Unfortunately OOo gives no clue as to the type of formula result
884 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.text.XText');
885 rawarr{ii-trow+2, jj-lcol+2} = XCell.queryInterface (unotmp).getString ();
886 tmp = str2double (rawarr{ii-trow+2, jj-lcol+2});
887 # If the string happens to contain just a number we'll assume it is numeric
888 if (~isnan (tmp)); rawarr{ii-trow+2, jj-lcol+2} = tmp; endif
896 # Keep track of data rectangle limits
897 ods.limits = [lcol, rcol; trow, brow];