1 ## Copyright (C) 2009,2010,2011,12 Philip Nienhuis <prnienhuis at users.sf.net>
3 ## This program is free software; you can redistribute it and/or modify it under
4 ## the terms of the GNU General Public License as published by the Free Software
5 ## Foundation; either version 3 of the License, or (at your option) any later
8 ## This program is distributed in the hope that it will be useful, but WITHOUT
9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 ## You should have received a copy of the GNU General Public License along with
14 ## this program; if not, see <http://www.gnu.org/licenses/>.
17 ## @deftypefn {Function File} [ @var{rawarr}, @var{xls}, @var{rstatus} ] = xls2oct (@var{xls})
18 ## @deftypefnx {Function File} [ @var{rawarr}, @var{xls}, @var{rstatus} ] = xls2oct (@var{xls}, @var{wsh})
19 ## @deftypefnx {Function File} [ @var{rawarr}, @var{xls}, @var{rstatus} ] = xls2oct (@var{xls}, @var{wsh}, @var{range})
20 ## @deftypefnx {Function File} [ @var{rawarr}, @var{xls}, @var{rstatus} ] = xls2oct (@var{xls}, @var{wsh}, @var{range}, @var{options})
22 ## Read data contained within cell range @var{range} from worksheet @var{wsh}
23 ## in an Excel spreadsheet file pointed to in struct @var{xls}.
25 ## @var{xls} is supposed to have been created earlier by xlsopen in the
26 ## same octave session.
28 ## @var{wsh} is either numerical or text, in the latter case it is
29 ## case-sensitive and it may be max. 31 characters long.
30 ## Note that in case of a numerical @var{wsh} this number refers to the
31 ## position in the worksheet stack, counted from the left in an Excel
32 ## window. The default is numerical 1, i.e. the leftmost worksheet
35 ## @var{range} is expected to be a regular spreadsheet range format,
36 ## or "" (empty string, indicating all data in a worksheet).
37 ## If no range is specified the occupied cell range will have to be
38 ## determined behind the scenes first; this can take some time for the
39 ## Java-based interfaces. Be aware that in COM/ActiveX interface the
40 ## used range can be outdated. The Java-based interfaces are more
41 ## reliable in this respect albeit much slower.
43 ## Optional argument @var{options}, a structure, can be used to
44 ## specify various read modes by setting option fields in the struct
45 ## to true (1) or false (0). Currently recognized option fields are:
48 ## @item "formulas_as_text"
49 ## If set to TRUE or 1, spreadsheet formulas (if at all present)
50 ## are read as formula strings rather than the evaluated formula
51 ## result values. The default value is 0 (FALSE).
53 ## @item 'strip_array'
54 ## Set the value of this field set to TRUE or 1 to strip the returned
55 ## output array @var{rawarr} from empty outer columns and rows. The
56 ## spreadsheet cell rectangle limits from where the data actually
57 ## came will be updated. The default value is FALSE or 0 (no cropping).
58 ## When using the COM interface, the output array is always cropped.
61 ## If only the first argument @var{xls} is specified, xls2oct will try
62 ## to read all contents from the first = leftmost (or the only)
63 ## worksheet (as if a range of @'' (empty string) was specified).
65 ## If only two arguments are specified, xls2oct assumes the second
66 ## argument to be @var{wsh}. In that case xls2oct will try to read
67 ## all data contained in that worksheet.
69 ## Return argument @var{rawarr} contains the raw spreadsheet cell data.
70 ## Use parsecell() to separate numeric and text values from @var{rawarr}.
72 ## Optional return argument @var{xls} contains the pointer struct,
73 ## If any data have been read, field @var{xls}.limits contains the
74 ## outermost column and row numbers of the actually returned cell range.
76 ## Optional return argument @var{rstatus} will be set to 1 if the
77 ## requested data have been read successfully, 0 otherwise.
79 ## Erroneous data and empty cells turn up empty in @var{rawarr}.
80 ## Date/time values in Excel are returned as numerical values.
81 ## Note that Excel and Octave have different date base values (1/1/1900 &
83 ## Be aware that Excel trims @var{rawarr} from empty outer rows & columns,
84 ## so any returned cell array may turn out to be smaller than requested
85 ## in @var{range}, independent of field 'formulas_as_text' in @var{options}.
86 ## When using COM, POI, or UNO interface, formulas in cells are evaluated; if
87 ## that fails cached values are retrieved. These may be outdated depending
88 ## on Excel's "Automatic calculation" settings when the spreadsheet was saved.
90 ## When reading from merged cells, all array elements NOT corresponding
91 ## to the leftmost or upper Excel cell will be treated as if the
92 ## "corresponding" Excel cells are empty.
94 ## Beware: when the COM interface is used, hidden Excel invocations may be
95 ## kept running silently in case of COM errors.
100 ## A = xls2oct (xls1, '2nd_sheet', 'C3:AB40');
101 ## (which returns the numeric contents in range C3:AB40 in worksheet
102 ## '2nd_sheet' from a spreadsheet file pointed to in pointer struct xls1,
103 ## into numeric array A)
107 ## [An, xls2, status] = xls2oct (xls2, 'Third_sheet');
110 ## @seealso {oct2xls, xlsopen, xlsclose, parsecell, xlsread, xlsfinfo, xlswrite }
114 ## Author: Philip Nienhuis
115 ## Created: 2010-10-16
117 ## 2009-01-03 (added OOXML support & cleaned up code. Excel
118 ## ADDRESS function still not implemented in Apache POI)
119 ## 2010-03-14 Updated help text
120 ## 2010-05-31 Updated help text (delay i.c.o. empty range due to getusedrange call)
121 ## 2010-07-28 Added option to read formulas as text strings rather than evaluated value
122 ## 2010-08-25 Small typo in help text
123 ## 2010-10-20 Added option fornot stripping output arrays
124 ## 2010-11-07 More rigorous input checks.
125 ## 2010-11-12 Moved pointer check into main func
126 ## 2010-11-13 Catch empty sheets when no range was specified
127 ## 2011-03-26 OpenXLS support added
128 ## 2011-03-29 Test for proper input xls struct extended
129 ## 2011-05-18 Experimental UNO support added
130 ## 2011-09-08 Minor code layout
131 ## 2012-01-26 Fixed "seealso" help string
132 ## 2012-02-25 Fixed missing quotes in struct check L.149-153
133 ## 2012-02-26 Updated texinfo header help text
134 ## 2012-06-06 Implemented "formulas_as_text" option for COM
135 ## 2012-06-07 Replaced all tabs by double space
137 ## Latest subfunc update: 2012-06-06
139 function [ rawarr, xls, rstatus ] = xls2oct (xls, wsh=1, datrange='', spsh_opts=[])
141 # Check if xls struct pointer seems valid
142 if (~isstruct (xls)), error ("File ptr struct expected for arg @ 1"); endif
143 test1 = ~isfield (xls, "xtype");
144 test1 = test1 || ~isfield (xls, "workbook");
145 test1 = test1 || isempty (xls.workbook);
146 test1 = test1 || isempty (xls.app);
147 test1 = test1 || ~ischar (xls.xtype);
149 error ("Invalid xls file pointer struct");
152 # Check worksheet ptr
153 if (~(ischar (wsh) || isnumeric (wsh))), error ("Integer (index) or text (wsh name) expected for arg # 2"); endif
155 if (~(isempty (datrange) || ischar (datrange))), error ("Character string (range) expected for arg # 3"); endif
157 # Check & setup options struct
158 if (nargin < 4 || isempty (spsh_opts))
159 spsh_opts.formulas_as_text = 0;
160 spsh_opts.strip_array = 1;
162 elseif (isstruct (spsh_opts))
163 if (~isfield (spsh_opts', 'formulas_as_text')), spsh_opts.formulas_as_text = 0; endif
164 if (~isfield (spsh_opts', 'strip_array')), spsh_opts.strip_array = 1; endif
167 error ("Structure expected for arg # 4");
170 # Select the proper interfaces
171 if (strcmp (xls.xtype, 'COM'))
172 # Call Excel tru COM / ActiveX server
173 [rawarr, xls, rstatus] = xls2com2oct (xls, wsh, datrange, spsh_opts);
174 elseif (strcmp (xls.xtype, 'POI'))
175 # Read xls file tru Java POI
176 [rawarr, xls, rstatus] = xls2jpoi2oct (xls, wsh, datrange, spsh_opts);
177 elseif (strcmp (xls.xtype, 'JXL'))
178 # Read xls file tru JExcelAPI
179 [rawarr, xls, rstatus] = xls2jxla2oct (xls, wsh, datrange, spsh_opts);
180 elseif (strcmp (xls.xtype, 'OXS'))
181 # Read xls file tru OpenXLS
182 [rawarr, xls, rstatus] = xls2oxs2oct (xls, wsh, datrange, spsh_opts);
183 elseif (strcmp (xls.xtype, 'UNO'))
184 # Read xls file tru OpenOffice.org UNO (Java) bridge
185 [rawarr, xls, rstatus] = xls2uno2oct (xls, wsh, datrange, spsh_opts);
186 # elseif ---- <Other interfaces here>
187 # Call to next interface
189 error (sprintf ("xls2oct: unknown Excel .xls interface - %s.", xls.xtype));
192 # Optionally strip empty outer rows and columns & keep track of original data location
193 if (spsh_opts.strip_array)
194 emptr = cellfun ('isempty', rawarr);
195 if (all (all (emptr)))
199 nrows = size (rawarr, 1); ncols = size (rawarr, 2);
201 while (all (emptr(irowt, :))), irowt++; endwhile
203 while (all (emptr(irowb, :))), irowb--; endwhile
205 while (all (emptr(:, icoll))), icoll++; endwhile
207 while (all (emptr(:, icolr))), icolr--; endwhile
209 # Crop output cell array and update limits
210 rawarr = rawarr(irowt:irowb, icoll:icolr);
211 xls.limits = xls.limits + [icoll-1, icolr-ncols; irowt-1, irowb-nrows];
218 #====================================================================================
219 ## Copyright (C) 2009,2010,2011,2012 P.R. Nienhuis, <pr.nienhuis at hccnet.nl>
221 ## based on mat2xls by Michael Goffioul (2007) <michael.goffioul@swing.be>
223 ## This program is free software; you can redistribute it and/or modify it under
224 ## the terms of the GNU General Public License as published by the Free Software
225 ## Foundation; either version 3 of the License, or (at your option) any later
228 ## This program is distributed in the hope that it will be useful, but WITHOUT
229 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
230 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
233 ## You should have received a copy of the GNU General Public License along with
234 ## this program; if not, see <http://www.gnu.org/licenses/>.
237 ## @deftypefn {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2com2oct (@var{xls})
238 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2com2oct (@var{xls}, @var{wsh})
239 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2com2oct (@var{xls}, @var{wsh}, @var{range})
240 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2com2oct (@var{xls}, @var{wsh}, @var{range}, @var{spsh_opts})
241 ## Get cell contents in @var{range} in worksheet @var{wsh} in an Excel
242 ## file pointed to in struct @var{xls} into the cell array @var{obj}.
244 ## xls2com2oct should not be invoked directly but rather through xls2oct.
249 ## [Arr, status, xls] = xls2com2oct (xls, 'Second_sheet', 'B3:AY41');
250 ## Arr = xls2com2oct (xls, 'Second_sheet');
253 ## @seealso {xls2oct, oct2xls, xlsopen, xlsclose, xlsread, xlswrite}
257 ## Author: Philip Nienhuis
258 ## Created: 2009-09-23
260 ## 2009-12-11 <forgot what it was>
261 ## 2010-10-07 Implemented limits (only reliable for empty input ranges)
262 ## 2010-10-08 Resulting data array now cropped (also in case of specified range)
263 ## 2010-10-10 More code cleanup (shuffled xls tests & wsh ptr code before range checks)
264 ## 2010-10-20 Slight change to Excel range setup
265 ## 2010-10-24 Added check for "live" ActiveX server
266 ## 2010-11-12 Moved ptr struct check into main func
267 ## 2010-11-13 Catch empty sheets when no range was specified
268 ## 2012-01-26 Fixed "seealso" help string
269 ## 2012-06-06 Implemented "formulas_as_text option"
271 function [rawarr, xls, rstatus ] = xls2com2oct (xls, wsh, crange, spsh_opts)
273 rstatus = 0; rawarr = {};
276 if (nargin < 2) error ("xls2com2oct needs a minimum of 2 arguments."); endif
277 if (size (wsh, 2) > 31)
278 warning ("Worksheet name too long - truncated")
283 # Check to see if ActiveX is still alive
285 wb_cnt = wb.Worksheets.count;
287 error ("ActiveX invocation in file ptr struct seems non-functional");
290 # Check & get handle to requested worksheet
291 wb_cnt = wb.Worksheets.count;
294 if (wsh < 1 || wsh > wb_cnt)
295 errstr = sprintf ("Worksheet number: %d out of range 1-%d", wsh, wb_cnt);
303 # Find worksheet number corresponding to name in wsh
304 wb_cnt = wb.Worksheets.count;
306 sh_name = wb.Worksheets(ii).name;
307 if (strcmp (sh_name, wsh)) old_sh = ii; endif
310 errstr = sprintf ("Worksheet name \"%s\" not present", wsh);
316 sh = wb.Worksheets (wsh);
319 if ((nargin == 2) || (isempty (crange)))
320 allcells = sh.UsedRange;
321 # Get actually used range indices
322 [trow, brow, lcol, rcol] = getusedrange (xls, old_sh);
323 if (trow == 0 && brow == 0)
326 printf ("Worksheet '%s' contains no data\n", sh.Name);
329 nrows = brow - trow + 1; ncols = rcol - lcol + 1;
330 topleft = calccelladdress (trow, lcol);
331 lowerright = calccelladdress (brow, rcol);
332 crange = [topleft ':' lowerright];
335 # Extract top_left_cell from range
336 [topleft, nrows, ncols, trow, lcol] = parse_sp_range (crange);
337 brow = trow + nrows - 1;
338 rcol = lcol + ncols - 1;
342 # Get object from Excel sheet, starting at cell top_left_cell
343 rr = sh.Range (crange);
344 if (spsh_opts.formulas_as_text)
351 # Take care of actual singe cell range
352 if (isnumeric (rawarr) || ischar (rawarr))
356 # If we get here, all seems to have gone OK
358 # Keep track of data rectangle limits
359 xls.limits = [lcol, rcol; trow, brow];
361 error ("No data read from Excel file");
368 #==================================================================================
370 ## Copyright (C) 2009,2010,2011,2012 Philip Nienhuis <prnienhuis at users.sf.net>
372 ## This program is free software; you can redistribute it and/or modify it under
373 ## the terms of the GNU General Public License as published by the Free Software
374 ## Foundation; either version 3 of the License, or (at your option) any later
377 ## This program is distributed in the hope that it will be useful, but WITHOUT
378 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
379 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
382 ## You should have received a copy of the GNU General Public License along with
383 ## this program; if not, see <http://www.gnu.org/licenses/>.
386 ## @deftypefn {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jpoi2oct (@var{xls})
387 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jpoi2oct (@var{xls}, @var{wsh})
388 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jpoi2oct (@var{xls}, @var{wsh}, @var{range})
389 ## Get cell contents in @var{range} in worksheet @var{wsh} in an Excel
390 ## file pointed to in struct @var{xls} into the cell array @var{obj}.
391 ## @var{range} can be a range or just the top left cell of the range.
393 ## xls2jpoi2oct should not be invoked directly but rather through xls2oct.
398 ## [Arr, status, xls] = xls2jpoi2oct (xls, 'Second_sheet', 'B3:AY41');
399 ## B = xls2jpoi2oct (xls, 'Second_sheet', 'B3');
402 ## @seealso {xls2oct, oct2xls, xlsopen, xlsclose, xlsread, xlswrite, oct2jpoi2xls}
406 ## Author: Philip Nienhuis
407 ## Created: 2009-11-23
409 ## 2010-01-11 Fall back to cached values when formula evaluator fails
410 ## 2010-03-14 Fixed max column nr for OOXML for empty given range
411 ## 2010-07-28 Added option to read formulas as text strings rather than evaluated value
412 ## 2010-08-01 Some bug fixes for formula reading (cvalue rather than scell)
413 ## 2010-10-10 Code cleanup: -getusedrange called; - fixed typo in formula evaluation msg;
414 ## '' moved cropping output array to calling function.
415 ## 2010-11-12 Moved ptr struct check into main func
416 ## 2010-11-13 Catch empty sheets when no range was specified
417 ## 2010-11-14 Fixed sheet # index (was offset by -1) in call to getusedrange() in case
418 ## of text sheet name arg
419 ## 2012-01-26 Fixed "seealso" help string
421 function [ rawarr, xls, rstatus ] = xls2jpoi2oct (xls, wsh, cellrange, spsh_opts)
425 # Get enumerated cell types. Beware as they start at 0 not 1
426 ctype(1) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_NUMERIC');
427 ctype(2) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_STRING');
428 ctype(3) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_FORMULA');
429 ctype(4) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_BLANK');
430 ctype(5) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_BOOLEAN');
431 ctype(6) = java_get ('org.apache.poi.ss.usermodel.Cell', 'CELL_TYPE_ERROR');
434 rstatus = 0; jerror = 0;
437 # Check if requested worksheet exists in the file & if so, get pointer
438 nr_of_sheets = wb.getNumberOfSheets ();
440 if (wsh > nr_of_sheets), error (sprintf ("Worksheet # %d bigger than nr. of sheets (%d) in file %s", wsh, nr_of_sheets, xls.filename)); endif
441 sh = wb.getSheetAt (wsh - 1); # POI sheet count 0-based
442 printf ("(Reading from worksheet %s)\n", sh.getSheetName ());
444 sh = wb.getSheet (wsh);
445 if (isempty (sh)), error (sprintf ("Worksheet %s not found in file %s", wsh, xls.filename)); endif
449 firstrow = sh.getFirstRowNum (); # 0-based
450 lastrow = sh.getLastRowNum (); # 0-based
451 if (isempty (cellrange))
453 # get numeric sheet index
454 ii = wb.getSheetIndex (sh) + 1;
458 [ firstrow, lastrow, lcol, rcol ] = getusedrange (xls, ii);
459 if (firstrow == 0 && lastrow == 0)
462 printf ("Worksheet '%s' contains no data\n", sh.getSheetName ());
466 nrows = lastrow - firstrow + 1;
467 ncols = rcol - lcol + 1;
470 # Translate range to HSSF POI row & column numbers
471 [topleft, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
472 lastrow = firstrow + nrows - 1;
473 rcol = lcol + ncols - 1;
476 # Create formula evaluator (needed to infer proper cell type into rawarr)
477 frm_eval = wb.getCreationHelper().createFormulaEvaluator ();
479 # Read contents into rawarr
480 rawarr = cell (nrows, ncols); # create placeholder
481 for ii = firstrow:lastrow
482 irow = sh.getRow (ii-1);
484 scol = (irow.getFirstCellNum).intValue ();
485 ecol = (irow.getLastCellNum).intValue () - 1;
487 scell = irow.getCell (jj-1);
489 # Explore cell contents
490 type_of_cell = scell.getCellType ();
491 if (type_of_cell == ctype(3)) # Formula
492 if ~(spsh_opts.formulas_as_text)
493 try # Because not al Excel formulas have been implemented in POI
494 cvalue = frm_eval.evaluate (scell);
495 type_of_cell = cvalue.getCellType();
496 # Separate switch because form.eval. yields different type
498 case ctype (1) # Numeric
499 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getNumberValue ();
500 case ctype(2) # String
501 rawarr {ii+1-firstrow, jj+1-lcol} = char (scell.getStringValue ());
502 case ctype (5) # Boolean
503 rawarr {ii+1-firstrow, jj+1-lcol} = scell.BooleanValue ();
507 # Set cell type to blank to skip switch below
508 type_of_cell = ctype(4);
510 # In case of formula errors we take the cached results
511 type_of_cell = scell.getCachedFormulaResultType ();
512 ++jerror; # We only need one warning even for multiple errors
516 # Preparations done, get data values into data array
518 case ctype(1) # 0 Numeric
519 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getNumericCellValue ();
520 case ctype(2) # 1 String
521 rawarr {ii+1-firstrow, jj+1-lcol} = char (scell.getRichStringCellValue ());
523 if (spsh_opts.formulas_as_text)
524 tmp = char (scell.getCellFormula ());
525 rawarr {ii+1-firstrow, jj+1-lcol} = ['=' tmp];
527 case ctype(4) # 3 Blank
528 # Blank; ignore until further notice
529 case ctype(5) # 4 Boolean
530 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getBooleanCellValue ();
539 if (jerror > 0) warning (sprintf ("xls2oct: %d cached values instead of formula evaluations.\n", jerror)); endif
542 xls.limits = [lcol, rcol; firstrow, lastrow];
547 #==================================================================================
548 ## Copyright (C) 2009,2010,2011,2012 Philip Nienhuis <prnienhuis at users.sf.net>
550 ## This program is free software; you can redistribute it and/or modify
551 ## it under the terms of the GNU General Public License as published by
552 ## the Free Software Foundation; either version 2 of the License, or
553 ## (at your option) any later version.
555 ## This program is distributed in the hope that it will be useful,
556 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
557 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
558 ## GNU General Public License for more details.
560 ## You should have received a copy of the GNU General Public License
561 ## along with Octave; see the file COPYING. If not, see
562 ## <http://www.gnu.org/licenses/>.
565 ## @deftypefn {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jxla2oct (@var{xls})
566 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jxla2oct (@var{xls}, @var{wsh})
567 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2jxla2oct (@var{xls}, @var{wsh}, @var{range})
568 ## Get cell contents in @var{range} in worksheet @var{wsh} in an Excel
569 ## file pointed to in struct @var{xls} into the cell array @var{obj}.
570 ## @var{range} can be a range or just the top left cell of the range.
572 ## xls2jxla2oct should not be invoked directly but rather through xls2oct.
577 ## [Arr, status, xls] = xls2jxla2oct (xls, 'Second_sheet', 'B3:AY41');
578 ## B = xls2jxla2oct (xls, 'Second_sheet');
581 ## @seealso {xls2oct, oct2xls, xlsopen, xlsclose, xlsread, xlswrite, oct2jxla2xls}
585 ## Author: Philip Nienhuis
586 ## Created: 2009-12-04
588 ## 2009-12-11 ??? some bug fix
589 ## 2010-07-28 Added option to read formulas as text strings rather than evaluated value
590 ## Added check for proper xls structure
591 ## 2010-07-29 Added check for too latge requested data rectangle
592 ## 2010-10-10 Code cleanup: -getusedrange(); moved cropping result array to
593 ## '' calling function
594 ## 2010-11-12 Moved ptr struct check into main func
595 ## 2010-11-13 Catch empty sheets when no range was specified
596 ## 2011-04-11 (Ron Goldman <ron@ocean.org.il>) Fixed missing months var, wrong arg
597 ## '' order in strsplit, wrong isTime condition
598 ## 2012-01-26 Fixed "seealso" help string
600 function [ rawarr, xls, rstatus ] = xls2jxla2oct (xls, wsh, cellrange, spsh_opts)
602 persistent ctype; persistent months;
604 ctype = cell (11, 1);
605 # Get enumerated cell types. Beware as they start at 0 not 1
606 ctype( 1) = (java_get ('jxl.CellType', 'BOOLEAN')).toString ();
607 ctype( 2) = (java_get ('jxl.CellType', 'BOOLEAN_FORMULA')).toString ();
608 ctype( 3) = (java_get ('jxl.CellType', 'DATE')).toString ();
609 ctype( 4) = (java_get ('jxl.CellType', 'DATE_FORMULA')).toString ();
610 ctype( 5) = (java_get ('jxl.CellType', 'EMPTY')).toString ();
611 ctype( 6) = (java_get ('jxl.CellType', 'ERROR')).toString ();
612 ctype( 7) = (java_get ('jxl.CellType', 'FORMULA_ERROR')).toString ();
613 ctype( 8) = (java_get ('jxl.CellType', 'NUMBER')).toString ();
614 ctype( 9) = (java_get ('jxl.CellType', 'LABEL')).toString ();
615 ctype(10) = (java_get ('jxl.CellType', 'NUMBER_FORMULA')).toString ();
616 ctype(11) = (java_get ('jxl.CellType', 'STRING_FORMULA')).toString ();
617 months = {'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'};
623 # Check if requested worksheet exists in the file & if so, get pointer
624 nr_of_sheets = wb.getNumberOfSheets ();
625 shnames = char (wb.getSheetNames ());
627 if (wsh > nr_of_sheets), error (sprintf ("Worksheet # %d bigger than nr. of sheets (%d) in file %s", wsh, nr_of_sheets, xls.filename)); endif
628 sh = wb.getSheet (wsh - 1); # JXL sheet count 0-based
629 printf ("(Reading from worksheet %s)\n", shnames {wsh});
631 sh = wb.getSheet (wsh);
632 if (isempty (sh)), error (sprintf ("Worksheet %s not found in file %s", wsh, xls.filename)); endif
635 if (isempty (cellrange))
636 # Get numeric sheet pointer (1-based)
638 while (ii <= nr_of_sheets)
639 if (strcmp (wsh, shnames{ii}) == 1)
641 ii = nr_of_sheets + 1;
646 # Get data rectangle row & column numbers (1-based)
647 [firstrow, lastrow, lcol, rcol] = getusedrange (xls, wsh);
648 if (firstrow == 0 && lastrow == 0)
651 printf ("Worksheet '%s' contains no data\n", shnames {wsh});
655 nrows = lastrow - firstrow + 1;
656 ncols = rcol - lcol + 1;
659 # Translate range to row & column numbers (1-based)
660 [dummy, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
661 # Check for too large requested range against actually present range
662 lastrow = min (firstrow + nrows - 1, sh.getRows ());
663 nrows = min (nrows, sh.getRows () - firstrow + 1);
664 ncols = min (ncols, sh.getColumns () - lcol + 1);
665 rcol = lcol + ncols - 1;
668 # Read contents into rawarr
669 rawarr = cell (nrows, ncols); # create placeholder
671 for ii = firstrow:lastrow
672 scell = sh.getCell (jj-1, ii-1);
673 switch char (scell.getType ())
674 case ctype {1} # Boolean
675 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getValue ();
676 case ctype {2} # Boolean formula
677 if (spsh_opts.formulas_as_text)
678 tmp = scell.getFormula ();
679 rawarr {ii+1-firstrow, jj+1-lcol} = ["=" tmp];
681 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getValue ();
683 case ctype {3} # Date
685 % Older JXL.JAR, returns float
686 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getValue ();
688 % Newer JXL.JAR, returns date string w. epoch = 1-1-1900 :-(
689 tmp = strsplit (char (scell.getDate ()), ' ');
690 yy = str2num (tmp{6});
691 mo = find (ismember (months, upper (tmp{2})) == 1);
692 dd = str2num (tmp{3});
693 hh = str2num (tmp{4}(1:2));
694 mi = str2num (tmp{4}(4:5));
695 ss = str2num (tmp{4}(7:8));
699 rawarr {ii+1-firstrow, jj+1-lcol} = datenum (yy, mo, dd, hh, mi, ss);
701 case ctype {4} # Date formula
702 if (spsh_opts.formulas_as_text)
703 tmp = scell.getFormula ();
704 rawarr {ii+1-firstrow, jj+1-lcol} = ["=" tmp];
707 % Older JXL.JAR, returns float
708 tmp = scell.getValue ();
709 % if we get here, we got a float (old JXL).
710 % Check if it is time
711 if (~scell.isTime ())
712 % Reset rawarr <> so it can be processed below as date string
713 rawarr {ii+1-firstrow, jj+1-lcol} = [];
715 rawarr {ii+1-firstrow, jj+1-lcol} = tmp;
717 unwind_protect_cleanup
718 if (isempty (rawarr {ii+1-firstrow, jj+1-lcol}))
719 % Newer JXL.JAR, returns date string w. epoch = 1-1-1900 :-(
720 tmp = strsplit (char (scell.getDate ()), ' ');
721 yy = str2num (tmp{6});
722 mo = find (ismember (months, upper (tmp{2})) == 1);
723 dd = str2num (tmp{3});
724 hh = str2num (tmp{4}(1:2));
725 mi = str2num (tmp{4}(4:5));
726 ss = str2num (tmp{4}(7:8));
728 yy = 0; mo = 0; dd = 0;
730 rawarr {ii+1-firstrow, jj+1-lcol} = datenum (yy, mo, dd, hh, mi, ss);
734 case { ctype {5}, ctype {6}, ctype {7} }
735 # Empty, Error or Formula error. Nothing to do here
736 case ctype {8} # Number
737 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getValue ();
738 case ctype {9} # String
739 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getString ();
740 case ctype {10} # Numerical formula
741 if (spsh_opts.formulas_as_text)
742 tmp = scell.getFormula ();
743 rawarr {ii+1-firstrow, jj+1-lcol} = ["=" tmp];
745 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getValue ();
747 case ctype {11} # String formula
748 if (spsh_opts.formulas_as_text)
749 tmp = scell.getFormula ();
750 rawarr {ii+1-firstrow, jj+1-lcol} = ["=" tmp];
752 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getString ();
761 xls.limits = [lcol, rcol; firstrow, lastrow];
766 ## Copyright (C) 2011 Philip Nienhuis <prnienhuis at users.sf.net>
768 ## This program is free software; you can redistribute it and/or modify it under
769 ## the terms of the GNU General Public License as published by the Free Software
770 ## Foundation; either version 3 of the License, or (at your option) any later
773 ## This program is distributed in the hope that it will be useful, but WITHOUT
774 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
775 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
778 ## You should have received a copy of the GNU General Public License along with
779 ## this program; if not, see <http://www.gnu.org/licenses/>.
782 ## @deftypefn {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2oxs2oct (@var{xls})
783 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2oxs2oct (@var{xls}, @var{wsh})
784 ## @deftypefnx {Function File} [@var{obj}, @var{rstatus}, @var{xls} ] = xls2oxs2oct (@var{xls}, @var{wsh}, @var{range})
785 ## Get cell contents in @var{range} in worksheet @var{wsh} in an Excel
786 ## file pointed to in struct @var{xls} into the cell array @var{obj}.
787 ## @var{range} can be a range or just the top left cell of the range.
789 ## xls2oxs2oct should not be invoked directly but rather through xls2oct.
792 ## Author: Philip Nienhuis
793 ## Created: 2011-03-26
795 ## 2012-02-25 Changed ctype into num array rather than cell array
797 function [ rawarr, xls, rstatus ] = xls2oxs2oct (xls, wsh, cellrange, spsh_opts)
801 ctype = zeros (6, 1);
802 # Get enumerated cell types. Beware as they start at 0 not 1
803 ctype( 1) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_STRING')); # 0
804 ctype( 2) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_FP')); # 1
805 ctype( 3) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_INT')); # 2
806 ctype( 4) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_FORMULA')); # 3
807 ctype( 5) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_BOOLEAN')); # 4
808 ctype( 6) = (java_get ('com.extentech.ExtenXLS.CellHandle', 'TYPE_DOUBLE')); # 5
814 # Check if requested worksheet exists in the file & if so, get pointer
815 nr_of_sheets = wb.getNumWorkSheets ();
817 if (wsh > nr_of_sheets), error (sprintf ("Worksheet # %d bigger than nr. of sheets (%d) in file %s", wsh, nr_of_sheets, xls.filename)); endif
818 sh = wb.getWorkSheet (wsh - 1); # OXS sheet count 0-based
819 printf ("(Reading from worksheet %s)\n", sh.getSheetName ());
822 sh = wb.getWorkSheet (wsh);
824 error (sprintf ("Worksheet %s not found in file %s", wsh, xls.filename));
828 if (isempty (cellrange))
829 # Get numeric sheet pointer (0-based)
830 wsh = sh.getTabIndex ();
831 # Get data rectangle row & column numbers (1-based)
832 [firstrow, lastrow, lcol, rcol] = getusedrange (xls, wsh+1);
833 if (firstrow == 0 && lastrow == 0)
836 printf ("Worksheet '%s' contains no data\n", shnames {wsh});
840 nrows = lastrow - firstrow + 1;
841 ncols = rcol - lcol + 1;
844 # Translate range to row & column numbers (1-based)
845 [dummy, nrows, ncols, firstrow, lcol] = parse_sp_range (cellrange);
846 # Check for too large requested range against actually present range
847 lastrow = min (firstrow + nrows - 1, sh.getLastRow + 1 ());
848 nrows = min (nrows, sh.getLastRow () - firstrow + 1);
849 ncols = min (ncols, sh.getLastCol () - lcol + 1);
850 rcol = lcol + ncols - 1;
853 # Read contents into rawarr
854 rawarr = cell (nrows, ncols); # create placeholder
856 for ii = firstrow:lastrow
858 scell = sh.getCell (ii-1, jj-1);
859 sctype = scell.getCellType ();
860 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getVal ();
861 if (sctype == ctype(2) || sctype == ctype(3) || sctype == ctype(6))
862 rawarr {ii+1-firstrow, jj+1-lcol} = scell.getDoubleVal ();
865 # Empty or non-existing cell
871 xls.limits = [lcol, rcol; firstrow, lastrow];
876 ## Copyright (C) 2011,2012 Philip Nienhuis <prnienhuis@users.sf.net>
878 ## This program is free software; you can redistribute it and/or modify it under
879 ## the terms of the GNU General Public License as published by the Free Software
880 ## Foundation; either version 3 of the License, or (at your option) any later
883 ## This program is distributed in the hope that it will be useful, but WITHOUT
884 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
885 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
888 ## You should have received a copy of the GNU General Public License along with
889 ## this program; if not, see <http://www.gnu.org/licenses/>.
893 ## Author: Philip Nienhuis <prnienhuis@users.sf.net>
894 ## Created: 2011-05-05
896 ## 2011-09-18 Adapted sh_names type to LO 3.4.1
897 ## 2011-09-19 Try to decipher if formulas return numeric or string values
899 function [rawarr, xls, rstatus] = xls2uno2oct (xls, wsh, datrange, spsh_opts)
901 sheets = xls.workbook.getSheets ();
902 sh_names = sheets.getElementNames ();
903 if (! iscell (sh_names))
904 # Java array (LibreOffice 3.4.+); convert to cellstr
905 sh_names = char (sh_names);
907 sh_names = {sh_names};
910 # Check sheet pointer
912 if (wsh < 1 || wsh > numel (sh_names))
913 error ("Sheet index %d out of range 1-%d", wsh, numel (sh_names));
916 ii = strmatch (wsh, sh_names);
917 if (isempty (ii)), error ("Sheet '%s' not found", wsh); endif
920 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.sheet.XSpreadsheet');
921 sh = sheets.getByName(sh_names{wsh}).getObject.queryInterface (unotmp);
923 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.sheet.XCellRangesQuery');
924 xRQ = sh.queryInterface (unotmp);
925 # Get cell ranges of all rectangles containing data. Type values:
926 #java_get ('com.sun.star.sheet.CellFlags', 'VALUE') ans = 1
927 #java_get ('com.sun.star.sheet.CellFlags', 'DATETIME') ans = 2
928 #java_get ('com.sun.star.sheet.CellFlags', 'STRING') ans = 4
929 #java_get ('com.sun.star.sheet.CellFlags', 'FORMULA') ans = 16
930 # Yep, boolean is lacking...
931 Cellflgs = javaObject ("java.lang.Short", "23");
932 ccells = xRQ.queryContentCells (Cellflgs);
933 addrs = ccells.getRangeAddressesAsString ();
935 # Strip sheet name from addresses
936 adrblks = strsplit (addrs, ',');
937 if (isempty (adrblks))
938 warning ('Sheet %s contains no data', sh_names{wsh});
942 # Either parse (given cell range) or prepare (unknown range) help variables.
943 # As OpenOffice knows the occupied range, we need the limits anyway to avoid
944 # out-of-range errors
945 [ trow, brow, lcol, rcol ] = getusedrange (xls, wsh);
946 if (isempty (datrange))
947 nrows = brow - trow + 1; # Number of rows to be read
948 ncols = rcol - lcol + 1; # Number of columns to be read
950 [dummy, nrows, ncols, srow, scol] = parse_sp_range (datrange);
951 # Truncate range silently if needed
952 brow = min (srow + nrows - 1, brow);
953 rcol = min (scol + ncols - 1, rcol);
954 trow = max (trow, srow);
955 lcol = max (lcol, scol);
956 nrows = min (brow - trow + 1, nrows); # Number of rows to be read
957 ncols = min (rcol - lcol + 1, ncols); # Number of columns to be read
959 # Create storage for data at Octave side
960 rawarr = cell (nrows, ncols);
962 # Get data. Apparently row & column indices are 0-based in UNO
965 XCell = sh.getCellByPosition (jj, ii);
966 cType = XCell.getType().getValue ();
969 rawarr{ii-trow+2, jj-lcol+2} = XCell.getValue ();
971 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.text.XText');
972 rawarr{ii-trow+2, jj-lcol+2} = XCell.queryInterface (unotmp).getString ();
974 if (spsh_opts.formulas_as_text)
975 rawarr{ii-trow+2, jj-lcol+2} = XCell.getFormula ();
977 # Unfortunately OOo gives no clue as to the type of formula result
978 unotmp = java_new ('com.sun.star.uno.Type', 'com.sun.star.text.XText');
979 rawarr{ii-trow+2, jj-lcol+2} = XCell.queryInterface (unotmp).getString ();
980 tmp = str2double (rawarr{ii-trow+2, jj-lcol+2});
981 # If the string happens to contain just a number we'll assume it is numeric
982 if (~isnan (tmp)); rawarr{ii-trow+2, jj-lcol+2} = tmp; endif
990 # Keep track of data rectangle limits
991 xls.limits = [lcol, rcol; trow, brow];
993 rstatus = ~isempty (rawarr);