]> Creatis software - clitk.git/blob - utilities/pugixml/pugixml.cpp
Merge branch 'master' of git://git.creatis.insa-lyon.fr/clitk
[clitk.git] / utilities / pugixml / pugixml.cpp
1 /**
2  * pugixml parser - version 1.9
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16
17 #include "pugixml.hpp"
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24
25 #ifdef PUGIXML_WCHAR_MODE
26 #       include <wchar.h>
27 #endif
28
29 #ifndef PUGIXML_NO_XPATH
30 #       include <math.h>
31 #       include <float.h>
32 #endif
33
34 #ifndef PUGIXML_NO_STL
35 #       include <istream>
36 #       include <ostream>
37 #       include <string>
38 #endif
39
40 // For placement new
41 #include <new>
42
43 #ifdef _MSC_VER
44 #       pragma warning(push)
45 #       pragma warning(disable: 4127) // conditional expression is constant
46 #       pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 #       pragma warning(disable: 4702) // unreachable code
48 #       pragma warning(disable: 4996) // this function or variable may be unsafe
49 #endif
50
51 #if defined(_MSC_VER) && defined(__c2__)
52 #       pragma clang diagnostic push
53 #       pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54 #endif
55
56 #ifdef __INTEL_COMPILER
57 #       pragma warning(disable: 177) // function was declared but never referenced
58 #       pragma warning(disable: 279) // controlling expression is constant
59 #       pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #       pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #       pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66
67 #ifdef __BORLANDC__
68 #       pragma option push
69 #       pragma warn -8008 // condition is always false
70 #       pragma warn -8066 // unreachable code
71 #endif
72
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #       pragma diag_suppress=178 // function was declared but never referenced
76 #       pragma diag_suppress=237 // controlling expression is constant
77 #endif
78
79 #ifdef __TI_COMPILER_VERSION__
80 #       pragma diag_suppress 179 // function was declared but never referenced
81 #endif
82
83 // Inlining controls
84 #if defined(_MSC_VER) && _MSC_VER >= 1300
85 #       define PUGI__NO_INLINE __declspec(noinline)
86 #elif defined(__GNUC__)
87 #       define PUGI__NO_INLINE __attribute__((noinline))
88 #else
89 #       define PUGI__NO_INLINE
90 #endif
91
92 // Branch weight controls
93 #if defined(__GNUC__) && !defined(__c2__)
94 #       define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
95 #else
96 #       define PUGI__UNLIKELY(cond) (cond)
97 #endif
98
99 // Simple static assertion
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
101
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
103 #ifdef __DMC__
104 #       define PUGI__DMC_VOLATILE volatile
105 #else
106 #       define PUGI__DMC_VOLATILE
107 #endif
108
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
110 #if defined(__clang__) && defined(__has_attribute)
111 #       if __has_attribute(no_sanitize)
112 #               define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
113 #       else
114 #               define PUGI__UNSIGNED_OVERFLOW
115 #       endif
116 #else
117 #       define PUGI__UNSIGNED_OVERFLOW
118 #endif
119
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
122 using std::memcpy;
123 using std::memmove;
124 using std::memset;
125 #endif
126
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
129 #       define LLONG_MIN (-LLONG_MAX - 1LL)
130 #       define LLONG_MAX __LONG_LONG_MAX__
131 #       define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
132 #endif
133
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
135 #if defined(_MSC_VER) && !defined(__S3E__)
136 #       define PUGI__MSVC_CRT_VERSION _MSC_VER
137 #endif
138
139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
140 #if __cplusplus >= 201103
141 #       define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
143 #       define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
144 #else
145 #       define PUGI__SNPRINTF sprintf
146 #endif
147
148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
149 #ifdef PUGIXML_HEADER_ONLY
150 #       define PUGI__NS_BEGIN namespace pugi { namespace impl {
151 #       define PUGI__NS_END } }
152 #       define PUGI__FN inline
153 #       define PUGI__FN_NO_INLINE inline
154 #else
155 #       if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
156 #               define PUGI__NS_BEGIN namespace pugi { namespace impl {
157 #               define PUGI__NS_END } }
158 #       else
159 #               define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
160 #               define PUGI__NS_END } } }
161 #       endif
162 #       define PUGI__FN
163 #       define PUGI__FN_NO_INLINE PUGI__NO_INLINE
164 #endif
165
166 // uintptr_t and fixed-width integer types
// Older MSVC (< 1600) and Borland (< 0x561) get hand-written typedefs inside namespace pugi, built on the
// compiler-specific __intN types; every other toolchain takes the types from <stdint.h>.
// NOTE(review): presumably these older toolchains do not ship <stdint.h> - confirm before changing the version checks.
167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
168 namespace pugi
169 {
// uintptr_t is only declared here when the CRT headers have not already announced it via _UINTPTR_T_DEFINED
170 #       ifndef _UINTPTR_T_DEFINED
171         typedef size_t uintptr_t;
172 #       endif
173
174         typedef unsigned __int8 uint8_t;
175         typedef unsigned __int16 uint16_t;
176         typedef unsigned __int32 uint32_t;
177 }
178 #else
179 #       include <stdint.h>
180 #endif
181
182 // Memory allocation
183 PUGI__NS_BEGIN
184         PUGI__FN void* default_allocate(size_t size)
185         {
186                 return malloc(size);
187         }
188
189         PUGI__FN void default_deallocate(void* ptr)
190         {
191                 free(ptr);
192         }
193
// Holder for the pair of allocation hooks used by the code in this file (reached through the xml_memory typedef below).
// The template parameter T is not used by the members; it only exists so the statics can be defined as template
// statics (see the comment on the definitions).
194         template <typename T>
195         struct xml_memory_management_function_storage
196         {
197                 static allocation_function allocate;
198                 static deallocation_function deallocate;
199         };
200
201         // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
202         // Without a template<> we'll get multiple definitions of the same static
// Both hooks start out pointing at the malloc/free based defaults defined above
203         template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
204         template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
205
// Single instantiation used everywhere in this file: xml_memory::allocate / xml_memory::deallocate
206         typedef xml_memory_management_function_storage<int> xml_memory;
207 PUGI__NS_END
208
209 // String utilities
210 PUGI__NS_BEGIN
211         // Get string length
212         PUGI__FN size_t strlength(const char_t* s)
213         {
214                 assert(s);
215
216         #ifdef PUGIXML_WCHAR_MODE
217                 return wcslen(s);
218         #else
219                 return strlen(s);
220         #endif
221         }
222
223         // Compare two strings
224         PUGI__FN bool strequal(const char_t* src, const char_t* dst)
225         {
226                 assert(src && dst);
227
228         #ifdef PUGIXML_WCHAR_MODE
229                 return wcscmp(src, dst) == 0;
230         #else
231                 return strcmp(src, dst) == 0;
232         #endif
233         }
234
235         // Compare lhs with [rhs_begin, rhs_end)
236         PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
237         {
238                 for (size_t i = 0; i < count; ++i)
239                         if (lhs[i] != rhs[i])
240                                 return false;
241
242                 return lhs[count] == 0;
243         }
244
245         // Get length of wide string, even if CRT lacks wide character support
246         PUGI__FN size_t strlength_wide(const wchar_t* s)
247         {
248                 assert(s);
249
250         #ifdef PUGIXML_WCHAR_MODE
251                 return wcslen(s);
252         #else
253                 const wchar_t* end = s;
254                 while (*end) end++;
255                 return static_cast<size_t>(end - s);
256         #endif
257         }
258 PUGI__NS_END
259
260 // auto_ptr-like object for exception recovery
261 PUGI__NS_BEGIN
// Small scope guard: pairs a raw pointer with the function that disposes of it.
// The destructor invokes the deleter on a non-null pointer; release() gives the
// pointer back to the caller and disarms the guard.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;
	D deleter;

	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		// nothing is owned after release() or when constructed with a null pointer
		if (!data) return;

		deleter(data);
	}

	// Hands the pointer back to the caller; the destructor will no longer touch it
	T* release()
	{
		T* const owned = data;

		data = 0;

		return owned;
	}
};
285 PUGI__NS_END
286
287 #ifdef PUGIXML_COMPACT
288 PUGI__NS_BEGIN
289         class compact_hash_table
290         {
291         public:
292                 compact_hash_table(): _items(0), _capacity(0), _count(0)
293                 {
294                 }
295
296                 void clear()
297                 {
298                         if (_items)
299                         {
300                                 xml_memory::deallocate(_items);
301                                 _items = 0;
302                                 _capacity = 0;
303                                 _count = 0;
304                         }
305                 }
306
307                 void* find(const void* key)
308                 {
309                         if (_capacity == 0) return 0;
310
311                         item_t* item = get_item(key);
312                         assert(item);
313                         assert(item->key == key || (item->key == 0 && item->value == 0));
314
315                         return item->value;
316                 }
317
318                 void insert(const void* key, void* value)
319                 {
320                         assert(_capacity != 0 && _count < _capacity - _capacity / 4);
321
322                         item_t* item = get_item(key);
323                         assert(item);
324
325                         if (item->key == 0)
326                         {
327                                 _count++;
328                                 item->key = key;
329                         }
330
331                         item->value = value;
332                 }
333
334                 bool reserve(size_t extra = 16)
335                 {
336                         if (_count + extra >= _capacity - _capacity / 4)
337                                 return rehash(_count + extra);
338
339                         return true;
340                 }
341
342         private:
343                 struct item_t
344                 {
345                         const void* key;
346                         void* value;
347                 };
348
349                 item_t* _items;
350                 size_t _capacity;
351
352                 size_t _count;
353
354                 bool rehash(size_t count);
355
356                 item_t* get_item(const void* key)
357                 {
358                         assert(key);
359                         assert(_capacity > 0);
360
361                         size_t hashmod = _capacity - 1;
362                         size_t bucket = hash(key) & hashmod;
363
364                         for (size_t probe = 0; probe <= hashmod; ++probe)
365                         {
366                                 item_t& probe_item = _items[bucket];
367
368                                 if (probe_item.key == key || probe_item.key == 0)
369                                         return &probe_item;
370
371                                 // hash collision, quadratic probing
372                                 bucket = (bucket + probe + 1) & hashmod;
373                         }
374
375                         assert(false && "Hash table is full"); // unreachable
376                         return 0;
377                 }
378
379                 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
380                 {
381                         unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
382
383                         // MurmurHash3 32-bit finalizer
384                         h ^= h >> 16;
385                         h *= 0x85ebca6bu;
386                         h ^= h >> 13;
387                         h *= 0xc2b2ae35u;
388                         h ^= h >> 16;
389
390                         return h;
391                 }
392         };
393
394         PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
395         {
396                 size_t capacity = 32;
397                 while (count >= capacity - capacity / 4)
398                         capacity *= 2;
399
400                 compact_hash_table rt;
401                 rt._capacity = capacity;
402                 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
403
404                 if (!rt._items)
405                         return false;
406
407                 memset(rt._items, 0, sizeof(item_t) * capacity);
408
409                 for (size_t i = 0; i < _capacity; ++i)
410                         if (_items[i].key)
411                                 rt.insert(_items[i].key, _items[i].value);
412
413                 if (_items)
414                         xml_memory::deallocate(_items);
415
416                 _capacity = capacity;
417                 _items = rt._items;
418
419                 assert(_count == rt._count);
420
421                 return true;
422         }
423
424 PUGI__NS_END
425 #endif
426
427 PUGI__NS_BEGIN
// Allocation granularity used by the string allocator below: string sizes are rounded up to a multiple of it
// and string offsets/sizes are stored divided by it. Fixed at 4 in compact mode, pointer-sized otherwise.
428 #ifdef PUGIXML_COMPACT
429         static const uintptr_t xml_memory_block_alignment = 4;
430 #else
431         static const uintptr_t xml_memory_block_alignment = sizeof(void*);
432 #endif
433
434         // extra metadata bits
// These flags live in the low bits of an object's header value; the lowest four bits (mask 15) carry the
// node type, which PUGI__NODETYPE extracts.
435         static const uintptr_t xml_memory_page_contents_shared_mask = 64;
436         static const uintptr_t xml_memory_page_name_allocated_mask = 32;
437         static const uintptr_t xml_memory_page_value_allocated_mask = 16;
438         static const uintptr_t xml_memory_page_type_mask = 15;
439
440         // combined masks for string uniqueness
441         static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
442         static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
443
// Header encoding helpers.
// Compact mode: the header is an object with a get_page() member, so no packed value is built here.
// Default mode: the header packs the byte distance from the page to the object into the bits above the low 8
// and keeps the flags in the low 8 bits; PUGI__GETPAGE_IMPL undoes this by subtracting (header >> 8) from the
// address of the header itself.
// NOTE(review): the round trip only works if the header is stored at the address passed as `object`
// (presumably the first member of the node/attribute structs) - confirm against the struct definitions.
444 #ifdef PUGIXML_COMPACT
445         #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
446         #define PUGI__GETPAGE_IMPL(header) (header).get_page()
447 #else
448         #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
449         // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
450         #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
451 #endif
452
// Convenience wrappers operating on a pointer n to an object that has a `header` member:
// PUGI__GETPAGE yields the owning memory page, PUGI__NODETYPE the xml_node_type stored in the type bits.
453         #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
454         #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
455
struct xml_allocator;

// Bookkeeping header that sits at the very start of each memory page; the page payload
// follows right after it. Pages are chained through prev/next, and busy_size/freed_size
// count payload bytes that were handed out and given back, respectively.
struct xml_memory_page
{
	// Turns raw storage into a page header with every field cleared and returns it
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		page->allocator = 0;

		page->prev = 0;
		page->next = 0;

		page->busy_size = 0;
		page->freed_size = 0;

	#ifdef PUGIXML_COMPACT
		page->compact_string_base = 0;
		page->compact_shared_parent = 0;
		page->compact_page_marker = 0;
	#endif

		return page;
	}

	xml_allocator* allocator;

	xml_memory_page* prev;
	xml_memory_page* next;

	size_t busy_size;
	size_t freed_size;

#ifdef PUGIXML_COMPACT
	char_t* compact_string_base;
	void* compact_shared_parent;
	uint32_t* compact_page_marker;
#endif
};
493
// Number of payload bytes in a regular page: the configured page size (PUGIXML_MEMORY_PAGE_SIZE, or 32768
// when that macro is not defined) minus the xml_memory_page header, which is allocated in the same block.
494         static const size_t xml_memory_page_size =
495         #ifdef PUGIXML_MEMORY_PAGE_SIZE
496                 (PUGIXML_MEMORY_PAGE_SIZE)
497         #else
498                 32768
499         #endif
500                 - sizeof(xml_memory_page);
501
// Header written immediately in front of the characters of every string returned by xml_allocator::allocate_string.
// Both fields are stored in units of xml_memory_block_alignment so that they fit into 16 bits.
502         struct xml_memory_string_header
503         {
504                 uint16_t page_offset; // distance of this header from the start of the page payload (the bytes right after xml_memory_page)
505                 uint16_t full_size; // size of the allocation including this header; 0 if string occupies whole page
506         };
507
508         struct xml_allocator
509         {
510                 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
511                 {
512                 #ifdef PUGIXML_COMPACT
513                         _hash = 0;
514                 #endif
515                 }
516
517                 xml_memory_page* allocate_page(size_t data_size)
518                 {
519                         size_t size = sizeof(xml_memory_page) + data_size;
520
521                         // allocate block with some alignment, leaving memory for worst-case padding
522                         void* memory = xml_memory::allocate(size);
523                         if (!memory) return 0;
524
525                         // prepare page structure
526                         xml_memory_page* page = xml_memory_page::construct(memory);
527                         assert(page);
528
529                         page->allocator = _root->allocator;
530
531                         return page;
532                 }
533
534                 static void deallocate_page(xml_memory_page* page)
535                 {
536                         xml_memory::deallocate(page);
537                 }
538
539                 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
540
541                 void* allocate_memory(size_t size, xml_memory_page*& out_page)
542                 {
543                         if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
544                                 return allocate_memory_oob(size, out_page);
545
546                         void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
547
548                         _busy_size += size;
549
550                         out_page = _root;
551
552                         return buf;
553                 }
554
555         #ifdef PUGIXML_COMPACT
556                 void* allocate_object(size_t size, xml_memory_page*& out_page)
557                 {
558                         void* result = allocate_memory(size + sizeof(uint32_t), out_page);
559                         if (!result) return 0;
560
561                         // adjust for marker
562                         ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
563
564                         if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
565                         {
566                                 // insert new marker
567                                 uint32_t* marker = static_cast<uint32_t*>(result);
568
569                                 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
570                                 out_page->compact_page_marker = marker;
571
572                                 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
573                                 // this will make sure deallocate_memory correctly tracks the size
574                                 out_page->freed_size += sizeof(uint32_t);
575
576                                 return marker + 1;
577                         }
578                         else
579                         {
580                                 // roll back uint32_t part
581                                 _busy_size -= sizeof(uint32_t);
582
583                                 return result;
584                         }
585                 }
586         #else
587                 void* allocate_object(size_t size, xml_memory_page*& out_page)
588                 {
589                         return allocate_memory(size, out_page);
590                 }
591         #endif
592
593                 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
594                 {
595                         if (page == _root) page->busy_size = _busy_size;
596
597                         assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
598                         (void)!ptr;
599
600                         page->freed_size += size;
601                         assert(page->freed_size <= page->busy_size);
602
603                         if (page->freed_size == page->busy_size)
604                         {
605                                 if (page->next == 0)
606                                 {
607                                         assert(_root == page);
608
609                                         // top page freed, just reset sizes
610                                         page->busy_size = 0;
611                                         page->freed_size = 0;
612
613                                 #ifdef PUGIXML_COMPACT
614                                         // reset compact state to maximize efficiency
615                                         page->compact_string_base = 0;
616                                         page->compact_shared_parent = 0;
617                                         page->compact_page_marker = 0;
618                                 #endif
619
620                                         _busy_size = 0;
621                                 }
622                                 else
623                                 {
624                                         assert(_root != page);
625                                         assert(page->prev);
626
627                                         // remove from the list
628                                         page->prev->next = page->next;
629                                         page->next->prev = page->prev;
630
631                                         // deallocate
632                                         deallocate_page(page);
633                                 }
634                         }
635                 }
636
637                 char_t* allocate_string(size_t length)
638                 {
639                         static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
640
641                         PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
642
643                         // allocate memory for string and header block
644                         size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
645
646                         // round size up to block alignment boundary
647                         size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
648
649                         xml_memory_page* page;
650                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
651
652                         if (!header) return 0;
653
654                         // setup header
655                         ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
656
657                         assert(page_offset % xml_memory_block_alignment == 0);
658                         assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
659                         header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
660
661                         // full_size == 0 for large strings that occupy the whole page
662                         assert(full_size % xml_memory_block_alignment == 0);
663                         assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
664                         header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
665
666                         // round-trip through void* to avoid 'cast increases required alignment of target type' warning
667                         // header is guaranteed a pointer-sized alignment, which should be enough for char_t
668                         return static_cast<char_t*>(static_cast<void*>(header + 1));
669                 }
670
671                 void deallocate_string(char_t* string)
672                 {
673                         // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
674                         // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
675
676                         // get header
677                         xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
678                         assert(header);
679
680                         // deallocate
681                         size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
682                         xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
683
684                         // if full_size == 0 then this string occupies the whole page
685                         size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
686
687                         deallocate_memory(header, full_size, page);
688                 }
689
690                 bool reserve()
691                 {
692                 #ifdef PUGIXML_COMPACT
693                         return _hash->reserve();
694                 #else
695                         return true;
696                 #endif
697                 }
698
699                 xml_memory_page* _root;
700                 size_t _busy_size;
701
702         #ifdef PUGIXML_COMPACT
703                 compact_hash_table* _hash;
704         #endif
705         };
706
707         PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
708         {
709                 const size_t large_allocation_threshold = xml_memory_page_size / 4;
710
711                 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
712                 out_page = page;
713
714                 if (!page) return 0;
715
716                 if (size <= large_allocation_threshold)
717                 {
718                         _root->busy_size = _busy_size;
719
720                         // insert page at the end of linked list
721                         page->prev = _root;
722                         _root->next = page;
723                         _root = page;
724
725                         _busy_size = size;
726                 }
727                 else
728                 {
729                         // insert page before the end of linked list, so that it is deleted as soon as possible
730                         // the last page is not deleted even if it's empty (see deallocate_memory)
731                         assert(_root->prev);
732
733                         page->prev = _root->prev;
734                         page->next = _root;
735
736                         _root->prev->next = page;
737                         _root->prev = page;
738
739                         page->busy_size = size;
740                 }
741
742                 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
743         }
744 PUGI__NS_END
745
746 #ifdef PUGIXML_COMPACT
747 PUGI__NS_BEGIN
// Alignment unit of compact mode, kept both as a shift amount and as a byte count (1 << 2 == 4).
// compact_header stores the distance to its page marker divided by this unit.
748         static const uintptr_t compact_alignment_log2 = 2;
749         static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
750
751         class compact_header
752         {
753         public:
754                 compact_header(xml_memory_page* page, unsigned int flags)
755                 {
756                         PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
757
758                         ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
759                         assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
760
761                         _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
762                         _flags = static_cast<unsigned char>(flags);
763                 }
764
765                 void operator&=(uintptr_t mod)
766                 {
767                         _flags &= static_cast<unsigned char>(mod);
768                 }
769
770                 void operator|=(uintptr_t mod)
771                 {
772                         _flags |= static_cast<unsigned char>(mod);
773                 }
774
775                 uintptr_t operator&(uintptr_t mod) const
776                 {
777                         return _flags & mod;
778                 }
779
780                 xml_memory_page* get_page() const
781                 {
782                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
783                         const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
784                         const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
785
786                         return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
787                 }
788
789         private:
790                 unsigned char _page;
791                 unsigned char _flags;
792         };
793
794         PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
795         {
796                 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
797
798                 return header->get_page();
799         }
800
801         template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
802         {
803                 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
804         }
805
806         template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
807         {
808                 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
809         }
810
811         template <typename T, int header_offset, int start = -126> class compact_pointer
812         {
813         public:
814                 compact_pointer(): _data(0)
815                 {
816                 }
817
818                 void operator=(const compact_pointer& rhs)
819                 {
820                         *this = rhs + 0;
821                 }
822
823                 void operator=(T* value)
824                 {
825                         if (value)
826                         {
827                                 // value is guaranteed to be compact-aligned; 'this' is not
828                                 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
829                                 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
830                                 // compensate for arithmetic shift rounding for negative values
831                                 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
832                                 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
833
834                                 if (static_cast<uintptr_t>(offset) <= 253)
835                                         _data = static_cast<unsigned char>(offset + 1);
836                                 else
837                                 {
838                                         compact_set_value<header_offset>(this, value);
839
840                                         _data = 255;
841                                 }
842                         }
843                         else
844                                 _data = 0;
845                 }
846
847                 operator T*() const
848                 {
849                         if (_data)
850                         {
851                                 if (_data < 255)
852                                 {
853                                         uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
854
855                                         return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
856                                 }
857                                 else
858                                         return compact_get_value<header_offset, T>(this);
859                         }
860                         else
861                                 return 0;
862                 }
863
864                 T* operator->() const
865                 {
866                         return *this;
867                 }
868
869         private:
870                 unsigned char _data;
871         };
872
873         template <typename T, int header_offset> class compact_pointer_parent
874         {
875         public:
876                 compact_pointer_parent(): _data(0)
877                 {
878                 }
879
880                 void operator=(const compact_pointer_parent& rhs)
881                 {
882                         *this = rhs + 0;
883                 }
884
885                 void operator=(T* value)
886                 {
887                         if (value)
888                         {
889                                 // value is guaranteed to be compact-aligned; 'this' is not
890                                 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
891                                 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
892                                 // compensate for arithmetic shift behavior for negative values
893                                 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
894                                 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
895
896                                 if (static_cast<uintptr_t>(offset) <= 65533)
897                                 {
898                                         _data = static_cast<unsigned short>(offset + 1);
899                                 }
900                                 else
901                                 {
902                                         xml_memory_page* page = compact_get_page(this, header_offset);
903
904                                         if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
905                                                 page->compact_shared_parent = value;
906
907                                         if (page->compact_shared_parent == value)
908                                         {
909                                                 _data = 65534;
910                                         }
911                                         else
912                                         {
913                                                 compact_set_value<header_offset>(this, value);
914
915                                                 _data = 65535;
916                                         }
917                                 }
918                         }
919                         else
920                         {
921                                 _data = 0;
922                         }
923                 }
924
925                 operator T*() const
926                 {
927                         if (_data)
928                         {
929                                 if (_data < 65534)
930                                 {
931                                         uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
932
933                                         return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
934                                 }
935                                 else if (_data == 65534)
936                                         return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
937                                 else
938                                         return compact_get_value<header_offset, T>(this);
939                         }
940                         else
941                                 return 0;
942                 }
943
944                 T* operator->() const
945                 {
946                         return *this;
947                 }
948
949         private:
950                 uint16_t _data;
951         };
952
953         template <int header_offset, int base_offset> class compact_string
954         {
955         public:
956                 compact_string(): _data(0)
957                 {
958                 }
959
960                 void operator=(const compact_string& rhs)
961                 {
962                         *this = rhs + 0;
963                 }
964
965                 void operator=(char_t* value)
966                 {
967                         if (value)
968                         {
969                                 xml_memory_page* page = compact_get_page(this, header_offset);
970
971                                 if (PUGI__UNLIKELY(page->compact_string_base == 0))
972                                         page->compact_string_base = value;
973
974                                 ptrdiff_t offset = value - page->compact_string_base;
975
976                                 if (static_cast<uintptr_t>(offset) < (65535 << 7))
977                                 {
978                                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
979                                         uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
980
981                                         if (*base == 0)
982                                         {
983                                                 *base = static_cast<uint16_t>((offset >> 7) + 1);
984                                                 _data = static_cast<unsigned char>((offset & 127) + 1);
985                                         }
986                                         else
987                                         {
988                                                 ptrdiff_t remainder = offset - ((*base - 1) << 7);
989
990                                                 if (static_cast<uintptr_t>(remainder) <= 253)
991                                                 {
992                                                         _data = static_cast<unsigned char>(remainder + 1);
993                                                 }
994                                                 else
995                                                 {
996                                                         compact_set_value<header_offset>(this, value);
997
998                                                         _data = 255;
999                                                 }
1000                                         }
1001                                 }
1002                                 else
1003                                 {
1004                                         compact_set_value<header_offset>(this, value);
1005
1006                                         _data = 255;
1007                                 }
1008                         }
1009                         else
1010                         {
1011                                 _data = 0;
1012                         }
1013                 }
1014
1015                 operator char_t*() const
1016                 {
1017                         if (_data)
1018                         {
1019                                 if (_data < 255)
1020                                 {
1021                                         xml_memory_page* page = compact_get_page(this, header_offset);
1022
1023                                         // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1024                                         const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1025                                         assert(*base);
1026
1027                                         ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1028
1029                                         return page->compact_string_base + offset;
1030                                 }
1031                                 else
1032                                 {
1033                                         return compact_get_value<header_offset, char_t>(this);
1034                                 }
1035                         }
1036                         else
1037                                 return 0;
1038                 }
1039
1040         private:
1041                 unsigned char _data;
1042         };
1043 PUGI__NS_END
1044 #endif
1045
1046 #ifdef PUGIXML_COMPACT
1047 namespace pugi
1048 {
1049         struct xml_attribute_struct
1050         {
1051                 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1052                 {
1053                         PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1054                 }
1055
1056                 impl::compact_header header;
1057
1058                 uint16_t namevalue_base;
1059
1060                 impl::compact_string<4, 2> name;
1061                 impl::compact_string<5, 3> value;
1062
1063                 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1064                 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1065         };
1066
1067         struct xml_node_struct
1068         {
1069                 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1070                 {
1071                         PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1072                 }
1073
1074                 impl::compact_header header;
1075
1076                 uint16_t namevalue_base;
1077
1078                 impl::compact_string<4, 2> name;
1079                 impl::compact_string<5, 3> value;
1080
1081                 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1082
1083                 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1084
1085                 impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
1086                 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1087
1088                 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1089         };
1090 }
1091 #else
1092 namespace pugi
1093 {
1094         struct xml_attribute_struct
1095         {
1096                 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1097                 {
1098                         header = PUGI__GETHEADER_IMPL(this, page, 0);
1099                 }
1100
1101                 uintptr_t header;
1102
1103                 char_t* name;
1104                 char_t* value;
1105
1106                 xml_attribute_struct* prev_attribute_c;
1107                 xml_attribute_struct* next_attribute;
1108         };
1109
1110         struct xml_node_struct
1111         {
1112                 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1113                 {
1114                         header = PUGI__GETHEADER_IMPL(this, page, type);
1115                 }
1116
1117                 uintptr_t header;
1118
1119                 char_t* name;
1120                 char_t* value;
1121
1122                 xml_node_struct* parent;
1123
1124                 xml_node_struct* first_child;
1125
1126                 xml_node_struct* prev_sibling_c;
1127                 xml_node_struct* next_sibling;
1128
1129                 xml_attribute_struct* first_attribute;
1130         };
1131 }
1132 #endif
1133
1134 PUGI__NS_BEGIN
1135         struct xml_extra_buffer
1136         {
1137                 char_t* buffer;
1138                 xml_extra_buffer* next;
1139         };
1140
1141         struct xml_document_struct: public xml_node_struct, public xml_allocator
1142         {
1143                 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1144                 {
1145                 }
1146
1147                 const char_t* buffer;
1148
1149                 xml_extra_buffer* extra_buffers;
1150
1151         #ifdef PUGIXML_COMPACT
1152                 compact_hash_table hash;
1153         #endif
1154         };
1155
1156         template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1157         {
1158                 assert(object);
1159
1160                 return *PUGI__GETPAGE(object)->allocator;
1161         }
1162
1163         template <typename Object> inline xml_document_struct& get_document(const Object* object)
1164         {
1165                 assert(object);
1166
1167                 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1168         }
1169 PUGI__NS_END
1170
1171 // Low-level DOM operations
1172 PUGI__NS_BEGIN
1173         inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1174         {
1175                 xml_memory_page* page;
1176                 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1177                 if (!memory) return 0;
1178
1179                 return new (memory) xml_attribute_struct(page);
1180         }
1181
1182         inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1183         {
1184                 xml_memory_page* page;
1185                 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1186                 if (!memory) return 0;
1187
1188                 return new (memory) xml_node_struct(page, type);
1189         }
1190
1191         inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1192         {
1193                 if (a->header & impl::xml_memory_page_name_allocated_mask)
1194                         alloc.deallocate_string(a->name);
1195
1196                 if (a->header & impl::xml_memory_page_value_allocated_mask)
1197                         alloc.deallocate_string(a->value);
1198
1199                 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1200         }
1201
1202         inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1203         {
1204                 if (n->header & impl::xml_memory_page_name_allocated_mask)
1205                         alloc.deallocate_string(n->name);
1206
1207                 if (n->header & impl::xml_memory_page_value_allocated_mask)
1208                         alloc.deallocate_string(n->value);
1209
1210                 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1211                 {
1212                         xml_attribute_struct* next = attr->next_attribute;
1213
1214                         destroy_attribute(attr, alloc);
1215
1216                         attr = next;
1217                 }
1218
1219                 for (xml_node_struct* child = n->first_child; child; )
1220                 {
1221                         xml_node_struct* next = child->next_sibling;
1222
1223                         destroy_node(child, alloc);
1224
1225                         child = next;
1226                 }
1227
1228                 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1229         }
1230
1231         inline void append_node(xml_node_struct* child, xml_node_struct* node)
1232         {
1233                 child->parent = node;
1234
1235                 xml_node_struct* head = node->first_child;
1236
1237                 if (head)
1238                 {
1239                         xml_node_struct* tail = head->prev_sibling_c;
1240
1241                         tail->next_sibling = child;
1242                         child->prev_sibling_c = tail;
1243                         head->prev_sibling_c = child;
1244                 }
1245                 else
1246                 {
1247                         node->first_child = child;
1248                         child->prev_sibling_c = child;
1249                 }
1250         }
1251
1252         inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1253         {
1254                 child->parent = node;
1255
1256                 xml_node_struct* head = node->first_child;
1257
1258                 if (head)
1259                 {
1260                         child->prev_sibling_c = head->prev_sibling_c;
1261                         head->prev_sibling_c = child;
1262                 }
1263                 else
1264                         child->prev_sibling_c = child;
1265
1266                 child->next_sibling = head;
1267                 node->first_child = child;
1268         }
1269
1270         inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1271         {
1272                 xml_node_struct* parent = node->parent;
1273
1274                 child->parent = parent;
1275
1276                 if (node->next_sibling)
1277                         node->next_sibling->prev_sibling_c = child;
1278                 else
1279                         parent->first_child->prev_sibling_c = child;
1280
1281                 child->next_sibling = node->next_sibling;
1282                 child->prev_sibling_c = node;
1283
1284                 node->next_sibling = child;
1285         }
1286
1287         inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1288         {
1289                 xml_node_struct* parent = node->parent;
1290
1291                 child->parent = parent;
1292
1293                 if (node->prev_sibling_c->next_sibling)
1294                         node->prev_sibling_c->next_sibling = child;
1295                 else
1296                         parent->first_child = child;
1297
1298                 child->prev_sibling_c = node->prev_sibling_c;
1299                 child->next_sibling = node;
1300
1301                 node->prev_sibling_c = child;
1302         }
1303
1304         inline void remove_node(xml_node_struct* node)
1305         {
1306                 xml_node_struct* parent = node->parent;
1307
1308                 if (node->next_sibling)
1309                         node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1310                 else
1311                         parent->first_child->prev_sibling_c = node->prev_sibling_c;
1312
1313                 if (node->prev_sibling_c->next_sibling)
1314                         node->prev_sibling_c->next_sibling = node->next_sibling;
1315                 else
1316                         parent->first_child = node->next_sibling;
1317
1318                 node->parent = 0;
1319                 node->prev_sibling_c = 0;
1320                 node->next_sibling = 0;
1321         }
1322
1323         inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1324         {
1325                 xml_attribute_struct* head = node->first_attribute;
1326
1327                 if (head)
1328                 {
1329                         xml_attribute_struct* tail = head->prev_attribute_c;
1330
1331                         tail->next_attribute = attr;
1332                         attr->prev_attribute_c = tail;
1333                         head->prev_attribute_c = attr;
1334                 }
1335                 else
1336                 {
1337                         node->first_attribute = attr;
1338                         attr->prev_attribute_c = attr;
1339                 }
1340         }
1341
1342         inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1343         {
1344                 xml_attribute_struct* head = node->first_attribute;
1345
1346                 if (head)
1347                 {
1348                         attr->prev_attribute_c = head->prev_attribute_c;
1349                         head->prev_attribute_c = attr;
1350                 }
1351                 else
1352                         attr->prev_attribute_c = attr;
1353
1354                 attr->next_attribute = head;
1355                 node->first_attribute = attr;
1356         }
1357
1358         inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1359         {
1360                 if (place->next_attribute)
1361                         place->next_attribute->prev_attribute_c = attr;
1362                 else
1363                         node->first_attribute->prev_attribute_c = attr;
1364
1365                 attr->next_attribute = place->next_attribute;
1366                 attr->prev_attribute_c = place;
1367                 place->next_attribute = attr;
1368         }
1369
1370         inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1371         {
1372                 if (place->prev_attribute_c->next_attribute)
1373                         place->prev_attribute_c->next_attribute = attr;
1374                 else
1375                         node->first_attribute = attr;
1376
1377                 attr->prev_attribute_c = place->prev_attribute_c;
1378                 attr->next_attribute = place;
1379                 place->prev_attribute_c = attr;
1380         }
1381
1382         inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1383         {
1384                 if (attr->next_attribute)
1385                         attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1386                 else
1387                         node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1388
1389                 if (attr->prev_attribute_c->next_attribute)
1390                         attr->prev_attribute_c->next_attribute = attr->next_attribute;
1391                 else
1392                         node->first_attribute = attr->next_attribute;
1393
1394                 attr->prev_attribute_c = 0;
1395                 attr->next_attribute = 0;
1396         }
1397
1398         PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1399         {
1400                 if (!alloc.reserve()) return 0;
1401
1402                 xml_node_struct* child = allocate_node(alloc, type);
1403                 if (!child) return 0;
1404
1405                 append_node(child, node);
1406
1407                 return child;
1408         }
1409
1410         PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1411         {
1412                 if (!alloc.reserve()) return 0;
1413
1414                 xml_attribute_struct* attr = allocate_attribute(alloc);
1415                 if (!attr) return 0;
1416
1417                 append_attribute(attr, node);
1418
1419                 return attr;
1420         }
1421 PUGI__NS_END
1422
1423 // Helper classes for code generation
1424 PUGI__NS_BEGIN
// Tag type whose compile-time constant `value` is 0
struct opt_false { enum { value = 0 }; };
1429
// Tag type whose compile-time constant `value` is 1
struct opt_true { enum { value = 1 }; };
1434 PUGI__NS_END
1435
1436 // Unicode utilities
1437 PUGI__NS_BEGIN
// Returns value with its two bytes exchanged
inline uint16_t endian_swap(uint16_t value)
{
        const unsigned int low_byte = value & 0xffu;
        const unsigned int high_byte = value >> 8;

        return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1442
// Returns value with the order of its four bytes reversed
inline uint32_t endian_swap(uint32_t value)
{
        const uint32_t byte0 = value & 0xff;
        const uint32_t byte1 = value & 0xff00;
        const uint32_t byte2 = value & 0xff0000;
        const uint32_t byte3 = value >> 24;

        return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1447
// Accumulates the number of UTF-8 bytes needed for a sequence of code points
struct utf8_counter
{
        typedef size_t value_type;

        // Code points below U+10000: 1 byte for U+0000..U+007F, 2 for U+0080..U+07FF, 3 for U+0800..U+FFFF
        static value_type low(value_type result, uint32_t ch)
        {
                const value_type length = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

                return result + length;
        }

        // Code points U+10000..U+10FFFF always take 4 bytes
        static value_type high(value_type result, uint32_t)
        {
                return result + 4;
        }
};
1468
// Writes code points as UTF-8; every function returns the position just past the bytes it wrote
struct utf8_writer
{
        typedef uint8_t* value_type;

        // Encodes a code point below U+10000 in 1 to 3 bytes
        static value_type low(value_type result, uint32_t ch)
        {
                // U+0000..U+007F
                if (ch < 0x80)
                {
                        *result++ = static_cast<uint8_t>(ch);
                        return result;
                }

                // U+0080..U+07FF
                if (ch < 0x800)
                {
                        *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
                        *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
                        return result;
                }

                // U+0800..U+FFFF
                *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
                *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
                *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
                return result;
        }

        // Encodes a code point in U+10000..U+10FFFF in 4 bytes
        static value_type high(value_type result, uint32_t ch)
        {
                *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
                *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
                *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
                *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
                return result;
        }

        // Picks low() or high() depending on whether ch is below U+10000
        static value_type any(value_type result, uint32_t ch)
        {
                if (ch < 0x10000)
                        return low(result, ch);

                return high(result, ch);
        }
};
1513
// Accumulates the number of UTF-16 code units needed for a sequence of code points
struct utf16_counter
{
        typedef size_t value_type;

        // A code point handled by low() takes one code unit
        static value_type low(value_type result, uint32_t)
        {
                const value_type units = 1;

                return result + units;
        }

        // A code point handled by high() takes two code units
        static value_type high(value_type result, uint32_t)
        {
                const value_type units = 2;

                return result + units;
        }
};
1528
// Writes code points as UTF-16; every function returns the position just past the units it wrote
struct utf16_writer
{
        typedef uint16_t* value_type;

        // Stores ch as a single code unit
        static value_type low(value_type result, uint32_t ch)
        {
                result[0] = static_cast<uint16_t>(ch);

                return result + 1;
        }

        // Stores ch as a surrogate pair: the upper bits of (ch - 0x10000) go into the 0xD800-based unit,
        // the lower 10 bits into the 0xDC00-based unit
        static value_type high(value_type result, uint32_t ch)
        {
                const uint32_t scalar = static_cast<uint32_t>(ch - 0x10000);

                result[0] = static_cast<uint16_t>(0xD800 + (scalar >> 10));
                result[1] = static_cast<uint16_t>(0xDC00 + (scalar & 0x3ff));

                return result + 2;
        }

        // Picks low() or high() depending on whether ch is below U+10000
        static value_type any(value_type result, uint32_t ch)
        {
                if (ch < 0x10000)
                        return low(result, ch);

                return high(result, ch);
        }
};
1556
1557         struct utf32_counter
1558         {
1559                 typedef size_t value_type;
1560
1561                 static value_type low(value_type result, uint32_t)
1562                 {
1563                         return result + 1;
1564                 }
1565
1566                 static value_type high(value_type result, uint32_t)
1567                 {
1568                         return result + 1;
1569                 }
1570         };
1571
1572         struct utf32_writer
1573         {
1574                 typedef uint32_t* value_type;
1575
1576                 static value_type low(value_type result, uint32_t ch)
1577                 {
1578                         *result = ch;
1579
1580                         return result + 1;
1581                 }
1582
1583                 static value_type high(value_type result, uint32_t ch)
1584                 {
1585                         *result = ch;
1586
1587                         return result + 1;
1588                 }
1589
1590                 static value_type any(value_type result, uint32_t ch)
1591                 {
1592                         *result = ch;
1593
1594                         return result + 1;
1595                 }
1596         };
1597
1598         struct latin1_writer
1599         {
1600                 typedef uint8_t* value_type;
1601
1602                 static value_type low(value_type result, uint32_t ch)
1603                 {
1604                         *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1605
1606                         return result + 1;
1607                 }
1608
1609                 static value_type high(value_type result, uint32_t ch)
1610                 {
1611                         (void)ch;
1612
1613                         *result = '?';
1614
1615                         return result + 1;
1616                 }
1617         };
1618
1619         struct utf8_decoder
1620         {
1621                 typedef uint8_t type;
1622
1623                 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1624                 {
1625                         const uint8_t utf8_byte_mask = 0x3f;
1626
1627                         while (size)
1628                         {
1629                                 uint8_t lead = *data;
1630
1631                                 // 0xxxxxxx -> U+0000..U+007F
1632                                 if (lead < 0x80)
1633                                 {
1634                                         result = Traits::low(result, lead);
1635                                         data += 1;
1636                                         size -= 1;
1637
1638                                         // process aligned single-byte (ascii) blocks
1639                                         if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1640                                         {
1641                                                 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1642                                                 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1643                                                 {
1644                                                         result = Traits::low(result, data[0]);
1645                                                         result = Traits::low(result, data[1]);
1646                                                         result = Traits::low(result, data[2]);
1647                                                         result = Traits::low(result, data[3]);
1648                                                         data += 4;
1649                                                         size -= 4;
1650                                                 }
1651                                         }
1652                                 }
1653                                 // 110xxxxx -> U+0080..U+07FF
1654                                 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1655                                 {
1656                                         result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1657                                         data += 2;
1658                                         size -= 2;
1659                                 }
1660                                 // 1110xxxx -> U+0800-U+FFFF
1661                                 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1662                                 {
1663                                         result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1664                                         data += 3;
1665                                         size -= 3;
1666                                 }
1667                                 // 11110xxx -> U+10000..U+10FFFF
1668                                 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1669                                 {
1670                                         result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1671                                         data += 4;
1672                                         size -= 4;
1673                                 }
1674                                 // 10xxxxxx or 11111xxx -> invalid
1675                                 else
1676                                 {
1677                                         data += 1;
1678                                         size -= 1;
1679                                 }
1680                         }
1681
1682                         return result;
1683                 }
1684         };
1685
1686         template <typename opt_swap> struct utf16_decoder
1687         {
1688                 typedef uint16_t type;
1689
1690                 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1691                 {
1692                         while (size)
1693                         {
1694                                 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1695
1696                                 // U+0000..U+D7FF
1697                                 if (lead < 0xD800)
1698                                 {
1699                                         result = Traits::low(result, lead);
1700                                         data += 1;
1701                                         size -= 1;
1702                                 }
1703                                 // U+E000..U+FFFF
1704                                 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1705                                 {
1706                                         result = Traits::low(result, lead);
1707                                         data += 1;
1708                                         size -= 1;
1709                                 }
1710                                 // surrogate pair lead
1711                                 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1712                                 {
1713                                         uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1714
1715                                         if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1716                                         {
1717                                                 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1718                                                 data += 2;
1719                                                 size -= 2;
1720                                         }
1721                                         else
1722                                         {
1723                                                 data += 1;
1724                                                 size -= 1;
1725                                         }
1726                                 }
1727                                 else
1728                                 {
1729                                         data += 1;
1730                                         size -= 1;
1731                                 }
1732                         }
1733
1734                         return result;
1735                 }
1736         };
1737
1738         template <typename opt_swap> struct utf32_decoder
1739         {
1740                 typedef uint32_t type;
1741
1742                 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1743                 {
1744                         while (size)
1745                         {
1746                                 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1747
1748                                 // U+0000..U+FFFF
1749                                 if (lead < 0x10000)
1750                                 {
1751                                         result = Traits::low(result, lead);
1752                                         data += 1;
1753                                         size -= 1;
1754                                 }
1755                                 // U+10000..U+10FFFF
1756                                 else
1757                                 {
1758                                         result = Traits::high(result, lead);
1759                                         data += 1;
1760                                         size -= 1;
1761                                 }
1762                         }
1763
1764                         return result;
1765                 }
1766         };
1767
1768         struct latin1_decoder
1769         {
1770                 typedef uint8_t type;
1771
1772                 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1773                 {
1774                         while (size)
1775                         {
1776                                 result = Traits::low(result, *data);
1777                                 data += 1;
1778                                 size -= 1;
1779                         }
1780
1781                         return result;
1782                 }
1783         };
1784
1785         template <size_t size> struct wchar_selector;
1786
1787         template <> struct wchar_selector<2>
1788         {
1789                 typedef uint16_t type;
1790                 typedef utf16_counter counter;
1791                 typedef utf16_writer writer;
1792                 typedef utf16_decoder<opt_false> decoder;
1793         };
1794
1795         template <> struct wchar_selector<4>
1796         {
1797                 typedef uint32_t type;
1798                 typedef utf32_counter counter;
1799                 typedef utf32_writer writer;
1800                 typedef utf32_decoder<opt_false> decoder;
1801         };
1802
1803         typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1804         typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1805
1806         struct wchar_decoder
1807         {
1808                 typedef wchar_t type;
1809
1810                 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1811                 {
1812                         typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1813
1814                         return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1815                 }
1816         };
1817
#ifdef PUGIXML_WCHAR_MODE
        // Writes the endian-swapped form of each of the first `length` characters of `data` into `result`.
        // Element i of the output depends only on element i of the input.
        PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
        {
                // swapping is done on the fixed-width unsigned type that matches wchar_t
                typedef wchar_selector<sizeof(wchar_t)>::type unit_type;

                for (size_t index = 0; index != length; ++index)
                {
                        result[index] = static_cast<wchar_t>(endian_swap(static_cast<unit_type>(data[index])));
                }
        }
#endif
1825 PUGI__NS_END
1826
1827 PUGI__NS_BEGIN
1828         enum chartype_t
1829         {
1830                 ct_parse_pcdata = 1,    // \0, &, \r, <
1831                 ct_parse_attr = 2,              // \0, &, \r, ', "
1832                 ct_parse_attr_ws = 4,   // \0, &, \r, ', ", \n, tab
1833                 ct_space = 8,                   // \r, \n, space, tab
1834                 ct_parse_cdata = 16,    // \0, ], >, \r
1835                 ct_parse_comment = 32,  // \0, -, >, \r
1836                 ct_symbol = 64,                 // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1837                 ct_start_symbol = 128   // Any symbol > 127, a-z, A-Z, _, :
1838         };
1839
1840         static const unsigned char chartype_table[256] =
1841         {
1842                 55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
1843                 0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
1844                 8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
1845                 64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
1846                 0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1847                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
1848                 0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1849                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
1850
1851                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
1852                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1853                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1854                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1855                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1856                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1857                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1858                 192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
1859         };
1860
1861         enum chartypex_t
1862         {
1863                 ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1864                 ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1865                 ctx_start_symbol = 4,     // Any symbol > 127, a-z, A-Z, _
1866                 ctx_digit = 8,                    // 0-9
1867                 ctx_symbol = 16                   // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1868         };
1869
1870         static const unsigned char chartypex_table[256] =
1871         {
1872                 3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
1873                 3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
1874                 0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
1875                 24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
1876
1877                 0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
1878                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
1879                 0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
1880                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
1881
1882                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
1883                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1884                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1885                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1886                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1887                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1888                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1889                 20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
1890         };
1891
// Character classification helpers: look up the flags of character c in a 256-entry table and mask them with ct.
#ifndef PUGIXML_WCHAR_MODE
        // narrow characters: the character value, taken as unsigned char, indexes the table directly
        #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#else
        // wide characters: every value of 128 and above shares table entry 128
        #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned int>(c) < 128 ? static_cast<unsigned int>(c) : 128] & (ct))
#endif

        // classification against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags)
        #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
        #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1900
1901         PUGI__FN bool is_little_endian()
1902         {
1903                 unsigned int ui = 1;
1904
1905                 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1906         }
1907
1908         PUGI__FN xml_encoding get_wchar_encoding()
1909         {
1910                 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1911
1912                 if (sizeof(wchar_t) == 2)
1913                         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1914                 else
1915                         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1916         }
1917
1918         PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1919         {
1920         #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1921         #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1922
1923                 // check if we have a non-empty XML declaration
1924                 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1925                         return false;
1926
1927                 // scan XML declaration until the encoding field
1928                 for (size_t i = 6; i + 1 < size; ++i)
1929                 {
1930                         // declaration can not contain ? in quoted values
1931                         if (data[i] == '?')
1932                                 return false;
1933
1934                         if (data[i] == 'e' && data[i + 1] == 'n')
1935                         {
1936                                 size_t offset = i;
1937
1938                                 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1939                                 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1940                                 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
1941
1942                                 // S? = S?
1943                                 PUGI__SCANCHARTYPE(ct_space);
1944                                 PUGI__SCANCHAR('=');
1945                                 PUGI__SCANCHARTYPE(ct_space);
1946
1947                                 // the only two valid delimiters are ' and "
1948                                 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1949
1950                                 PUGI__SCANCHAR(delimiter);
1951
1952                                 size_t start = offset;
1953
1954                                 out_encoding = data + offset;
1955
1956                                 PUGI__SCANCHARTYPE(ct_symbol);
1957
1958                                 out_length = offset - start;
1959
1960                                 PUGI__SCANCHAR(delimiter);
1961
1962                                 return true;
1963                         }
1964                 }
1965
1966                 return false;
1967
1968         #undef PUGI__SCANCHAR
1969         #undef PUGI__SCANCHARTYPE
1970         }
1971
1972         PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1973         {
1974                 // skip encoding autodetection if input buffer is too small
1975                 if (size < 4) return encoding_utf8;
1976
1977                 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1978
1979                 // look for BOM in first few bytes
1980                 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1981                 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1982                 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1983                 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1984                 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1985
1986                 // look for <, <? or <?xm in various encodings
1987                 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1988                 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1989                 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1990                 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1991
1992                 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1993                 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1994                 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1995
1996                 // no known BOM detected; parse declaration
1997                 const uint8_t* enc = 0;
1998                 size_t enc_length = 0;
1999
2000                 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2001                 {
2002                         // iso-8859-1 (case-insensitive)
2003                         if (enc_length == 10
2004                                 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2005                                 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2006                                 && enc[8] == '-' && enc[9] == '1')
2007                                 return encoding_latin1;
2008
2009                         // latin1 (case-insensitive)
2010                         if (enc_length == 6
2011                                 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2012                                 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2013                                 && enc[5] == '1')
2014                                 return encoding_latin1;
2015                 }
2016
2017                 return encoding_utf8;
2018         }
2019
2020         PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2021         {
2022                 // replace wchar encoding with utf implementation
2023                 if (encoding == encoding_wchar) return get_wchar_encoding();
2024
2025                 // replace utf16 encoding with utf16 with specific endianness
2026                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2027
2028                 // replace utf32 encoding with utf32 with specific endianness
2029                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2030
2031                 // only do autodetection if no explicit encoding is requested
2032                 if (encoding != encoding_auto) return encoding;
2033
2034                 // try to guess encoding (based on XML specification, Appendix F.1)
2035                 const uint8_t* data = static_cast<const uint8_t*>(contents);
2036
2037                 return guess_buffer_encoding(data, size);
2038         }
2039
2040         PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2041         {
2042                 size_t length = size / sizeof(char_t);
2043
2044                 if (is_mutable)
2045                 {
2046                         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2047                         out_length = length;
2048                 }
2049                 else
2050                 {
2051                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2052                         if (!buffer) return false;
2053
2054                         if (contents)
2055                                 memcpy(buffer, contents, length * sizeof(char_t));
2056                         else
2057                                 assert(length == 0);
2058
2059                         buffer[length] = 0;
2060
2061                         out_buffer = buffer;
2062                         out_length = length + 1;
2063                 }
2064
2065                 return true;
2066         }
2067
2068 #ifdef PUGIXML_WCHAR_MODE
2069         PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2070         {
2071                 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2072                            (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2073         }
2074
2075         PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2076         {
2077                 const char_t* data = static_cast<const char_t*>(contents);
2078                 size_t length = size / sizeof(char_t);
2079
2080                 if (is_mutable)
2081                 {
2082                         char_t* buffer = const_cast<char_t*>(data);
2083
2084                         convert_wchar_endian_swap(buffer, data, length);
2085
2086                         out_buffer = buffer;
2087                         out_length = length;
2088                 }
2089                 else
2090                 {
2091                         char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092                         if (!buffer) return false;
2093
2094                         convert_wchar_endian_swap(buffer, data, length);
2095                         buffer[length] = 0;
2096
2097                         out_buffer = buffer;
2098                         out_length = length + 1;
2099                 }
2100
2101                 return true;
2102         }
2103
2104         template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2105         {
2106                 const typename D::type* data = static_cast<const typename D::type*>(contents);
2107                 size_t data_length = size / sizeof(typename D::type);
2108
2109                 // first pass: get length in wchar_t units
2110                 size_t length = D::process(data, data_length, 0, wchar_counter());
2111
2112                 // allocate buffer of suitable length
2113                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2114                 if (!buffer) return false;
2115
2116                 // second pass: convert utf16 input to wchar_t
2117                 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2118                 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2119
2120                 assert(oend == obegin + length);
2121                 *oend = 0;
2122
2123                 out_buffer = buffer;
2124                 out_length = length + 1;
2125
2126                 return true;
2127         }
2128
2129         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2130         {
2131                 // get native encoding
2132                 xml_encoding wchar_encoding = get_wchar_encoding();
2133
2134                 // fast path: no conversion required
2135                 if (encoding == wchar_encoding)
2136                         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2137
2138                 // only endian-swapping is required
2139                 if (need_endian_swap_utf(encoding, wchar_encoding))
2140                         return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2141
2142                 // source encoding is utf8
2143                 if (encoding == encoding_utf8)
2144                         return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2145
2146                 // source encoding is utf16
2147                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2148                 {
2149                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2150
2151                         return (native_encoding == encoding) ?
2152                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2153                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2154                 }
2155
2156                 // source encoding is utf32
2157                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2158                 {
2159                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2160
2161                         return (native_encoding == encoding) ?
2162                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2163                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2164                 }
2165
2166                 // source encoding is latin1
2167                 if (encoding == encoding_latin1)
2168                         return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2169
2170                 assert(false && "Invalid encoding"); // unreachable
2171                 return false;
2172         }
2173 #else
2174         template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2175         {
2176                 const typename D::type* data = static_cast<const typename D::type*>(contents);
2177                 size_t data_length = size / sizeof(typename D::type);
2178
2179                 // first pass: get length in utf8 units
2180                 size_t length = D::process(data, data_length, 0, utf8_counter());
2181
2182                 // allocate buffer of suitable length
2183                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2184                 if (!buffer) return false;
2185
2186                 // second pass: convert utf16 input to utf8
2187                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2188                 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2189
2190                 assert(oend == obegin + length);
2191                 *oend = 0;
2192
2193                 out_buffer = buffer;
2194                 out_length = length + 1;
2195
2196                 return true;
2197         }
2198
2199         PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2200         {
2201                 for (size_t i = 0; i < size; ++i)
2202                         if (data[i] > 127)
2203                                 return i;
2204
2205                 return size;
2206         }
2207
2208         PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2209         {
2210                 const uint8_t* data = static_cast<const uint8_t*>(contents);
2211                 size_t data_length = size;
2212
2213                 // get size of prefix that does not need utf8 conversion
2214                 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2215                 assert(prefix_length <= data_length);
2216
2217                 const uint8_t* postfix = data + prefix_length;
2218                 size_t postfix_length = data_length - prefix_length;
2219
2220                 // if no conversion is needed, just return the original buffer
2221                 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2222
2223                 // first pass: get length in utf8 units
2224                 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2225
2226                 // allocate buffer of suitable length
2227                 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2228                 if (!buffer) return false;
2229
2230                 // second pass: convert latin1 input to utf8
2231                 memcpy(buffer, data, prefix_length);
2232
2233                 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2234                 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2235
2236                 assert(oend == obegin + length);
2237                 *oend = 0;
2238
2239                 out_buffer = buffer;
2240                 out_length = length + 1;
2241
2242                 return true;
2243         }
2244
2245         PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2246         {
2247                 // fast path: no conversion required
2248                 if (encoding == encoding_utf8)
2249                         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2250
2251                 // source encoding is utf16
2252                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2253                 {
2254                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2255
2256                         return (native_encoding == encoding) ?
2257                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2258                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2259                 }
2260
2261                 // source encoding is utf32
2262                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2263                 {
2264                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2265
2266                         return (native_encoding == encoding) ?
2267                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2268                                 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2269                 }
2270
2271                 // source encoding is latin1
2272                 if (encoding == encoding_latin1)
2273                         return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2274
2275                 assert(false && "Invalid encoding"); // unreachable
2276                 return false;
2277         }
2278 #endif
2279
2280         PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2281         {
2282                 // get length in utf8 characters
2283                 return wchar_decoder::process(str, length, 0, utf8_counter());
2284         }
2285
2286         PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2287         {
2288                 // convert to utf8
2289                 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2290                 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2291
2292                 assert(begin + size == end);
2293                 (void)!end;
2294                 (void)!size;
2295         }
2296
2297 #ifndef PUGIXML_NO_STL
2298         PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2299         {
2300                 // first pass: get length in utf8 characters
2301                 size_t size = as_utf8_begin(str, length);
2302
2303                 // allocate resulting string
2304                 std::string result;
2305                 result.resize(size);
2306
2307                 // second pass: convert to utf8
2308                 if (size > 0) as_utf8_end(&result[0], size, str, length);
2309
2310                 return result;
2311         }
2312
2313         PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2314         {
2315                 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2316
2317                 // first pass: get length in wchar_t units
2318                 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2319
2320                 // allocate resulting string
2321                 std::basic_string<wchar_t> result;
2322                 result.resize(length);
2323
2324                 // second pass: convert to wchar_t
2325                 if (length > 0)
2326                 {
2327                         wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2328                         wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2329
2330                         assert(begin + length == end);
2331                         (void)!end;
2332                 }
2333
2334                 return result;
2335         }
2336 #endif
2337
2338         template <typename Header>
2339         inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2340         {
2341                 // never reuse shared memory
2342                 if (header & xml_memory_page_contents_shared_mask) return false;
2343
2344                 size_t target_length = strlength(target);
2345
2346                 // always reuse document buffer memory if possible
2347                 if ((header & header_mask) == 0) return target_length >= length;
2348
2349                 // reuse heap memory if waste is not too great
2350                 const size_t reuse_threshold = 32;
2351
2352                 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2353         }
2354
2355         template <typename String, typename Header>
2356         PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2357         {
2358                 if (source_length == 0)
2359                 {
2360                         // empty string and null pointer are equivalent, so just deallocate old memory
2361                         xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2362
2363                         if (header & header_mask) alloc->deallocate_string(dest);
2364
2365                         // mark the string as not allocated
2366                         dest = 0;
2367                         header &= ~header_mask;
2368
2369                         return true;
2370                 }
2371                 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2372                 {
2373                         // we can reuse old buffer, so just copy the new data (including zero terminator)
2374                         memcpy(dest, source, source_length * sizeof(char_t));
2375                         dest[source_length] = 0;
2376
2377                         return true;
2378                 }
2379                 else
2380                 {
2381                         xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2382
2383                         if (!alloc->reserve()) return false;
2384
2385                         // allocate new buffer
2386                         char_t* buf = alloc->allocate_string(source_length + 1);
2387                         if (!buf) return false;
2388
2389                         // copy the string (including zero terminator)
2390                         memcpy(buf, source, source_length * sizeof(char_t));
2391                         buf[source_length] = 0;
2392
2393                         // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2394                         if (header & header_mask) alloc->deallocate_string(dest);
2395
2396                         // the string is now allocated, so set the flag
2397                         dest = buf;
2398                         header |= header_mask;
2399
2400                         return true;
2401                 }
2402         }
2403
2404         struct gap
2405         {
2406                 char_t* end;
2407                 size_t size;
2408
2409                 gap(): end(0), size(0)
2410                 {
2411                 }
2412
2413                 // Push new gap, move s count bytes further (skipping the gap).
2414                 // Collapse previous gap.
2415                 void push(char_t*& s, size_t count)
2416                 {
2417                         if (end) // there was a gap already; collapse it
2418                         {
2419                                 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2420                                 assert(s >= end);
2421                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2422                         }
2423
2424                         s += count; // end of current gap
2425
2426                         // "merge" two gaps
2427                         end = s;
2428                         size += count;
2429                 }
2430
2431                 // Collapse all gaps, return past-the-end pointer
2432                 char_t* flush(char_t* s)
2433                 {
2434                         if (end)
2435                         {
2436                                 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2437                                 assert(s >= end);
2438                                 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2439
2440                                 return s - size;
2441                         }
2442                         else return s;
2443                 }
2444         };
2445
2446         PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
2447         {
2448                 char_t* stre = s + 1;
2449
2450                 switch (*stre)
2451                 {
2452                         case '#':       // &#...
2453                         {
2454                                 unsigned int ucsc = 0;
2455
2456                                 if (stre[1] == 'x') // &#x... (hex code)
2457                                 {
2458                                         stre += 2;
2459
2460                                         char_t ch = *stre;
2461
2462                                         if (ch == ';') return stre;
2463
2464                                         for (;;)
2465                                         {
2466                                                 if (static_cast<unsigned int>(ch - '0') <= 9)
2467                                                         ucsc = 16 * ucsc + (ch - '0');
2468                                                 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2469                                                         ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2470                                                 else if (ch == ';')
2471                                                         break;
2472                                                 else // cancel
2473                                                         return stre;
2474
2475                                                 ch = *++stre;
2476                                         }
2477
2478                                         ++stre;
2479                                 }
2480                                 else    // &#... (dec code)
2481                                 {
2482                                         char_t ch = *++stre;
2483
2484                                         if (ch == ';') return stre;
2485
2486                                         for (;;)
2487                                         {
2488                                                 if (static_cast<unsigned int>(ch - '0') <= 9)
2489                                                         ucsc = 10 * ucsc + (ch - '0');
2490                                                 else if (ch == ';')
2491                                                         break;
2492                                                 else // cancel
2493                                                         return stre;
2494
2495                                                 ch = *++stre;
2496                                         }
2497
2498                                         ++stre;
2499                                 }
2500
2501                         #ifdef PUGIXML_WCHAR_MODE
2502                                 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2503                         #else
2504                                 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2505                         #endif
2506
2507                                 g.push(s, stre - s);
2508                                 return stre;
2509                         }
2510
2511                         case 'a':       // &a
2512                         {
2513                                 ++stre;
2514
2515                                 if (*stre == 'm') // &am
2516                                 {
2517                                         if (*++stre == 'p' && *++stre == ';') // &amp;
2518                                         {
2519                                                 *s++ = '&';
2520                                                 ++stre;
2521
2522                                                 g.push(s, stre - s);
2523                                                 return stre;
2524                                         }
2525                                 }
2526                                 else if (*stre == 'p') // &ap
2527                                 {
2528                                         if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2529                                         {
2530                                                 *s++ = '\'';
2531                                                 ++stre;
2532
2533                                                 g.push(s, stre - s);
2534                                                 return stre;
2535                                         }
2536                                 }
2537                                 break;
2538                         }
2539
2540                         case 'g': // &g
2541                         {
2542                                 if (*++stre == 't' && *++stre == ';') // &gt;
2543                                 {
2544                                         *s++ = '>';
2545                                         ++stre;
2546
2547                                         g.push(s, stre - s);
2548                                         return stre;
2549                                 }
2550                                 break;
2551                         }
2552
2553                         case 'l': // &l
2554                         {
2555                                 if (*++stre == 't' && *++stre == ';') // &lt;
2556                                 {
2557                                         *s++ = '<';
2558                                         ++stre;
2559
2560                                         g.push(s, stre - s);
2561                                         return stre;
2562                                 }
2563                                 break;
2564                         }
2565
2566                         case 'q': // &q
2567                         {
2568                                 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2569                                 {
2570                                         *s++ = '"';
2571                                         ++stre;
2572
2573                                         g.push(s, stre - s);
2574                                         return stre;
2575                                 }
2576                                 break;
2577                         }
2578
2579                         default:
2580                                 break;
2581                 }
2582
2583                 return stre;
2584         }
2585
// Parser utilities
// These macros are expanded inside the parsing routines and refer to identifiers of the expansion
// site: s (current position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.

// true if c equals e, or if c is the zero terminator and the saved end character endch equals e
#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
// advance s over characters classified as ct_space
#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// non-zero if any of the OPT bits is set in optmsk
#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
// append a node of TYPE under cursor and make it the new cursor; out-of-memory error on failure
#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// move cursor back to its parent
#define PUGI__POPNODE()             { cursor = cursor->parent; }
// advance s until X holds or the zero terminator is reached
#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
// advance s while X holds
#define PUGI__SCANWHILE(X)          { while (X) ++s; }
// same as PUGI__SCANWHILE, but X must test the local `ss` (a copy of the character under
// inspection) instead of *s; the loop checks four characters per iteration
#define PUGI__SCANWHILE_UNROLL(X) \
    { \
        for (;;) \
        { \
            char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } \
            ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } \
            ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } \
            ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } \
            s += 4; \
        } \
    }
// save the character at s into ch, replace it with a zero terminator and step past it
#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
// record error position m and status err, then return a null pointer from the enclosing function
#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
// raise err at m if s has reached the zero terminator
#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2598
2599         PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2600         {
2601                 gap g;
2602
2603                 while (true)
2604                 {
2605                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2606
2607                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2608                         {
2609                                 *s++ = '\n'; // replace first one with 0x0a
2610
2611                                 if (*s == '\n') g.push(s, 1);
2612                         }
2613                         else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2614                         {
2615                                 *g.flush(s) = 0;
2616
2617                                 return s + (s[2] == '>' ? 3 : 2);
2618                         }
2619                         else if (*s == 0)
2620                         {
2621                                 return 0;
2622                         }
2623                         else ++s;
2624                 }
2625         }
2626
2627         PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2628         {
2629                 gap g;
2630
2631                 while (true)
2632                 {
2633                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2634
2635                         if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2636                         {
2637                                 *s++ = '\n'; // replace first one with 0x0a
2638
2639                                 if (*s == '\n') g.push(s, 1);
2640                         }
2641                         else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2642                         {
2643                                 *g.flush(s) = 0;
2644
2645                                 return s + 1;
2646                         }
2647                         else if (*s == 0)
2648                         {
2649                                 return 0;
2650                         }
2651                         else ++s;
2652                 }
2653         }
2654
2655         typedef char_t* (*strconv_pcdata_t)(char_t*);
2656
2657         template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2658         {
2659                 static char_t* parse(char_t* s)
2660                 {
2661                         gap g;
2662
2663                         char_t* begin = s;
2664
2665                         while (true)
2666                         {
2667                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2668
2669                                 if (*s == '<') // PCDATA ends here
2670                                 {
2671                                         char_t* end = g.flush(s);
2672
2673                                         if (opt_trim::value)
2674                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2675                                                         --end;
2676
2677                                         *end = 0;
2678
2679                                         return s + 1;
2680                                 }
2681                                 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2682                                 {
2683                                         *s++ = '\n'; // replace first one with 0x0a
2684
2685                                         if (*s == '\n') g.push(s, 1);
2686                                 }
2687                                 else if (opt_escape::value && *s == '&')
2688                                 {
2689                                         s = strconv_escape(s, g);
2690                                 }
2691                                 else if (*s == 0)
2692                                 {
2693                                         char_t* end = g.flush(s);
2694
2695                                         if (opt_trim::value)
2696                                                 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2697                                                         --end;
2698
2699                                         *end = 0;
2700
2701                                         return s;
2702                                 }
2703                                 else ++s;
2704                         }
2705                 }
2706         };
2707
2708         PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2709         {
2710                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2711
2712                 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2713                 {
2714                 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2715                 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2716                 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2717                 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2718                 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2719                 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2720                 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2721                 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2722                 default: assert(false); return 0; // unreachable
2723                 }
2724         }
2725
2726         typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2727
2728         template <typename opt_escape> struct strconv_attribute_impl
2729         {
2730                 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2731                 {
2732                         gap g;
2733
2734                         // trim leading whitespaces
2735                         if (PUGI__IS_CHARTYPE(*s, ct_space))
2736                         {
2737                                 char_t* str = s;
2738
2739                                 do ++str;
2740                                 while (PUGI__IS_CHARTYPE(*str, ct_space));
2741
2742                                 g.push(s, str - s);
2743                         }
2744
2745                         while (true)
2746                         {
2747                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2748
2749                                 if (*s == end_quote)
2750                                 {
2751                                         char_t* str = g.flush(s);
2752
2753                                         do *str-- = 0;
2754                                         while (PUGI__IS_CHARTYPE(*str, ct_space));
2755
2756                                         return s + 1;
2757                                 }
2758                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2759                                 {
2760                                         *s++ = ' ';
2761
2762                                         if (PUGI__IS_CHARTYPE(*s, ct_space))
2763                                         {
2764                                                 char_t* str = s + 1;
2765                                                 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2766
2767                                                 g.push(s, str - s);
2768                                         }
2769                                 }
2770                                 else if (opt_escape::value && *s == '&')
2771                                 {
2772                                         s = strconv_escape(s, g);
2773                                 }
2774                                 else if (!*s)
2775                                 {
2776                                         return 0;
2777                                 }
2778                                 else ++s;
2779                         }
2780                 }
2781
2782                 static char_t* parse_wconv(char_t* s, char_t end_quote)
2783                 {
2784                         gap g;
2785
2786                         while (true)
2787                         {
2788                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2789
2790                                 if (*s == end_quote)
2791                                 {
2792                                         *g.flush(s) = 0;
2793
2794                                         return s + 1;
2795                                 }
2796                                 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2797                                 {
2798                                         if (*s == '\r')
2799                                         {
2800                                                 *s++ = ' ';
2801
2802                                                 if (*s == '\n') g.push(s, 1);
2803                                         }
2804                                         else *s++ = ' ';
2805                                 }
2806                                 else if (opt_escape::value && *s == '&')
2807                                 {
2808                                         s = strconv_escape(s, g);
2809                                 }
2810                                 else if (!*s)
2811                                 {
2812                                         return 0;
2813                                 }
2814                                 else ++s;
2815                         }
2816                 }
2817
                     // Attribute value converter selected by get_strconv_attribute when end-of-line handling is
                     // requested without whitespace conversion or normalization.
                     // Works in place on the buffer starting at s; the caller in parse_tree passes the position
                     // just past the opening quote and the quote character itself as end_quote.
                     // Returns the position just past the closing quote after zero-terminating the value,
                     // or 0 when the buffer ends before end_quote is seen.
2818                 static char_t* parse_eol(char_t* s, char_t end_quote)
2819                 {
2820                         gap g;
2821
2822                         while (true)
2823                         {
                                     // NOTE(review): macro defined outside this chunk; presumably advances s to the next
                                     // character flagged ct_parse_attr - confirm against its definition.
2824                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2825
2826                                 if (*s == end_quote)
2827                                 {
                                             // Closing quote: write the terminating zero at the position g.flush reports.
2828                                         *g.flush(s) = 0;
2829
2830                                         return s + 1;
2831                                 }
2832                                 else if (*s == '\r')
2833                                 {
                                             // A carriage return is rewritten as a line feed.
2834                                         *s++ = '\n';
2835
                                             // NOTE(review): for "\r\n" the following '\n' is handed to the gap helper,
                                             // presumably so the pair collapses to a single '\n' - confirm in gap::push.
2836                                         if (*s == '\n') g.push(s, 1);
2837                                 }
2838                                 else if (opt_escape::value && *s == '&')
2839                                 {
                                             // Only taken when the escape option is compiled in for this instantiation.
2840                                         s = strconv_escape(s, g);
2841                                 }
2842                                 else if (!*s)
2843                                 {
                                             // Zero terminator reached before the closing quote: report failure.
2844                                         return 0;
2845                                 }
2846                                 else ++s;
2847                         }
2848                 }
2849
                     // Attribute value converter selected by get_strconv_attribute when neither end-of-line
                     // handling nor whitespace conversion/normalization is requested.
                     // Works in place on the buffer starting at s and stops at end_quote.
                     // Returns the position just past the closing quote after zero-terminating the value,
                     // or 0 when the buffer ends before end_quote is seen.
2850                 static char_t* parse_simple(char_t* s, char_t end_quote)
2851                 {
2852                         gap g;
2853
2854                         while (true)
2855                         {
                                     // NOTE(review): macro defined outside this chunk; presumably advances s to the next
                                     // character flagged ct_parse_attr - confirm against its definition.
2856                                 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2857
2858                                 if (*s == end_quote)
2859                                 {
                                             // Closing quote: write the terminating zero at the position g.flush reports.
2860                                         *g.flush(s) = 0;
2861
2862                                         return s + 1;
2863                                 }
2864                                 else if (opt_escape::value && *s == '&')
2865                                 {
                                             // Only taken when the escape option is compiled in for this instantiation.
2866                                         s = strconv_escape(s, g);
2867                                 }
2868                                 else if (!*s)
2869                                 {
                                             // Zero terminator reached before the closing quote: report failure.
2870                                         return 0;
2871                                 }
2872                                 else ++s; // any other stop character is ordinary content in this mode
2873                         }
2874                 }
2875         };
2876
             // Picks the attribute value conversion routine matching the parse options in optmask.
             // Bits 4..7 of optmask (parse_escapes, parse_eol, parse_wconv_attribute, parse_wnorm_attribute)
             // are shifted down to form a 0..15 index. Bit 0 of that index (parse_escapes) selects the
             // opt_true or opt_false instantiation; the remaining bits select the routine, with
             // wnorm taking precedence over wconv, and wconv over eol.
2877         PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2878         {
                     // The index arithmetic below relies on these exact flag values.
2879                 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2880
2881                 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2882                 {
2883                 case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2884                 case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2885                 case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2886                 case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2887                 case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2888                 case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2889                 case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2890                 case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2891                 case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2892                 case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2893                 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2894                 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2895                 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2896                 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2897                 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2898                 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2899                 default: assert(false); return 0; // unreachable
2900                 }
2901         }
2902
             // Builds an xml_parse_result carrying the given status and offset (offset defaults to 0).
             // Only these two members are assigned here; any others keep their default-constructed state.
2903         inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2904         {
2905                 xml_parse_result result;
2906                 result.status = status;
2907                 result.offset = offset;
2908
2909                 return result;
2910         }
2911
2912         struct xml_parser
2913         {
2914                 xml_allocator* alloc; // allocator handed to node/attribute creation (e.g. append_new_attribute in parse_tree)
2915                 char_t* error_offset; // starts as 0; NOTE(review): presumably filled in by PUGI__THROW_ERROR (macro not visible here) - confirm
2916                 xml_parse_status error_status; // starts as status_ok
2917
                     // Stores the allocator pointer and starts with no recorded error.
2918                 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2919                 {
2920                 }
2921
2922                 // DOCTYPE consists of nested sections of the following possible types:
2923                 // <!-- ... -->, <? ... ?>, "...", '...'
2924                 // <![...]]>
2925                 // <!...>
2926                 // First group can not contain nested groups
2927                 // Second group can contain nested groups of the same type
2928                 // Third group can contain all other groups
                     //
                     // parse_doctype_primitive handles the first group only. It expects s at a quote character,
                     // at "<?" or at "<!--", and returns the position just past the matching terminator.
                     // Any other start, or a buffer that ends before the terminator, is reported as
                     // status_bad_doctype; callers in this file treat a null result as failure.
                     // NOTE(review): PUGI__SCANFOR and PUGI__THROW_ERROR are defined outside this chunk; the
                     // former presumably advances s until its condition holds or the zero terminator is hit.
2929                 char_t* parse_doctype_primitive(char_t* s)
2930                 {
2931                         if (*s == '"' || *s == '\'')
2932                         {
2933                                 // quoted string
2934                                 char_t ch = *s++; // remember which quote opened the string
2935                                 PUGI__SCANFOR(*s == ch);
2936                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); // no closing quote before the end of the buffer
2937
2938                                 s++; // step over the closing quote
2939                         }
2940                         else if (s[0] == '<' && s[1] == '?')
2941                         {
2942                                 // <? ... ?>
2943                                 s += 2;
2944                                 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2945                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2946
2947                                 s += 2; // step over "?>"
2948                         }
2949                         else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2950                         {
                                     // <!-- ... -->
2951                                 s += 4;
2952                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2953                                 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2954
2955                                 s += 3; // step over "-->"
2956                         }
2957                         else PUGI__THROW_ERROR(status_bad_doctype, s); // not a recognised primitive
2958
2959                         return s;
2960                 }
2961
                     // Skips a "<![ ... ]]>" section inside a DOCTYPE, honouring nested sections of the same kind.
                     // s must point at the opening "<![" (asserted). Returns the position just past the "]]>"
                     // that closes the outermost section; if the buffer ends first the result is
                     // status_bad_doctype.
2962                 char_t* parse_doctype_ignore(char_t* s)
2963                 {
2964                         size_t depth = 0; // number of nested "<![" sections currently open inside the outer one
2965
2966                         assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2967                         s += 3;
2968
2969                         while (*s)
2970                         {
2971                                 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2972                                 {
2973                                         // nested ignore section
2974                                         s += 3;
2975                                         depth++;
2976                                 }
2977                                 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2978                                 {
2979                                         // ignore section end
2980                                         s += 3;
2981
                                             // depth 0 means this "]]>" closes the section we were called for
2982                                         if (depth == 0)
2983                                                 return s;
2984
2985                                         depth--;
2986                                 }
2987                                 else s++;
2988                         }
2989
                             // Ran into the zero terminator with the section still open.
                             // The macro is the last statement of the function, so it evidently supplies the return.
2990                         PUGI__THROW_ERROR(status_bad_doctype, s);
2991                 }
2992
                     // Scans a "<!...>" DOCTYPE group, including nested "<!...>" groups, "<![...]]>" sections and
                     // primitives (quoted strings, "<?...?>", "<!--...-->").
                     // s must point two characters before the group content, with '!' at s[1] (asserted).
                     // Returns s pointing AT the '>' that closes the outermost group (it is not consumed), or at
                     // the zero terminator when the buffer ends at depth 0 and endch is '>'. Returns null when a
                     // nested parse fails; other malformed input is reported as status_bad_doctype.
                     // NOTE(review): endch presumably is the buffer's original last character that was replaced
                     // by the zero terminator - confirm with the code that calls the parser.
2993                 char_t* parse_doctype_group(char_t* s, char_t endch)
2994                 {
2995                         size_t depth = 0; // number of nested "<!...>" control groups currently open
2996
                             // NOTE(review): a zero is tolerated in place of '<'; presumably an earlier segment
                             // terminator may have overwritten it - confirm.
2997                         assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2998                         s += 2;
2999
3000                         while (*s)
3001                         {
3002                                 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3003                                 {
3004                                         if (s[2] == '[')
3005                                         {
3006                                                 // ignore
3007                                                 s = parse_doctype_ignore(s);
3008                                                 if (!s) return s; // propagate failure
3009                                         }
3010                                         else
3011                                         {
3012                                                 // some control group
3013                                                 s += 2;
3014                                                 depth++;
3015                                         }
3016                                 }
3017                                 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3018                                 {
3019                                         // unknown tag (forbidden), or some primitive group
3020                                         s = parse_doctype_primitive(s);
3021                                         if (!s) return s; // propagate failure
3022                                 }
3023                                 else if (*s == '>')
3024                                 {
                                             // At depth 0 this '>' ends the group we were called for; leave it for the caller.
3025                                         if (depth == 0)
3026                                                 return s;
3027
3028                                         depth--;
3029                                         s++;
3030                                 }
3031                                 else s++;
3032                         }
3033
                             // Zero terminator reached: only acceptable when no nested group is open and endch is '>'.
3034                         if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3035
3036                         return s;
3037                 }
3038
                     // Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
                     // s points at the '!' on entry (parse_tree calls this when *s == '!').
                     // cursor is the current node; it is taken by value, so changes made to it here are not
                     // seen by the caller. optmsk holds the parse option flags tested through PUGI__OPTSET.
                     // Returns the position just past the construct. parse_tree treats a null result as failure,
                     // which is what the PUGI__THROW_ERROR / PUGI__CHECK_ERROR paths presumably produce
                     // (macros defined outside this chunk).
                     // NOTE(review): endch presumably is the buffer's original last character that was replaced
                     // by the zero terminator, and PUGI__ENDSWITH(c, e) presumably also accepts that case - confirm.
3039                 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3040                 {
3041                         // parse node contents, starting with exclamation mark
3042                         ++s;
3043
3044                         if (*s == '-') // '<!-...'
3045                         {
3046                                 ++s;
3047
3048                                 if (*s == '-') // '<!--...'
3049                                 {
3050                                         ++s;
3051
3052                                         if (PUGI__OPTSET(parse_comments))
3053                                         {
3054                                                 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3055                                                 cursor->value = s; // Save the offset.
3056                                         }
3057
                                             // End-of-line conversion is only needed when the comment text is kept.
3058                                         if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3059                                         {
3060                                                 s = strconv_comment(s, endch);
3061
3062                                                 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3063                                         }
3064                                         else
3065                                         {
3066                                                 // Scan for terminating '-->'.
3067                                                 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3068                                                 PUGI__CHECK_ERROR(status_bad_comment, s);
3069
3070                                                 if (PUGI__OPTSET(parse_comments))
3071                                                         *s = 0; // Zero-terminate this segment at the first terminating '-'.
3072
                                                     // Advance by 2 instead of 3 when no literal '>' is present at s[2].
3073                                                 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3074                                         }
3075                                 }
3076                                 else PUGI__THROW_ERROR(status_bad_comment, s);
3077                         }
3078                         else if (*s == '[')
3079                         {
3080                                 // '<![CDATA[...'
3081                                 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3082                                 {
3083                                         ++s;
3084
3085                                         if (PUGI__OPTSET(parse_cdata))
3086                                         {
3087                                                 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3088                                                 cursor->value = s; // Save the offset.
3089
3090                                                 if (PUGI__OPTSET(parse_eol))
3091                                                 {
3092                                                         s = strconv_cdata(s, endch);
3093
3094                                                         if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3095                                                 }
3096                                                 else
3097                                                 {
3098                                                         // Scan for terminating ']]>'.
3099                                                         PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3100                                                         PUGI__CHECK_ERROR(status_bad_cdata, s);
3101
3102                                                         *s++ = 0; // Zero-terminate this segment.
3103                                                 }
3104                                         }
3105                                         else // Flagged for discard, but we still have to scan for the terminator.
3106                                         {
3107                                                 // Scan for terminating ']]>'.
3108                                                 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3109                                                 PUGI__CHECK_ERROR(status_bad_cdata, s);
3110
3111                                                 ++s;
3112                                         }
3113
                                             // Advance by 1 instead of 2 when no literal '>' follows the last ']'.
3114                                         s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3115                                 }
3116                                 else PUGI__THROW_ERROR(status_bad_cdata, s);
3117                         }
3118                         else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3119                         {
3120                                 s -= 2; // back up over "<!" so s is where parse_doctype_group expects it
3121
3122                                 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); // rejected when the current node has a parent
3123
3124                                 char_t* mark = s + 9; // first character after the nine characters of "<!DOCTYPE"
3125
3126                                 s = parse_doctype_group(s, endch);
3127                                 if (!s) return s;
3128
3129                                 assert((*s == 0 && endch == '>') || *s == '>');
3130                                 if (*s) *s++ = 0; // overwrite the closing '>' with a terminator and step past it
3131
3132                                 if (PUGI__OPTSET(parse_doctype))
3133                                 {
                                             // Drop leading whitespace so the stored value starts at the first non-space character.
3134                                         while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3135
3136                                         PUGI__PUSHNODE(node_doctype);
3137
3138                                         cursor->value = mark;
3139                                 }
3140                         }
                             // Buffer ended right after "<!": pick the error status from endch.
3141                         else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3142                         else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3143                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3144
3145                         return s;
3146                 }
3147
                     // Parses a construct that begins with "<?": an XML declaration or a processing instruction.
                     // s points at the '?' on entry (parse_tree calls this when *s == '?').
                     // ref_cursor is the current node; it is copied to a local, and the local is written back
                     // just before the normal return at the end of the function.
                     // optmsk holds the parse option flags tested through PUGI__OPTSET.
                     // Returns the position to continue parsing from. For a kept declaration that has content
                     // this is the start of that content, with the cursor left on the node_declaration node so
                     // that parse_tree jumps to its attribute loop. parse_tree treats a null result as failure.
                     // NOTE(review): PUGI__* macros are defined outside this chunk; PUGI__ENDSEG is described in
                     // parse_tree as "Save char in 'ch', terminate & step over", and the error macros presumably
                     // return null from this function - confirm.
3148                 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3149                 {
3150                         // load into registers
3151                         xml_node_struct* cursor = ref_cursor;
3152                         char_t ch = 0;
3153
3154                         // parse node contents, starting with question mark
3155                         ++s;
3156
3157                         // read PI target
3158                         char_t* target = s;
3159
3160                         if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3161
3162                         PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3163                         PUGI__CHECK_ERROR(status_bad_pi, s);
3164
3165                         // determine node type; stricmp / strcasecmp is not portable
                             // OR-ing with ' ' (0x20) maps ASCII upper-case letters onto lower-case ones, and
                             // target + 3 == s requires the target to be exactly three characters long.
3166                         bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3167
                             // Keep the node only when the option matching its kind is enabled.
3168                         if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3169                         {
3170                                 if (declaration)
3171                                 {
3172                                         // disallow non top-level declarations
3173                                         if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3174
3175                                         PUGI__PUSHNODE(node_declaration);
3176                                 }
3177                                 else
3178                                 {
3179                                         PUGI__PUSHNODE(node_pi);
3180                                 }
3181
3182                                 cursor->name = target;
3183
3184                                 PUGI__ENDSEG();
3185
3186                                 // parse value/attributes
3187                                 if (ch == '?')
3188                                 {
3189                                         // empty node
3190                                         if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3191                                         s += (*s == '>'); // advance only when a literal '>' is present
3192
3193                                         PUGI__POPNODE();
3194                                 }
3195                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3196                                 {
3197                                         PUGI__SKIPWS();
3198
3199                                         // scan for tag end
3200                                         char_t* value = s;
3201
3202                                         PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3203                                         PUGI__CHECK_ERROR(status_bad_pi, s);
3204
3205                                         if (declaration)
3206                                         {
3207                                                 // replace ending ? with / so that 'element' terminates properly
3208                                                 *s = '/';
3209
3210                                                 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3211                                                 s = value;
3212                                         }
3213                                         else
3214                                         {
3215                                                 // store value and step over >
3216                                                 cursor->value = value;
3217
3218                                                 PUGI__POPNODE();
3219
3220                                                 PUGI__ENDSEG();
3221
3222                                                 s += (*s == '>');
3223                                         }
3224                                 }
3225                                 else PUGI__THROW_ERROR(status_bad_pi, s);
3226                         }
3227                         else
3228                         {
                                     // Node is not kept: no node is created, only the terminator is located.
3229                                 // scan for tag end
3230                                 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3231                                 PUGI__CHECK_ERROR(status_bad_pi, s);
3232
3233                                 s += (s[1] == '>' ? 2 : 1); // step over "?>", or over '?' alone when no literal '>' follows
3234                         }
3235
3236                         // store from registers
3237                         ref_cursor = cursor;
3238
3239                         return s;
3240                 }
3241
3242                 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3243                 {
3244                         strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3245                         strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3246
3247                         char_t ch = 0;
3248                         xml_node_struct* cursor = root;
3249                         char_t* mark = s;
3250
3251                         while (*s != 0)
3252                         {
3253                                 if (*s == '<')
3254                                 {
3255                                         ++s;
3256
3257                                 LOC_TAG:
3258                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3259                                         {
3260                                                 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3261
3262                                                 cursor->name = s;
3263
3264                                                 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3265                                                 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3266
3267                                                 if (ch == '>')
3268                                                 {
3269                                                         // end of tag
3270                                                 }
3271                                                 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3272                                                 {
3273                                                 LOC_ATTRIBUTES:
3274                                                         while (true)
3275                                                         {
3276                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3277
3278                                                                 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3279                                                                 {
3280                                                                         xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3281                                                                         if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3282
3283                                                                         a->name = s; // Save the offset.
3284
3285                                                                         PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3286                                                                         PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3287
3288                                                                         if (PUGI__IS_CHARTYPE(ch, ct_space))
3289                                                                         {
3290                                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3291
3292                                                                                 ch = *s;
3293                                                                                 ++s;
3294                                                                         }
3295
3296                                                                         if (ch == '=') // '<... #=...'
3297                                                                         {
3298                                                                                 PUGI__SKIPWS(); // Eat any whitespace.
3299
3300                                                                                 if (*s == '"' || *s == '\'') // '<... #="...'
3301                                                                                 {
3302                                                                                         ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3303                                                                                         ++s; // Step over the quote.
3304                                                                                         a->value = s; // Save the offset.
3305
3306                                                                                         s = strconv_attribute(s, ch);
3307
3308                                                                                         if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3309
3310                                                                                         // After this line the loop continues from the start;
3311                                                                                         // Whitespaces, / and > are ok, symbols and EOF are wrong,
3312                                                                                         // everything else will be detected
3313                                                                                         if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3314                                                                                 }
3315                                                                                 else PUGI__THROW_ERROR(status_bad_attribute, s);
3316                                                                         }
3317                                                                         else PUGI__THROW_ERROR(status_bad_attribute, s);
3318                                                                 }
3319                                                                 else if (*s == '/')
3320                                                                 {
3321                                                                         ++s;
3322
3323                                                                         if (*s == '>')
3324                                                                         {
3325                                                                                 PUGI__POPNODE();
3326                                                                                 s++;
3327                                                                                 break;
3328                                                                         }
3329                                                                         else if (*s == 0 && endch == '>')
3330                                                                         {
3331                                                                                 PUGI__POPNODE();
3332                                                                                 break;
3333                                                                         }
3334                                                                         else PUGI__THROW_ERROR(status_bad_start_element, s);
3335                                                                 }
3336                                                                 else if (*s == '>')
3337                                                                 {
3338                                                                         ++s;
3339
3340                                                                         break;
3341                                                                 }
3342                                                                 else if (*s == 0 && endch == '>')
3343                                                                 {
3344                                                                         break;
3345                                                                 }
3346                                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);
3347                                                         }
3348
3349                                                         // !!!
3350                                                 }
3351                                                 else if (ch == '/') // '<#.../'
3352                                                 {
3353                                                         if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3354
3355                                                         PUGI__POPNODE(); // Pop.
3356
3357                                                         s += (*s == '>');
3358                                                 }
3359                                                 else if (ch == 0)
3360                                                 {
3361                                                         // we stepped over null terminator, backtrack & handle closing tag
3362                                                         --s;
3363
3364                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3365                                                 }
3366                                                 else PUGI__THROW_ERROR(status_bad_start_element, s);
3367                                         }
3368                                         else if (*s == '/')
3369                                         {
3370                                                 ++s;
3371
3372                                                 mark = s;
3373
3374                                                 char_t* name = cursor->name;
3375                                                 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3376
3377                                                 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3378                                                 {
3379                                                         if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3380                                                 }
3381
3382                                                 if (*name)
3383                                                 {
3384                                                         if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3385                                                         else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3386                                                 }
3387
3388                                                 PUGI__POPNODE(); // Pop.
3389
3390                                                 PUGI__SKIPWS();
3391
3392                                                 if (*s == 0)
3393                                                 {
3394                                                         if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3395                                                 }
3396                                                 else
3397                                                 {
3398                                                         if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3399                                                         ++s;
3400                                                 }
3401                                         }
3402                                         else if (*s == '?') // '<?...'
3403                                         {
3404                                                 s = parse_question(s, cursor, optmsk, endch);
3405                                                 if (!s) return s;
3406
3407                                                 assert(cursor);
3408                                                 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3409                                         }
3410                                         else if (*s == '!') // '<!...'
3411                                         {
3412                                                 s = parse_exclamation(s, cursor, optmsk, endch);
3413                                                 if (!s) return s;
3414                                         }
3415                                         else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3416                                         else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3417                                 }
3418                                 else
3419                                 {
3420                                         mark = s; // Save this offset while searching for a terminator.
3421
3422                                         PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3423
3424                                         if (*s == '<' || !*s)
3425                                         {
3426                                                 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3427                                                 assert(mark != s);
3428
3429                                                 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3430                                                 {
3431                                                         continue;
3432                                                 }
3433                                                 else if (PUGI__OPTSET(parse_ws_pcdata_single))
3434                                                 {
3435                                                         if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3436                                                 }
3437                                         }
3438
3439                                         if (!PUGI__OPTSET(parse_trim_pcdata))
3440                                                 s = mark;
3441
3442                                         if (cursor->parent || PUGI__OPTSET(parse_fragment))
3443                                         {
3444                                                 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3445                                                 {
3446                                                         cursor->value = s; // Save the offset.
3447                                                 }
3448                                                 else
3449                                                 {
3450                                                         PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3451
3452                                                         cursor->value = s; // Save the offset.
3453
3454                                                         PUGI__POPNODE(); // Pop since this is a standalone.
3455                                                 }
3456
3457                                                 s = strconv_pcdata(s);
3458
3459                                                 if (!*s) break;
3460                                         }
3461                                         else
3462                                         {
3463                                                 PUGI__SCANFOR(*s == '<'); // '...<'
3464                                                 if (!*s) break;
3465
3466                                                 ++s;
3467                                         }
3468
3469                                         // We're after '<'
3470                                         goto LOC_TAG;
3471                                 }
3472                         }
3473
3474                         // check that last tag is closed
3475                         if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3476
3477                         return s;
3478                 }
3479
3480         #ifdef PUGIXML_WCHAR_MODE
3481                 static char_t* parse_skip_bom(char_t* s)
3482                 {
3483                         unsigned int bom = 0xfeff;
3484                         return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3485                 }
3486         #else
3487                 static char_t* parse_skip_bom(char_t* s)
3488                 {
3489                         return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3490                 }
3491         #endif
3492
3493                 static bool has_element_node_siblings(xml_node_struct* node)
3494                 {
3495                         while (node)
3496                         {
3497                                 if (PUGI__NODETYPE(node) == node_element) return true;
3498
3499                                 node = node->next_sibling;
3500                         }
3501
3502                         return false;
3503                 }
3504
3505                 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3506                 {
3507                         // early-out for empty documents
3508                         if (length == 0)
3509                                 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3510
3511                         // get last child of the root before parsing
3512                         xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3513
3514                         // create parser on stack
3515                         xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3516
3517                         // save last character and make buffer zero-terminated (speeds up parsing)
3518                         char_t endch = buffer[length - 1];
3519                         buffer[length - 1] = 0;
3520
3521                         // skip BOM to make sure it does not end up as part of parse output
3522                         char_t* buffer_data = parse_skip_bom(buffer);
3523
3524                         // perform actual parsing
3525                         parser.parse_tree(buffer_data, root, optmsk, endch);
3526
3527                         xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3528                         assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3529
3530                         if (result)
3531                         {
3532                                 // since we removed last character, we have to handle the only possible false positive (stray <)
3533                                 if (endch == '<')
3534                                         return make_parse_result(status_unrecognized_tag, length - 1);
3535
3536                                 // check if there are any element nodes parsed
3537                                 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3538
3539                                 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3540                                         return make_parse_result(status_no_document_element, length - 1);
3541                         }
3542                         else
3543                         {
3544                                 // roll back offset if it occurs on a null terminator in the source buffer
3545                                 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3546                                         result.offset--;
3547                         }
3548
3549                         return result;
3550                 }
3551         };
3552
3553         // Output facilities
3554         PUGI__FN xml_encoding get_write_native_encoding()
3555         {
3556         #ifdef PUGIXML_WCHAR_MODE
3557                 return get_wchar_encoding();
3558         #else
3559                 return encoding_utf8;
3560         #endif
3561         }
3562
3563         PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3564         {
3565                 // replace wchar encoding with utf implementation
3566                 if (encoding == encoding_wchar) return get_wchar_encoding();
3567
3568                 // replace utf16 encoding with utf16 with specific endianness
3569                 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3570
3571                 // replace utf32 encoding with utf32 with specific endianness
3572                 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3573
3574                 // only do autodetection if no explicit encoding is requested
3575                 if (encoding != encoding_auto) return encoding;
3576
3577                 // assume utf8 encoding
3578                 return encoding_utf8;
3579         }
3580
3581         template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3582         {
3583                 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3584
3585                 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3586
3587                 return static_cast<size_t>(end - dest) * sizeof(*dest);
3588         }
3589
3590         template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3591         {
3592                 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3593
3594                 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3595
3596                 if (opt_swap)
3597                 {
3598                         for (typename T::value_type i = dest; i != end; ++i)
3599                                 *i = endian_swap(*i);
3600                 }
3601
3602                 return static_cast<size_t>(end - dest) * sizeof(*dest);
3603         }
3604
3605 #ifdef PUGIXML_WCHAR_MODE
3606         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3607         {
3608                 if (length < 1) return 0;
3609
3610                 // discard last character if it's the lead of a surrogate pair
3611                 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3612         }
3613
3614         PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3615         {
3616                 // only endian-swapping is required
3617                 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3618                 {
3619                         convert_wchar_endian_swap(r_char, data, length);
3620
3621                         return length * sizeof(char_t);
3622                 }
3623
3624                 // convert to utf8
3625                 if (encoding == encoding_utf8)
3626                         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3627
3628                 // convert to utf16
3629                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3630                 {
3631                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3632
3633                         return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3634                 }
3635
3636                 // convert to utf32
3637                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3638                 {
3639                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3640
3641                         return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3642                 }
3643
3644                 // convert to latin1
3645                 if (encoding == encoding_latin1)
3646                         return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3647
3648                 assert(false && "Invalid encoding"); // unreachable
3649                 return 0;
3650         }
3651 #else
3652         PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3653         {
3654                 if (length < 5) return 0;
3655
3656                 for (size_t i = 1; i <= 4; ++i)
3657                 {
3658                         uint8_t ch = static_cast<uint8_t>(data[length - i]);
3659
3660                         // either a standalone character or a leading one
3661                         if ((ch & 0xc0) != 0x80) return length - i;
3662                 }
3663
3664                 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3665                 return length;
3666         }
3667
3668         PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3669         {
3670                 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3671                 {
3672                         xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3673
3674                         return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3675                 }
3676
3677                 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3678                 {
3679                         xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3680
3681                         return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3682                 }
3683
3684                 if (encoding == encoding_latin1)
3685                         return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3686
3687                 assert(false && "Invalid encoding"); // unreachable
3688                 return 0;
3689         }
3690 #endif
3691
3692         class xml_buffered_writer
3693         {
3694                 xml_buffered_writer(const xml_buffered_writer&);
3695                 xml_buffered_writer& operator=(const xml_buffered_writer&);
3696
3697         public:
3698                 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3699                 {
3700                         PUGI__STATIC_ASSERT(bufcapacity >= 8);
3701                 }
3702
3703                 size_t flush()
3704                 {
3705                         flush(buffer, bufsize);
3706                         bufsize = 0;
3707                         return 0;
3708                 }
3709
3710                 void flush(const char_t* data, size_t size)
3711                 {
3712                         if (size == 0) return;
3713
3714                         // fast path, just write data
3715                         if (encoding == get_write_native_encoding())
3716                                 writer.write(data, size * sizeof(char_t));
3717                         else
3718                         {
3719                                 // convert chunk
3720                                 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3721                                 assert(result <= sizeof(scratch));
3722
3723                                 // write data
3724                                 writer.write(scratch.data_u8, result);
3725                         }
3726                 }
3727
3728                 void write_direct(const char_t* data, size_t length)
3729                 {
3730                         // flush the remaining buffer contents
3731                         flush();
3732
3733                         // handle large chunks
3734                         if (length > bufcapacity)
3735                         {
3736                                 if (encoding == get_write_native_encoding())
3737                                 {
3738                                         // fast path, can just write data chunk
3739                                         writer.write(data, length * sizeof(char_t));
3740                                         return;
3741                                 }
3742
3743                                 // need to convert in suitable chunks
3744                                 while (length > bufcapacity)
3745                                 {
3746                                         // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3747                                         // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3748                                         size_t chunk_size = get_valid_length(data, bufcapacity);
3749                                         assert(chunk_size);
3750
3751                                         // convert chunk and write
3752                                         flush(data, chunk_size);
3753
3754                                         // iterate
3755                                         data += chunk_size;
3756                                         length -= chunk_size;
3757                                 }
3758
3759                                 // small tail is copied below
3760                                 bufsize = 0;
3761                         }
3762
3763                         memcpy(buffer + bufsize, data, length * sizeof(char_t));
3764                         bufsize += length;
3765                 }
3766
3767                 void write_buffer(const char_t* data, size_t length)
3768                 {
3769                         size_t offset = bufsize;
3770
3771                         if (offset + length <= bufcapacity)
3772                         {
3773                                 memcpy(buffer + offset, data, length * sizeof(char_t));
3774                                 bufsize = offset + length;
3775                         }
3776                         else
3777                         {
3778                                 write_direct(data, length);
3779                         }
3780                 }
3781
3782                 void write_string(const char_t* data)
3783                 {
3784                         // write the part of the string that fits in the buffer
3785                         size_t offset = bufsize;
3786
3787                         while (*data && offset < bufcapacity)
3788                                 buffer[offset++] = *data++;
3789
3790                         // write the rest
3791                         if (offset < bufcapacity)
3792                         {
3793                                 bufsize = offset;
3794                         }
3795                         else
3796                         {
3797                                 // backtrack a bit if we have split the codepoint
3798                                 size_t length = offset - bufsize;
3799                                 size_t extra = length - get_valid_length(data - length, length);
3800
3801                                 bufsize = offset - extra;
3802
3803                                 write_direct(data - extra, strlength(data) + extra);
3804                         }
3805                 }
3806
3807                 void write(char_t d0)
3808                 {
3809                         size_t offset = bufsize;
3810                         if (offset > bufcapacity - 1) offset = flush();
3811
3812                         buffer[offset + 0] = d0;
3813                         bufsize = offset + 1;
3814                 }
3815
3816                 void write(char_t d0, char_t d1)
3817                 {
3818                         size_t offset = bufsize;
3819                         if (offset > bufcapacity - 2) offset = flush();
3820
3821                         buffer[offset + 0] = d0;
3822                         buffer[offset + 1] = d1;
3823                         bufsize = offset + 2;
3824                 }
3825
3826                 void write(char_t d0, char_t d1, char_t d2)
3827                 {
3828                         size_t offset = bufsize;
3829                         if (offset > bufcapacity - 3) offset = flush();
3830
3831                         buffer[offset + 0] = d0;
3832                         buffer[offset + 1] = d1;
3833                         buffer[offset + 2] = d2;
3834                         bufsize = offset + 3;
3835                 }
3836
3837                 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3838                 {
3839                         size_t offset = bufsize;
3840                         if (offset > bufcapacity - 4) offset = flush();
3841
3842                         buffer[offset + 0] = d0;
3843                         buffer[offset + 1] = d1;
3844                         buffer[offset + 2] = d2;
3845                         buffer[offset + 3] = d3;
3846                         bufsize = offset + 4;
3847                 }
3848
3849                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3850                 {
3851                         size_t offset = bufsize;
3852                         if (offset > bufcapacity - 5) offset = flush();
3853
3854                         buffer[offset + 0] = d0;
3855                         buffer[offset + 1] = d1;
3856                         buffer[offset + 2] = d2;
3857                         buffer[offset + 3] = d3;
3858                         buffer[offset + 4] = d4;
3859                         bufsize = offset + 5;
3860                 }
3861
3862                 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3863                 {
3864                         size_t offset = bufsize;
3865                         if (offset > bufcapacity - 6) offset = flush();
3866
3867                         buffer[offset + 0] = d0;
3868                         buffer[offset + 1] = d1;
3869                         buffer[offset + 2] = d2;
3870                         buffer[offset + 3] = d3;
3871                         buffer[offset + 4] = d4;
3872                         buffer[offset + 5] = d5;
3873                         bufsize = offset + 6;
3874                 }
3875
3876                 // utf8 maximum expansion: x4 (-> utf32)
3877                 // utf16 maximum expansion: x2 (-> utf32)
3878                 // utf32 maximum expansion: x1
3879                 enum
3880                 {
3881                         bufcapacitybytes =
3882                         #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3883                                 PUGIXML_MEMORY_OUTPUT_STACK
3884                         #else
3885                                 10240
3886                         #endif
3887                         ,
3888                         bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3889                 };
3890
3891                 char_t buffer[bufcapacity];
3892
3893                 union
3894                 {
3895                         uint8_t data_u8[4 * bufcapacity];
3896                         uint16_t data_u16[2 * bufcapacity];
3897                         uint32_t data_u32[bufcapacity];
3898                         char_t data_char[bufcapacity];
3899                 } scratch;
3900
3901                 xml_writer& writer;
3902                 size_t bufsize;
3903                 xml_encoding encoding;
3904         };
3905
3906         PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3907         {
3908                 while (*s)
3909                 {
3910                         const char_t* prev = s;
3911
3912                         // While *s is a usual symbol
3913                         PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3914
3915                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
3916
3917                         switch (*s)
3918                         {
3919                                 case 0: break;
3920                                 case '&':
3921                                         writer.write('&', 'a', 'm', 'p', ';');
3922                                         ++s;
3923                                         break;
3924                                 case '<':
3925                                         writer.write('&', 'l', 't', ';');
3926                                         ++s;
3927                                         break;
3928                                 case '>':
3929                                         writer.write('&', 'g', 't', ';');
3930                                         ++s;
3931                                         break;
3932                                 case '"':
3933                                         writer.write('&', 'q', 'u', 'o', 't', ';');
3934                                         ++s;
3935                                         break;
3936                                 default: // s is not a usual symbol
3937                                 {
3938                                         unsigned int ch = static_cast<unsigned int>(*s++);
3939                                         assert(ch < 32);
3940
3941                                         writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3942                                 }
3943                         }
3944                 }
3945         }
3946
3947         PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3948         {
3949                 if (flags & format_no_escapes)
3950                         writer.write_string(s);
3951                 else
3952                         text_output_escaped(writer, s, type);
3953         }
3954
3955         PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3956         {
3957                 do
3958                 {
3959                         writer.write('<', '!', '[', 'C', 'D');
3960                         writer.write('A', 'T', 'A', '[');
3961
3962                         const char_t* prev = s;
3963
3964                         // look for ]]> sequence - we can't output it as is since it terminates CDATA
3965                         while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3966
3967                         // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3968                         if (*s) s += 2;
3969
3970                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
3971
3972                         writer.write(']', ']', '>');
3973                 }
3974                 while (*s);
3975         }
3976
3977         PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3978         {
3979                 switch (indent_length)
3980                 {
3981                 case 1:
3982                 {
3983                         for (unsigned int i = 0; i < depth; ++i)
3984                                 writer.write(indent[0]);
3985                         break;
3986                 }
3987
3988                 case 2:
3989                 {
3990                         for (unsigned int i = 0; i < depth; ++i)
3991                                 writer.write(indent[0], indent[1]);
3992                         break;
3993                 }
3994
3995                 case 3:
3996                 {
3997                         for (unsigned int i = 0; i < depth; ++i)
3998                                 writer.write(indent[0], indent[1], indent[2]);
3999                         break;
4000                 }
4001
4002                 case 4:
4003                 {
4004                         for (unsigned int i = 0; i < depth; ++i)
4005                                 writer.write(indent[0], indent[1], indent[2], indent[3]);
4006                         break;
4007                 }
4008
4009                 default:
4010                 {
4011                         for (unsigned int i = 0; i < depth; ++i)
4012                                 writer.write_buffer(indent, indent_length);
4013                 }
4014                 }
4015         }
4016
4017         PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4018         {
4019                 writer.write('<', '!', '-', '-');
4020
4021                 while (*s)
4022                 {
4023                         const char_t* prev = s;
4024
4025                         // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4026                         while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4027
4028                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
4029
4030                         if (*s)
4031                         {
4032                                 assert(*s == '-');
4033
4034                                 writer.write('-', ' ');
4035                                 ++s;
4036                         }
4037                 }
4038
4039                 writer.write('-', '-', '>');
4040         }
4041
4042         PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4043         {
4044                 while (*s)
4045                 {
4046                         const char_t* prev = s;
4047
4048                         // look for ?> sequence - we can't output it since ?> terminates PI
4049                         while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4050
4051                         writer.write_buffer(prev, static_cast<size_t>(s - prev));
4052
4053                         if (*s)
4054                         {
4055                                 assert(s[0] == '?' && s[1] == '>');
4056
4057                                 writer.write('?', ' ', '>');
4058                                 s += 2;
4059                         }
4060                 }
4061         }
4062
4063         PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4064         {
4065                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4066
4067                 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4068                 {
4069                         if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4070                         {
4071                                 writer.write('\n');
4072
4073                                 text_output_indent(writer, indent, indent_length, depth + 1);
4074                         }
4075                         else
4076                         {
4077                                 writer.write(' ');
4078                         }
4079
4080                         writer.write_string(a->name ? a->name + 0 : default_name);
4081                         writer.write('=', '"');
4082
4083                         if (a->value)
4084                                 text_output(writer, a->value, ctx_special_attr, flags);
4085
4086                         writer.write('"');
4087                 }
4088         }
4089
4090         PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4091         {
4092                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4093                 const char_t* name = node->name ? node->name + 0 : default_name;
4094
4095                 writer.write('<');
4096                 writer.write_string(name);
4097
4098                 if (node->first_attribute)
4099                         node_output_attributes(writer, node, indent, indent_length, flags, depth);
4100
4101                 // element nodes can have value if parse_embed_pcdata was used
4102                 if (!node->value)
4103                 {
4104                         if (!node->first_child)
4105                         {
4106                                 if (flags & format_no_empty_element_tags)
4107                                 {
4108                                         writer.write('>', '<', '/');
4109                                         writer.write_string(name);
4110                                         writer.write('>');
4111
4112                                         return false;
4113                                 }
4114                                 else
4115                                 {
4116                                         if ((flags & format_raw) == 0)
4117                                                 writer.write(' ');
4118
4119                                         writer.write('/', '>');
4120
4121                                         return false;
4122                                 }
4123                         }
4124                         else
4125                         {
4126                                 writer.write('>');
4127
4128                                 return true;
4129                         }
4130                 }
4131                 else
4132                 {
4133                         writer.write('>');
4134
4135                         text_output(writer, node->value, ctx_special_pcdata, flags);
4136
4137                         if (!node->first_child)
4138                         {
4139                                 writer.write('<', '/');
4140                                 writer.write_string(name);
4141                                 writer.write('>');
4142
4143                                 return false;
4144                         }
4145                         else
4146                         {
4147                                 return true;
4148                         }
4149                 }
4150         }
4151
4152         PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4153         {
4154                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4155                 const char_t* name = node->name ? node->name + 0 : default_name;
4156
4157                 writer.write('<', '/');
4158                 writer.write_string(name);
4159                 writer.write('>');
4160         }
4161
4162         PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4163         {
4164                 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4165
4166                 switch (PUGI__NODETYPE(node))
4167                 {
4168                         case node_pcdata:
4169                                 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4170                                 break;
4171
4172                         case node_cdata:
4173                                 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4174                                 break;
4175
4176                         case node_comment:
4177                                 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4178                                 break;
4179
4180                         case node_pi:
4181                                 writer.write('<', '?');
4182                                 writer.write_string(node->name ? node->name + 0 : default_name);
4183
4184                                 if (node->value)
4185                                 {
4186                                         writer.write(' ');
4187                                         node_output_pi_value(writer, node->value);
4188                                 }
4189
4190                                 writer.write('?', '>');
4191                                 break;
4192
4193                         case node_declaration:
4194                                 writer.write('<', '?');
4195                                 writer.write_string(node->name ? node->name + 0 : default_name);
4196                                 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4197                                 writer.write('?', '>');
4198                                 break;
4199
4200                         case node_doctype:
4201                                 writer.write('<', '!', 'D', 'O', 'C');
4202                                 writer.write('T', 'Y', 'P', 'E');
4203
4204                                 if (node->value)
4205                                 {
4206                                         writer.write(' ');
4207                                         writer.write_string(node->value);
4208                                 }
4209
4210                                 writer.write('>');
4211                                 break;
4212
4213                         default:
4214                                 assert(false && "Invalid node type"); // unreachable
4215                 }
4216         }
4217
4218         enum indent_flags_t
4219         {
4220                 indent_newline = 1,
4221                 indent_indent = 2
4222         };
4223
4224         PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4225         {
4226                 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4227                 unsigned int indent_flags = indent_indent;
4228
4229                 xml_node_struct* node = root;
4230
4231                 do
4232                 {
4233                         assert(node);
4234
4235                         // begin writing current node
4236                         if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4237                         {
4238                                 node_output_simple(writer, node, flags);
4239
4240                                 indent_flags = 0;
4241                         }
4242                         else
4243                         {
4244                                 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4245                                         writer.write('\n');
4246
4247                                 if ((indent_flags & indent_indent) && indent_length)
4248                                         text_output_indent(writer, indent, indent_length, depth);
4249
4250                                 if (PUGI__NODETYPE(node) == node_element)
4251                                 {
4252                                         indent_flags = indent_newline | indent_indent;
4253
4254                                         if (node_output_start(writer, node, indent, indent_length, flags, depth))
4255                                         {
4256                                                 // element nodes can have value if parse_embed_pcdata was used
4257                                                 if (node->value)
4258                                                         indent_flags = 0;
4259
4260                                                 node = node->first_child;
4261                                                 depth++;
4262                                                 continue;
4263                                         }
4264                                 }
4265                                 else if (PUGI__NODETYPE(node) == node_document)
4266                                 {
4267                                         indent_flags = indent_indent;
4268
4269                                         if (node->first_child)
4270                                         {
4271                                                 node = node->first_child;
4272                                                 continue;
4273                                         }
4274                                 }
4275                                 else
4276                                 {
4277                                         node_output_simple(writer, node, flags);
4278
4279                                         indent_flags = indent_newline | indent_indent;
4280                                 }
4281                         }
4282
4283                         // continue to the next node
4284                         while (node != root)
4285                         {
4286                                 if (node->next_sibling)
4287                                 {
4288                                         node = node->next_sibling;
4289                                         break;
4290                                 }
4291
4292                                 node = node->parent;
4293
4294                                 // write closing node
4295                                 if (PUGI__NODETYPE(node) == node_element)
4296                                 {
4297                                         depth--;
4298
4299                                         if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4300                                                 writer.write('\n');
4301
4302                                         if ((indent_flags & indent_indent) && indent_length)
4303                                                 text_output_indent(writer, indent, indent_length, depth);
4304
4305                                         node_output_end(writer, node);
4306
4307                                         indent_flags = indent_newline | indent_indent;
4308                                 }
4309                         }
4310                 }
4311                 while (node != root);
4312
4313                 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4314                         writer.write('\n');
4315         }
4316
4317         PUGI__FN bool has_declaration(xml_node_struct* node)
4318         {
4319                 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4320                 {
4321                         xml_node_type type = PUGI__NODETYPE(child);
4322
4323                         if (type == node_declaration) return true;
4324                         if (type == node_element) return false;
4325                 }
4326
4327                 return false;
4328         }
4329
4330         PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4331         {
4332                 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4333                         if (a == attr)
4334                                 return true;
4335
4336                 return false;
4337         }
4338
4339         PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4340         {
4341                 return parent == node_element || parent == node_declaration;
4342         }
4343
4344         PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4345         {
4346                 if (parent != node_document && parent != node_element) return false;
4347                 if (child == node_document || child == node_null) return false;
4348                 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4349
4350                 return true;
4351         }
4352
4353         PUGI__FN bool allow_move(xml_node parent, xml_node child)
4354         {
4355                 // check that child can be a child of parent
4356                 if (!allow_insert_child(parent.type(), child.type()))
4357                         return false;
4358
4359                 // check that node is not moved between documents
4360                 if (parent.root() != child.root())
4361                         return false;
4362
4363                 // check that new parent is not in the child subtree
4364                 xml_node cur = parent;
4365
4366                 while (cur)
4367                 {
4368                         if (cur == child)
4369                                 return false;
4370
4371                         cur = cur.parent();
4372                 }
4373
4374                 return true;
4375         }
4376
4377         template <typename String, typename Header>
4378         PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4379         {
4380                 assert(!dest && (header & header_mask) == 0);
4381
4382                 if (source)
4383                 {
4384                         if (alloc && (source_header & header_mask) == 0)
4385                         {
4386                                 dest = source;
4387
4388                                 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4389                                 header |= xml_memory_page_contents_shared_mask;
4390                                 source_header |= xml_memory_page_contents_shared_mask;
4391                         }
4392                         else
4393                                 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4394                 }
4395         }
4396
4397         PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4398         {
4399                 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4400                 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4401
4402                 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4403                 {
4404                         xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4405
4406                         if (da)
4407                         {
4408                                 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4409                                 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4410                         }
4411                 }
4412         }
4413
4414         PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4415         {
4416                 xml_allocator& alloc = get_allocator(dn);
4417                 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4418
4419                 node_copy_contents(dn, sn, shared_alloc);
4420
4421                 xml_node_struct* dit = dn;
4422                 xml_node_struct* sit = sn->first_child;
4423
4424                 while (sit && sit != sn)
4425                 {
4426                         // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4427                         if (sit != dn)
4428                         {
4429                                 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4430
4431                                 if (copy)
4432                                 {
4433                                         node_copy_contents(copy, sit, shared_alloc);
4434
4435                                         if (sit->first_child)
4436                                         {
4437                                                 dit = copy;
4438                                                 sit = sit->first_child;
4439                                                 continue;
4440                                         }
4441                                 }
4442                         }
4443
4444                         // continue to the next node
4445                         do
4446                         {
4447                                 if (sit->next_sibling)
4448                                 {
4449                                         sit = sit->next_sibling;
4450                                         break;
4451                                 }
4452
4453                                 sit = sit->parent;
4454                                 dit = dit->parent;
4455                         }
4456                         while (sit != sn);
4457                 }
4458         }
4459
4460         PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4461         {
4462                 xml_allocator& alloc = get_allocator(da);
4463                 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4464
4465                 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4466                 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4467         }
4468
4469         inline bool is_text_node(xml_node_struct* node)
4470         {
4471                 xml_node_type type = PUGI__NODETYPE(node);
4472
4473                 return type == node_pcdata || type == node_cdata;
4474         }
4475
4476         // get value with conversion functions
4477         template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4478         {
4479                 U result = 0;
4480                 const char_t* s = value;
4481
4482                 while (PUGI__IS_CHARTYPE(*s, ct_space))
4483                         s++;
4484
4485                 bool negative = (*s == '-');
4486
4487                 s += (*s == '+' || *s == '-');
4488
4489                 bool overflow = false;
4490
4491                 if (s[0] == '0' && (s[1] | ' ') == 'x')
4492                 {
4493                         s += 2;
4494
4495                         // since overflow detection relies on length of the sequence skip leading zeros
4496                         while (*s == '0')
4497                                 s++;
4498
4499                         const char_t* start = s;
4500
4501                         for (;;)
4502                         {
4503                                 if (static_cast<unsigned>(*s - '0') < 10)
4504                                         result = result * 16 + (*s - '0');
4505                                 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4506                                         result = result * 16 + ((*s | ' ') - 'a' + 10);
4507                                 else
4508                                         break;
4509
4510                                 s++;
4511                         }
4512
4513                         size_t digits = static_cast<size_t>(s - start);
4514
4515                         overflow = digits > sizeof(U) * 2;
4516                 }
4517                 else
4518                 {
4519                         // since overflow detection relies on length of the sequence skip leading zeros
4520                         while (*s == '0')
4521                                 s++;
4522
4523                         const char_t* start = s;
4524
4525                         for (;;)
4526                         {
4527                                 if (static_cast<unsigned>(*s - '0') < 10)
4528                                         result = result * 10 + (*s - '0');
4529                                 else
4530                                         break;
4531
4532                                 s++;
4533                         }
4534
4535                         size_t digits = static_cast<size_t>(s - start);
4536
4537                         PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4538
4539                         const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4540                         const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4541                         const size_t high_bit = sizeof(U) * 8 - 1;
4542
4543                         overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4544                 }
4545
4546                 if (negative)
4547                 {
4548                         // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4549                 #ifdef _CRAYC
4550                         return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4551                 #else
4552                         return (overflow || result > 0 - minv) ? minv : 0 - result;
4553                 #endif
4554                 }
4555                 else
4556                         return (overflow || result > maxv) ? maxv : result;
4557         }
4558
4559         PUGI__FN int get_value_int(const char_t* value)
4560         {
4561                 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4562         }
4563
4564         PUGI__FN unsigned int get_value_uint(const char_t* value)
4565         {
4566                 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4567         }
4568
4569         PUGI__FN double get_value_double(const char_t* value)
4570         {
4571         #ifdef PUGIXML_WCHAR_MODE
4572                 return wcstod(value, 0);
4573         #else
4574                 return strtod(value, 0);
4575         #endif
4576         }
4577
4578         PUGI__FN float get_value_float(const char_t* value)
4579         {
4580         #ifdef PUGIXML_WCHAR_MODE
4581                 return static_cast<float>(wcstod(value, 0));
4582         #else
4583                 return static_cast<float>(strtod(value, 0));
4584         #endif
4585         }
4586
4587         PUGI__FN bool get_value_bool(const char_t* value)
4588         {
4589                 // only look at first char
4590                 char_t first = *value;
4591
4592                 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4593                 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4594         }
4595
#ifdef PUGIXML_HAS_LONG_LONG
        // Parses value as a long long; results outside [LLONG_MIN, LLONG_MAX] are clamped by string_to_integer.
        PUGI__FN long long get_value_llong(const char_t* value)
        {
                const unsigned long long lowest = static_cast<unsigned long long>(LLONG_MIN);
                const unsigned long long highest = LLONG_MAX;

                // parsing happens in unsigned arithmetic, the result is converted back to long long
                return static_cast<long long>(string_to_integer<unsigned long long>(value, lowest, highest));
        }

        // Parses value as an unsigned long long; results outside [0, ULLONG_MAX] are clamped by string_to_integer.
        PUGI__FN unsigned long long get_value_ullong(const char_t* value)
        {
                const unsigned long long lowest = 0;
                const unsigned long long highest = ULLONG_MAX;

                return string_to_integer<unsigned long long>(value, lowest, highest);
        }
#endif
4607
4608         template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4609         {
4610                 char_t* result = end - 1;
4611                 U rest = negative ? 0 - value : value;
4612
4613                 do
4614                 {
4615                         *result-- = static_cast<char_t>('0' + (rest % 10));
4616                         rest /= 10;
4617                 }
4618                 while (rest);
4619
4620                 assert(result >= begin);
4621                 (void)begin;
4622
4623                 *result = '-';
4624
4625                 return result + !negative;
4626         }
4627
4628         // set value with conversion functions
4629         template <typename String, typename Header>
4630         PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4631         {
4632         #ifdef PUGIXML_WCHAR_MODE
4633                 char_t wbuf[128];
4634                 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4635
4636                 size_t offset = 0;
4637                 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4638
4639                 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4640         #else
4641                 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4642         #endif
4643         }
4644
4645         template <typename U, typename String, typename Header>
4646         PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4647         {
4648                 char_t buf[64];
4649                 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4650                 char_t* begin = integer_to_string(buf, end, value, negative);
4651
4652                 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4653         }
4654
4655         template <typename String, typename Header>
4656         PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4657         {
4658                 char buf[128];
4659                 PUGI__SNPRINTF(buf, "%.9g", value);
4660
4661                 return set_value_ascii(dest, header, header_mask, buf);
4662         }
4663
4664         template <typename String, typename Header>
4665         PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4666         {
4667                 char buf[128];
4668                 PUGI__SNPRINTF(buf, "%.17g", value);
4669
4670                 return set_value_ascii(dest, header, header_mask, buf);
4671         }
4672
4673         template <typename String, typename Header>
4674         PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4675         {
4676                 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4677         }
4678
4679         PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4680         {
4681                 // check input buffer
4682                 if (!contents && size) return make_parse_result(status_io_error);
4683
4684                 // get actual encoding
4685                 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4686
4687                 // get private buffer
4688                 char_t* buffer = 0;
4689                 size_t length = 0;
4690
4691                 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4692
4693                 // delete original buffer if we performed a conversion
4694                 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4695
4696                 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4697                 if (own || buffer != contents) *out_buffer = buffer;
4698
4699                 // store buffer for offset_debug
4700                 doc->buffer = buffer;
4701
4702                 // parse
4703                 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4704
4705                 // remember encoding
4706                 res.encoding = buffer_encoding;
4707
4708                 return res;
4709         }
4710
4711         // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4712         PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4713         {
4714         #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4715                 // there are 64-bit versions of fseek/ftell, let's use them
4716                 typedef __int64 length_type;
4717
4718                 _fseeki64(file, 0, SEEK_END);
4719                 length_type length = _ftelli64(file);
4720                 _fseeki64(file, 0, SEEK_SET);
4721         #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4722                 // there are 64-bit versions of fseek/ftell, let's use them
4723                 typedef off64_t length_type;
4724
4725                 fseeko64(file, 0, SEEK_END);
4726                 length_type length = ftello64(file);
4727                 fseeko64(file, 0, SEEK_SET);
4728         #else
4729                 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4730                 typedef long length_type;
4731
4732                 fseek(file, 0, SEEK_END);
4733                 length_type length = ftell(file);
4734                 fseek(file, 0, SEEK_SET);
4735         #endif
4736
4737                 // check for I/O errors
4738                 if (length < 0) return status_io_error;
4739
4740                 // check for overflow
4741                 size_t result = static_cast<size_t>(length);
4742
4743                 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4744
4745                 // finalize
4746                 out_result = result;
4747
4748                 return status_ok;
4749         }
4750
4751         // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4752         PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4753         {
4754                 // We only need to zero-terminate if encoding conversion does not do it for us
4755         #ifdef PUGIXML_WCHAR_MODE
4756                 xml_encoding wchar_encoding = get_wchar_encoding();
4757
4758                 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4759                 {
4760                         size_t length = size / sizeof(char_t);
4761
4762                         static_cast<char_t*>(buffer)[length] = 0;
4763                         return (length + 1) * sizeof(char_t);
4764                 }
4765         #else
4766                 if (encoding == encoding_utf8)
4767                 {
4768                         static_cast<char*>(buffer)[size] = 0;
4769                         return size + 1;
4770                 }
4771         #endif
4772
4773                 return size;
4774         }
4775
4776         PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4777         {
4778                 if (!file) return make_parse_result(status_file_not_found);
4779
4780                 // get file size (can result in I/O errors)
4781                 size_t size = 0;
4782                 xml_parse_status size_status = get_file_size(file, size);
4783                 if (size_status != status_ok) return make_parse_result(size_status);
4784
4785                 size_t max_suffix_size = sizeof(char_t);
4786
4787                 // allocate buffer for the whole file
4788                 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4789                 if (!contents) return make_parse_result(status_out_of_memory);
4790
4791                 // read file in memory
4792                 size_t read_size = fread(contents, 1, size, file);
4793
4794                 if (read_size != size)
4795                 {
4796                         xml_memory::deallocate(contents);
4797                         return make_parse_result(status_io_error);
4798                 }
4799
4800                 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4801
4802                 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4803         }
4804
4805         PUGI__FN void close_file(FILE* file)
4806         {
4807                 fclose(file);
4808         }
4809
4810 #ifndef PUGIXML_NO_STL
4811         template <typename T> struct xml_stream_chunk
4812         {
4813                 static xml_stream_chunk* create()
4814                 {
4815                         void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4816                         if (!memory) return 0;
4817
4818                         return new (memory) xml_stream_chunk();
4819                 }
4820
4821                 static void destroy(xml_stream_chunk* chunk)
4822                 {
4823                         // free chunk chain
4824                         while (chunk)
4825                         {
4826                                 xml_stream_chunk* next_ = chunk->next;
4827
4828                                 xml_memory::deallocate(chunk);
4829
4830                                 chunk = next_;
4831                         }
4832                 }
4833
4834                 xml_stream_chunk(): next(0), size(0)
4835                 {
4836                 }
4837
4838                 xml_stream_chunk* next;
4839                 size_t size;
4840
4841                 T data[xml_memory_page_size / sizeof(T)];
4842         };
4843
4844         template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4845         {
4846                 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4847
4848                 // read file to a chunk list
4849                 size_t total = 0;
4850                 xml_stream_chunk<T>* last = 0;
4851
4852                 while (!stream.eof())
4853                 {
4854                         // allocate new chunk
4855                         xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4856                         if (!chunk) return status_out_of_memory;
4857
4858                         // append chunk to list
4859                         if (last) last = last->next = chunk;
4860                         else chunks.data = last = chunk;
4861
4862                         // read data to chunk
4863                         stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4864                         chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4865
4866                         // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4867                         if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4868
4869                         // guard against huge files (chunk size is small enough to make this overflow check work)
4870                         if (total + chunk->size < total) return status_out_of_memory;
4871                         total += chunk->size;
4872                 }
4873
4874                 size_t max_suffix_size = sizeof(char_t);
4875
4876                 // copy chunk list to a contiguous buffer
4877                 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4878                 if (!buffer) return status_out_of_memory;
4879
4880                 char* write = buffer;
4881
4882                 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4883                 {
4884                         assert(write + chunk->size <= buffer + total);
4885                         memcpy(write, chunk->data, chunk->size);
4886                         write += chunk->size;
4887                 }
4888
4889                 assert(write == buffer + total);
4890
4891                 // return buffer
4892                 *out_buffer = buffer;
4893                 *out_size = total;
4894
4895                 return status_ok;
4896         }
4897
4898         template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4899         {
4900                 // get length of remaining data in stream
4901                 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4902                 stream.seekg(0, std::ios::end);
4903                 std::streamoff length = stream.tellg() - pos;
4904                 stream.seekg(pos);
4905
4906                 if (stream.fail() || pos < 0) return status_io_error;
4907
4908                 // guard against huge files
4909                 size_t read_length = static_cast<size_t>(length);
4910
4911                 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4912
4913                 size_t max_suffix_size = sizeof(char_t);
4914
4915                 // read stream data into memory (guard against stream exceptions with buffer holder)
4916                 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4917                 if (!buffer.data) return status_out_of_memory;
4918
4919                 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4920
4921                 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4922                 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4923
4924                 // return buffer
4925                 size_t actual_length = static_cast<size_t>(stream.gcount());
4926                 assert(actual_length <= read_length);
4927
4928                 *out_buffer = buffer.release();
4929                 *out_size = actual_length * sizeof(T);
4930
4931                 return status_ok;
4932         }
4933
4934         template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4935         {
4936                 void* buffer = 0;
4937                 size_t size = 0;
4938                 xml_parse_status status = status_ok;
4939
4940                 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4941                 if (stream.fail()) return make_parse_result(status_io_error);
4942
4943                 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4944                 if (stream.tellg() < 0)
4945                 {
4946                         stream.clear(); // clear error flags that could be set by a failing tellg
4947                         status = load_stream_data_noseek(stream, &buffer, &size);
4948                 }
4949                 else
4950                         status = load_stream_data_seek(stream, &buffer, &size);
4951
4952                 if (status != status_ok) return make_parse_result(status);
4953
4954                 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4955
4956                 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4957         }
4958 #endif
4959
4960 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4961         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4962         {
4963                 return _wfopen(path, mode);
4964         }
4965 #else
4966         PUGI__FN char* convert_path_heap(const wchar_t* str)
4967         {
4968                 assert(str);
4969
4970                 // first pass: get length in utf8 characters
4971                 size_t length = strlength_wide(str);
4972                 size_t size = as_utf8_begin(str, length);
4973
4974                 // allocate resulting string
4975                 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4976                 if (!result) return 0;
4977
4978                 // second pass: convert to utf8
4979                 as_utf8_end(result, size, str, length);
4980
4981                 // zero-terminate
4982                 result[size] = 0;
4983
4984                 return result;
4985         }
4986
4987         PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4988         {
4989                 // there is no standard function to open wide paths, so our best bet is to try utf8 path
4990                 char* path_utf8 = convert_path_heap(path);
4991                 if (!path_utf8) return 0;
4992
4993                 // convert mode to ASCII (we mirror _wfopen interface)
4994                 char mode_ascii[4] = {0};
4995                 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4996
4997                 // try to open the utf8 path
4998                 FILE* result = fopen(path_utf8, mode_ascii);
4999
5000                 // free dummy buffer
5001                 xml_memory::deallocate(path_utf8);
5002
5003                 return result;
5004         }
5005 #endif
5006
5007         PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5008         {
5009                 if (!file) return false;
5010
5011                 xml_writer_file writer(file);
5012                 doc.save(writer, indent, flags, encoding);
5013
5014                 return ferror(file) == 0;
5015         }
5016
5017         struct name_null_sentry
5018         {
5019                 xml_node_struct* node;
5020                 char_t* name;
5021
5022                 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5023                 {
5024                         node->name = 0;
5025                 }
5026
5027                 ~name_null_sentry()
5028                 {
5029                         node->name = name;
5030                 }
5031         };
5032 PUGI__NS_END
5033
5034 namespace pugi
5035 {
5036         PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5037         {
5038         }
5039
5040         PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5041         {
5042                 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5043                 (void)!result; // unfortunately we can't do proper error handling here
5044         }
5045
5046 #ifndef PUGIXML_NO_STL
5047         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5048         {
5049         }
5050
5051         PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5052         {
5053         }
5054
5055         PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5056         {
5057                 if (narrow_stream)
5058                 {
5059                         assert(!wide_stream);
5060                         narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5061                 }
5062                 else
5063                 {
5064                         assert(wide_stream);
5065                         assert(size % sizeof(wchar_t) == 0);
5066
5067                         wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5068                 }
5069         }
5070 #endif
5071
5072         PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5073         {
5074         }
5075
5076         PUGI__FN xml_tree_walker::~xml_tree_walker()
5077         {
5078         }
5079
5080         PUGI__FN int xml_tree_walker::depth() const
5081         {
5082                 return _depth;
5083         }
5084
5085         PUGI__FN bool xml_tree_walker::begin(xml_node&)
5086         {
5087                 return true;
5088         }
5089
5090         PUGI__FN bool xml_tree_walker::end(xml_node&)
5091         {
5092                 return true;
5093         }
5094
5095         PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5096         {
5097         }
5098
5099         PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5100         {
5101         }
5102
5103         PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5104         {
5105         }
5106
5107         PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5108         {
5109                 return _attr ? unspecified_bool_xml_attribute : 0;
5110         }
5111
5112         PUGI__FN bool xml_attribute::operator!() const
5113         {
5114                 return !_attr;
5115         }
5116
5117         PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5118         {
5119                 return (_attr == r._attr);
5120         }
5121
5122         PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5123         {
5124                 return (_attr != r._attr);
5125         }
5126
5127         PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5128         {
5129                 return (_attr < r._attr);
5130         }
5131
5132         PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5133         {
5134                 return (_attr > r._attr);
5135         }
5136
5137         PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5138         {
5139                 return (_attr <= r._attr);
5140         }
5141
5142         PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5143         {
5144                 return (_attr >= r._attr);
5145         }
5146
5147         PUGI__FN xml_attribute xml_attribute::next_attribute() const
5148         {
5149                 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5150         }
5151
5152         PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5153         {
5154                 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5155         }
5156
5157         PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5158         {
5159                 return (_attr && _attr->value) ? _attr->value + 0 : def;
5160         }
5161
5162         PUGI__FN int xml_attribute::as_int(int def) const
5163         {
5164                 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5165         }
5166
5167         PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5168         {
5169                 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5170         }
5171
5172         PUGI__FN double xml_attribute::as_double(double def) const
5173         {
5174                 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5175         }
5176
5177         PUGI__FN float xml_attribute::as_float(float def) const
5178         {
5179                 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5180         }
5181
5182         PUGI__FN bool xml_attribute::as_bool(bool def) const
5183         {
5184                 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5185         }
5186
5187 #ifdef PUGIXML_HAS_LONG_LONG
5188         PUGI__FN long long xml_attribute::as_llong(long long def) const
5189         {
5190                 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5191         }
5192
5193         PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5194         {
5195                 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5196         }
5197 #endif
5198
5199         PUGI__FN bool xml_attribute::empty() const
5200         {
5201                 return !_attr;
5202         }
5203
5204         PUGI__FN const char_t* xml_attribute::name() const
5205         {
5206                 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5207         }
5208
5209         PUGI__FN const char_t* xml_attribute::value() const
5210         {
5211                 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5212         }
5213
5214         PUGI__FN size_t xml_attribute::hash_value() const
5215         {
5216                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5217         }
5218
5219         PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5220         {
5221                 return _attr;
5222         }
5223
5224         PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5225         {
5226                 set_value(rhs);
5227                 return *this;
5228         }
5229
5230         PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5231         {
5232                 set_value(rhs);
5233                 return *this;
5234         }
5235
5236         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5237         {
5238                 set_value(rhs);
5239                 return *this;
5240         }
5241
5242         PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5243         {
5244                 set_value(rhs);
5245                 return *this;
5246         }
5247
5248         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5249         {
5250                 set_value(rhs);
5251                 return *this;
5252         }
5253
5254         PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5255         {
5256                 set_value(rhs);
5257                 return *this;
5258         }
5259
5260         PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5261         {
5262                 set_value(rhs);
5263                 return *this;
5264         }
5265
5266         PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5267         {
5268                 set_value(rhs);
5269                 return *this;
5270         }
5271
5272 #ifdef PUGIXML_HAS_LONG_LONG
5273         PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5274         {
5275                 set_value(rhs);
5276                 return *this;
5277         }
5278
5279         PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5280         {
5281                 set_value(rhs);
5282                 return *this;
5283         }
5284 #endif
5285
5286         PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5287         {
5288                 if (!_attr) return false;
5289
5290                 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5291         }
5292
5293         PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5294         {
5295                 if (!_attr) return false;
5296
5297                 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5298         }
5299
5300         PUGI__FN bool xml_attribute::set_value(int rhs)
5301         {
5302                 if (!_attr) return false;
5303
5304                 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5305         }
5306
5307         PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5308         {
5309                 if (!_attr) return false;
5310
5311                 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5312         }
5313
5314         PUGI__FN bool xml_attribute::set_value(long rhs)
5315         {
5316                 if (!_attr) return false;
5317
5318                 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5319         }
5320
5321         PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5322         {
5323                 if (!_attr) return false;
5324
5325                 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5326         }
5327
5328         PUGI__FN bool xml_attribute::set_value(double rhs)
5329         {
5330                 if (!_attr) return false;
5331
5332                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5333         }
5334
5335         PUGI__FN bool xml_attribute::set_value(float rhs)
5336         {
5337                 if (!_attr) return false;
5338
5339                 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5340         }
5341
5342         PUGI__FN bool xml_attribute::set_value(bool rhs)
5343         {
5344                 if (!_attr) return false;
5345
5346                 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5347         }
5348
5349 #ifdef PUGIXML_HAS_LONG_LONG
5350         PUGI__FN bool xml_attribute::set_value(long long rhs)
5351         {
5352                 if (!_attr) return false;
5353
5354                 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5355         }
5356
5357         PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5358         {
5359                 if (!_attr) return false;
5360
5361                 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5362         }
5363 #endif
5364
5365 #ifdef __BORLANDC__
5366         PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5367         {
5368                 return (bool)lhs && rhs;
5369         }
5370
5371         PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5372         {
5373                 return (bool)lhs || rhs;
5374         }
5375 #endif
5376
5377         PUGI__FN xml_node::xml_node(): _root(0)
5378         {
5379         }
5380
5381         PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5382         {
5383         }
5384
5385         PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5386         {
5387         }
5388
5389         PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5390         {
5391                 return _root ? unspecified_bool_xml_node : 0;
5392         }
5393
5394         PUGI__FN bool xml_node::operator!() const
5395         {
5396                 return !_root;
5397         }
5398
5399         PUGI__FN xml_node::iterator xml_node::begin() const
5400         {
5401                 return iterator(_root ? _root->first_child + 0 : 0, _root);
5402         }
5403
5404         PUGI__FN xml_node::iterator xml_node::end() const
5405         {
5406                 return iterator(0, _root);
5407         }
5408
5409         PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5410         {
5411                 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5412         }
5413
5414         PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5415         {
5416                 return attribute_iterator(0, _root);
5417         }
5418
5419         PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5420         {
5421                 return xml_object_range<xml_node_iterator>(begin(), end());
5422         }
5423
5424         PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5425         {
5426                 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5427         }
5428
5429         PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5430         {
5431                 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5432         }
5433
5434         PUGI__FN bool xml_node::operator==(const xml_node& r) const
5435         {
5436                 return (_root == r._root);
5437         }
5438
5439         PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5440         {
5441                 return (_root != r._root);
5442         }
5443
5444         PUGI__FN bool xml_node::operator<(const xml_node& r) const
5445         {
5446                 return (_root < r._root);
5447         }
5448
5449         PUGI__FN bool xml_node::operator>(const xml_node& r) const
5450         {
5451                 return (_root > r._root);
5452         }
5453
5454         PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5455         {
5456                 return (_root <= r._root);
5457         }
5458
5459         PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5460         {
5461                 return (_root >= r._root);
5462         }
5463
5464         PUGI__FN bool xml_node::empty() const
5465         {
5466                 return !_root;
5467         }
5468
5469         PUGI__FN const char_t* xml_node::name() const
5470         {
5471                 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5472         }
5473
5474         PUGI__FN xml_node_type xml_node::type() const
5475         {
5476                 return _root ? PUGI__NODETYPE(_root) : node_null;
5477         }
5478
5479         PUGI__FN const char_t* xml_node::value() const
5480         {
5481                 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5482         }
5483
5484         PUGI__FN xml_node xml_node::child(const char_t* name_) const
5485         {
5486                 if (!_root) return xml_node();
5487
5488                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5489                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5490
5491                 return xml_node();
5492         }
5493
5494         PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5495         {
5496                 if (!_root) return xml_attribute();
5497
5498                 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5499                         if (i->name && impl::strequal(name_, i->name))
5500                                 return xml_attribute(i);
5501
5502                 return xml_attribute();
5503         }
5504
5505         PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5506         {
5507                 if (!_root) return xml_node();
5508
5509                 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5510                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5511
5512                 return xml_node();
5513         }
5514
5515         PUGI__FN xml_node xml_node::next_sibling() const
5516         {
5517                 return _root ? xml_node(_root->next_sibling) : xml_node();
5518         }
5519
5520         PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5521         {
5522                 if (!_root) return xml_node();
5523
5524                 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5525                         if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5526
5527                 return xml_node();
5528         }
5529
5530         PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5531         {
5532                 xml_attribute_struct* hint = hint_._attr;
5533
5534                 // if hint is not an attribute of node, behavior is not defined
5535                 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5536
5537                 if (!_root) return xml_attribute();
5538
5539                 // optimistically search from hint up until the end
5540                 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5541                         if (i->name && impl::strequal(name_, i->name))
5542                         {
5543                                 // update hint to maximize efficiency of searching for consecutive attributes
5544                                 hint_._attr = i->next_attribute;
5545
5546                                 return xml_attribute(i);
5547                         }
5548
5549                 // wrap around and search from the first attribute until the hint
5550                 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5551                 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5552                         if (j->name && impl::strequal(name_, j->name))
5553                         {
5554                                 // update hint to maximize efficiency of searching for consecutive attributes
5555                                 hint_._attr = j->next_attribute;
5556
5557                                 return xml_attribute(j);
5558                         }
5559
5560                 return xml_attribute();
5561         }
5562
5563         PUGI__FN xml_node xml_node::previous_sibling() const
5564         {
5565                 if (!_root) return xml_node();
5566
5567                 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5568                 else return xml_node();
5569         }
5570
5571         PUGI__FN xml_node xml_node::parent() const
5572         {
5573                 return _root ? xml_node(_root->parent) : xml_node();
5574         }
5575
5576         PUGI__FN xml_node xml_node::root() const
5577         {
5578                 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5579         }
5580
5581         PUGI__FN xml_text xml_node::text() const
5582         {
5583                 return xml_text(_root);
5584         }
5585
5586         PUGI__FN const char_t* xml_node::child_value() const
5587         {
5588                 if (!_root) return PUGIXML_TEXT("");
5589
5590                 // element nodes can have value if parse_embed_pcdata was used
5591                 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5592                         return _root->value;
5593
5594                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5595                         if (impl::is_text_node(i) && i->value)
5596                                 return i->value;
5597
5598                 return PUGIXML_TEXT("");
5599         }
5600
5601         PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5602         {
5603                 return child(name_).child_value();
5604         }
5605
5606         PUGI__FN xml_attribute xml_node::first_attribute() const
5607         {
5608                 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5609         }
5610
5611         PUGI__FN xml_attribute xml_node::last_attribute() const
5612         {
5613                 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5614         }
5615
5616         PUGI__FN xml_node xml_node::first_child() const
5617         {
5618                 return _root ? xml_node(_root->first_child) : xml_node();
5619         }
5620
5621         PUGI__FN xml_node xml_node::last_child() const
5622         {
5623                 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5624         }
5625
5626         PUGI__FN bool xml_node::set_name(const char_t* rhs)
5627         {
5628                 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5629
5630                 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5631                         return false;
5632
5633                 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5634         }
5635
5636         PUGI__FN bool xml_node::set_value(const char_t* rhs)
5637         {
5638                 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5639
5640                 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5641                         return false;
5642
5643                 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5644         }
5645
5646         PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5647         {
5648                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5649
5650                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5651                 if (!alloc.reserve()) return xml_attribute();
5652
5653                 xml_attribute a(impl::allocate_attribute(alloc));
5654                 if (!a) return xml_attribute();
5655
5656                 impl::append_attribute(a._attr, _root);
5657
5658                 a.set_name(name_);
5659
5660                 return a;
5661         }
5662
5663         PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5664         {
5665                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5666
5667                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5668                 if (!alloc.reserve()) return xml_attribute();
5669
5670                 xml_attribute a(impl::allocate_attribute(alloc));
5671                 if (!a) return xml_attribute();
5672
5673                 impl::prepend_attribute(a._attr, _root);
5674
5675                 a.set_name(name_);
5676
5677                 return a;
5678         }
5679
5680         PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5681         {
5682                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5683                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5684
5685                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5686                 if (!alloc.reserve()) return xml_attribute();
5687
5688                 xml_attribute a(impl::allocate_attribute(alloc));
5689                 if (!a) return xml_attribute();
5690
5691                 impl::insert_attribute_after(a._attr, attr._attr, _root);
5692
5693                 a.set_name(name_);
5694
5695                 return a;
5696         }
5697
5698         PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5699         {
5700                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5701                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5702
5703                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5704                 if (!alloc.reserve()) return xml_attribute();
5705
5706                 xml_attribute a(impl::allocate_attribute(alloc));
5707                 if (!a) return xml_attribute();
5708
5709                 impl::insert_attribute_before(a._attr, attr._attr, _root);
5710
5711                 a.set_name(name_);
5712
5713                 return a;
5714         }
5715
5716         PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5717         {
5718                 if (!proto) return xml_attribute();
5719                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5720
5721                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5722                 if (!alloc.reserve()) return xml_attribute();
5723
5724                 xml_attribute a(impl::allocate_attribute(alloc));
5725                 if (!a) return xml_attribute();
5726
5727                 impl::append_attribute(a._attr, _root);
5728                 impl::node_copy_attribute(a._attr, proto._attr);
5729
5730                 return a;
5731         }
5732
5733         PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5734         {
5735                 if (!proto) return xml_attribute();
5736                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5737
5738                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5739                 if (!alloc.reserve()) return xml_attribute();
5740
5741                 xml_attribute a(impl::allocate_attribute(alloc));
5742                 if (!a) return xml_attribute();
5743
5744                 impl::prepend_attribute(a._attr, _root);
5745                 impl::node_copy_attribute(a._attr, proto._attr);
5746
5747                 return a;
5748         }
5749
5750         PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5751         {
5752                 if (!proto) return xml_attribute();
5753                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5754                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5755
5756                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5757                 if (!alloc.reserve()) return xml_attribute();
5758
5759                 xml_attribute a(impl::allocate_attribute(alloc));
5760                 if (!a) return xml_attribute();
5761
5762                 impl::insert_attribute_after(a._attr, attr._attr, _root);
5763                 impl::node_copy_attribute(a._attr, proto._attr);
5764
5765                 return a;
5766         }
5767
5768         PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5769         {
5770                 if (!proto) return xml_attribute();
5771                 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5772                 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5773
5774                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5775                 if (!alloc.reserve()) return xml_attribute();
5776
5777                 xml_attribute a(impl::allocate_attribute(alloc));
5778                 if (!a) return xml_attribute();
5779
5780                 impl::insert_attribute_before(a._attr, attr._attr, _root);
5781                 impl::node_copy_attribute(a._attr, proto._attr);
5782
5783                 return a;
5784         }
5785
5786         PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5787         {
5788                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5789
5790                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5791                 if (!alloc.reserve()) return xml_node();
5792
5793                 xml_node n(impl::allocate_node(alloc, type_));
5794                 if (!n) return xml_node();
5795
5796                 impl::append_node(n._root, _root);
5797
5798                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5799
5800                 return n;
5801         }
5802
5803         PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5804         {
5805                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5806
5807                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5808                 if (!alloc.reserve()) return xml_node();
5809
5810                 xml_node n(impl::allocate_node(alloc, type_));
5811                 if (!n) return xml_node();
5812
5813                 impl::prepend_node(n._root, _root);
5814
5815                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5816
5817                 return n;
5818         }
5819
5820         PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5821         {
5822                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5823                 if (!node._root || node._root->parent != _root) return xml_node();
5824
5825                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5826                 if (!alloc.reserve()) return xml_node();
5827
5828                 xml_node n(impl::allocate_node(alloc, type_));
5829                 if (!n) return xml_node();
5830
5831                 impl::insert_node_before(n._root, node._root);
5832
5833                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5834
5835                 return n;
5836         }
5837
5838         PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5839         {
5840                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5841                 if (!node._root || node._root->parent != _root) return xml_node();
5842
5843                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5844                 if (!alloc.reserve()) return xml_node();
5845
5846                 xml_node n(impl::allocate_node(alloc, type_));
5847                 if (!n) return xml_node();
5848
5849                 impl::insert_node_after(n._root, node._root);
5850
5851                 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5852
5853                 return n;
5854         }
5855
5856         PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5857         {
5858                 xml_node result = append_child(node_element);
5859
5860                 result.set_name(name_);
5861
5862                 return result;
5863         }
5864
5865         PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5866         {
5867                 xml_node result = prepend_child(node_element);
5868
5869                 result.set_name(name_);
5870
5871                 return result;
5872         }
5873
5874         PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5875         {
5876                 xml_node result = insert_child_after(node_element, node);
5877
5878                 result.set_name(name_);
5879
5880                 return result;
5881         }
5882
5883         PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5884         {
5885                 xml_node result = insert_child_before(node_element, node);
5886
5887                 result.set_name(name_);
5888
5889                 return result;
5890         }
5891
5892         PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5893         {
5894                 xml_node_type type_ = proto.type();
5895                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5896
5897                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5898                 if (!alloc.reserve()) return xml_node();
5899
5900                 xml_node n(impl::allocate_node(alloc, type_));
5901                 if (!n) return xml_node();
5902
5903                 impl::append_node(n._root, _root);
5904                 impl::node_copy_tree(n._root, proto._root);
5905
5906                 return n;
5907         }
5908
5909         PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5910         {
5911                 xml_node_type type_ = proto.type();
5912                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5913
5914                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5915                 if (!alloc.reserve()) return xml_node();
5916
5917                 xml_node n(impl::allocate_node(alloc, type_));
5918                 if (!n) return xml_node();
5919
5920                 impl::prepend_node(n._root, _root);
5921                 impl::node_copy_tree(n._root, proto._root);
5922
5923                 return n;
5924         }
5925
5926         PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5927         {
5928                 xml_node_type type_ = proto.type();
5929                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5930                 if (!node._root || node._root->parent != _root) return xml_node();
5931
5932                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5933                 if (!alloc.reserve()) return xml_node();
5934
5935                 xml_node n(impl::allocate_node(alloc, type_));
5936                 if (!n) return xml_node();
5937
5938                 impl::insert_node_after(n._root, node._root);
5939                 impl::node_copy_tree(n._root, proto._root);
5940
5941                 return n;
5942         }
5943
5944         PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5945         {
5946                 xml_node_type type_ = proto.type();
5947                 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5948                 if (!node._root || node._root->parent != _root) return xml_node();
5949
5950                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5951                 if (!alloc.reserve()) return xml_node();
5952
5953                 xml_node n(impl::allocate_node(alloc, type_));
5954                 if (!n) return xml_node();
5955
5956                 impl::insert_node_before(n._root, node._root);
5957                 impl::node_copy_tree(n._root, proto._root);
5958
5959                 return n;
5960         }
5961
5962         PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5963         {
5964                 if (!impl::allow_move(*this, moved)) return xml_node();
5965
5966                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5967                 if (!alloc.reserve()) return xml_node();
5968
5969                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5970                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5971
5972                 impl::remove_node(moved._root);
5973                 impl::append_node(moved._root, _root);
5974
5975                 return moved;
5976         }
5977
5978         PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5979         {
5980                 if (!impl::allow_move(*this, moved)) return xml_node();
5981
5982                 impl::xml_allocator& alloc = impl::get_allocator(_root);
5983                 if (!alloc.reserve()) return xml_node();
5984
5985                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5986                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5987
5988                 impl::remove_node(moved._root);
5989                 impl::prepend_node(moved._root, _root);
5990
5991                 return moved;
5992         }
5993
5994         PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5995         {
5996                 if (!impl::allow_move(*this, moved)) return xml_node();
5997                 if (!node._root || node._root->parent != _root) return xml_node();
5998                 if (moved._root == node._root) return xml_node();
5999
6000                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6001                 if (!alloc.reserve()) return xml_node();
6002
6003                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6004                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6005
6006                 impl::remove_node(moved._root);
6007                 impl::insert_node_after(moved._root, node._root);
6008
6009                 return moved;
6010         }
6011
6012         PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6013         {
6014                 if (!impl::allow_move(*this, moved)) return xml_node();
6015                 if (!node._root || node._root->parent != _root) return xml_node();
6016                 if (moved._root == node._root) return xml_node();
6017
6018                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6019                 if (!alloc.reserve()) return xml_node();
6020
6021                 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6022                 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6023
6024                 impl::remove_node(moved._root);
6025                 impl::insert_node_before(moved._root, node._root);
6026
6027                 return moved;
6028         }
6029
6030         PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6031         {
6032                 return remove_attribute(attribute(name_));
6033         }
6034
6035         PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6036         {
6037                 if (!_root || !a._attr) return false;
6038                 if (!impl::is_attribute_of(a._attr, _root)) return false;
6039
6040                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6041                 if (!alloc.reserve()) return false;
6042
6043                 impl::remove_attribute(a._attr, _root);
6044                 impl::destroy_attribute(a._attr, alloc);
6045
6046                 return true;
6047         }
6048
6049         PUGI__FN bool xml_node::remove_child(const char_t* name_)
6050         {
6051                 return remove_child(child(name_));
6052         }
6053
6054         PUGI__FN bool xml_node::remove_child(const xml_node& n)
6055         {
6056                 if (!_root || !n._root || n._root->parent != _root) return false;
6057
6058                 impl::xml_allocator& alloc = impl::get_allocator(_root);
6059                 if (!alloc.reserve()) return false;
6060
6061                 impl::remove_node(n._root);
6062                 impl::destroy_node(n._root, alloc);
6063
6064                 return true;
6065         }
6066
6067         PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6068         {
6069                 // append_buffer is only valid for elements/documents
6070                 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6071
6072                 // get document node
6073                 impl::xml_document_struct* doc = &impl::get_document(_root);
6074
6075                 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6076                 doc->header |= impl::xml_memory_page_contents_shared_mask;
6077
6078                 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6079                 impl::xml_memory_page* page = 0;
6080                 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6081                 (void)page;
6082
6083                 if (!extra) return impl::make_parse_result(status_out_of_memory);
6084
6085         #ifdef PUGIXML_COMPACT
6086                 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6087                 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6088                 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6089         #endif
6090
6091                 // add extra buffer to the list
6092                 extra->buffer = 0;
6093                 extra->next = doc->extra_buffers;
6094                 doc->extra_buffers = extra;
6095
6096                 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6097                 impl::name_null_sentry sentry(_root);
6098
6099                 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6100         }
6101
6102         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6103         {
6104                 if (!_root) return xml_node();
6105
6106                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6107                         if (i->name && impl::strequal(name_, i->name))
6108                         {
6109                                 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6110                                         if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6111                                                 return xml_node(i);
6112                         }
6113
6114                 return xml_node();
6115         }
6116
6117         PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6118         {
6119                 if (!_root) return xml_node();
6120
6121                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6122                         for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6123                                 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6124                                         return xml_node(i);
6125
6126                 return xml_node();
6127         }
6128
6129 #ifndef PUGIXML_NO_STL
6130         PUGI__FN string_t xml_node::path(char_t delimiter) const
6131         {
6132                 if (!_root) return string_t();
6133
6134                 size_t offset = 0;
6135
6136                 for (xml_node_struct* i = _root; i; i = i->parent)
6137                 {
6138                         offset += (i != _root);
6139                         offset += i->name ? impl::strlength(i->name) : 0;
6140                 }
6141
6142                 string_t result;
6143                 result.resize(offset);
6144
6145                 for (xml_node_struct* j = _root; j; j = j->parent)
6146                 {
6147                         if (j != _root)
6148                                 result[--offset] = delimiter;
6149
6150                         if (j->name)
6151                         {
6152                                 size_t length = impl::strlength(j->name);
6153
6154                                 offset -= length;
6155                                 memcpy(&result[offset], j->name, length * sizeof(char_t));
6156                         }
6157                 }
6158
6159                 assert(offset == 0);
6160
6161                 return result;
6162         }
6163 #endif
6164
6165         PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6166         {
6167                 xml_node found = *this; // Current search context.
6168
6169                 if (!_root || !path_[0]) return found;
6170
6171                 if (path_[0] == delimiter)
6172                 {
6173                         // Absolute path; e.g. '/foo/bar'
6174                         found = found.root();
6175                         ++path_;
6176                 }
6177
6178                 const char_t* path_segment = path_;
6179
6180                 while (*path_segment == delimiter) ++path_segment;
6181
6182                 const char_t* path_segment_end = path_segment;
6183
6184                 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6185
6186                 if (path_segment == path_segment_end) return found;
6187
6188                 const char_t* next_segment = path_segment_end;
6189
6190                 while (*next_segment == delimiter) ++next_segment;
6191
6192                 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6193                         return found.first_element_by_path(next_segment, delimiter);
6194                 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6195                         return found.parent().first_element_by_path(next_segment, delimiter);
6196                 else
6197                 {
6198                         for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6199                         {
6200                                 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6201                                 {
6202                                         xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6203
6204                                         if (subsearch) return subsearch;
6205                                 }
6206                         }
6207
6208                         return xml_node();
6209                 }
6210         }
6211
6212         PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6213         {
6214                 walker._depth = -1;
6215
6216                 xml_node arg_begin(_root);
6217                 if (!walker.begin(arg_begin)) return false;
6218
6219                 xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6220
6221                 if (cur)
6222                 {
6223                         ++walker._depth;
6224
6225                         do
6226                         {
6227                                 xml_node arg_for_each(cur);
6228                                 if (!walker.for_each(arg_for_each))
6229                                         return false;
6230
6231                                 if (cur->first_child)
6232                                 {
6233                                         ++walker._depth;
6234                                         cur = cur->first_child;
6235                                 }
6236                                 else if (cur->next_sibling)
6237                                         cur = cur->next_sibling;
6238                                 else
6239                                 {
6240                                         while (!cur->next_sibling && cur != _root && cur->parent)
6241                                         {
6242                                                 --walker._depth;
6243                                                 cur = cur->parent;
6244                                         }
6245
6246                                         if (cur != _root)
6247                                                 cur = cur->next_sibling;
6248                                 }
6249                         }
6250                         while (cur && cur != _root);
6251                 }
6252
6253                 assert(walker._depth == -1);
6254
6255                 xml_node arg_end(_root);
6256                 return walker.end(arg_end);
6257         }
6258
6259         PUGI__FN size_t xml_node::hash_value() const
6260         {
6261                 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6262         }
6263
6264         PUGI__FN xml_node_struct* xml_node::internal_object() const
6265         {
6266                 return _root;
6267         }
6268
6269         PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6270         {
6271                 if (!_root) return;
6272
6273                 impl::xml_buffered_writer buffered_writer(writer, encoding);
6274
6275                 impl::node_output(buffered_writer, _root, indent, flags, depth);
6276
6277                 buffered_writer.flush();
6278         }
6279
6280 #ifndef PUGIXML_NO_STL
6281         PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6282         {
6283                 xml_writer_stream writer(stream);
6284
6285                 print(writer, indent, flags, encoding, depth);
6286         }
6287
6288         PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6289         {
6290                 xml_writer_stream writer(stream);
6291
6292                 print(writer, indent, flags, encoding_wchar, depth);
6293         }
6294 #endif
6295
6296         PUGI__FN ptrdiff_t xml_node::offset_debug() const
6297         {
6298                 if (!_root) return -1;
6299
6300                 impl::xml_document_struct& doc = impl::get_document(_root);
6301
6302                 // we can determine the offset reliably only if there is exactly once parse buffer
6303                 if (!doc.buffer || doc.extra_buffers) return -1;
6304
6305                 switch (type())
6306                 {
6307                 case node_document:
6308                         return 0;
6309
6310                 case node_element:
6311                 case node_declaration:
6312                 case node_pi:
6313                         return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6314
6315                 case node_pcdata:
6316                 case node_cdata:
6317                 case node_comment:
6318                 case node_doctype:
6319                         return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6320
6321                 default:
6322                         assert(false && "Invalid node type"); // unreachable
6323                         return -1;
6324                 }
6325         }
6326
6327 #ifdef __BORLANDC__
6328         PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6329         {
6330                 return (bool)lhs && rhs;
6331         }
6332
6333         PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6334         {
6335                 return (bool)lhs || rhs;
6336         }
6337 #endif
6338
6339         PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6340         {
6341         }
6342
6343         PUGI__FN xml_node_struct* xml_text::_data() const
6344         {
6345                 if (!_root || impl::is_text_node(_root)) return _root;
6346
6347                 // element nodes can have value if parse_embed_pcdata was used
6348                 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6349                         return _root;
6350
6351                 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6352                         if (impl::is_text_node(node))
6353                                 return node;
6354
6355                 return 0;
6356         }
6357
6358         PUGI__FN xml_node_struct* xml_text::_data_new()
6359         {
6360                 xml_node_struct* d = _data();
6361                 if (d) return d;
6362
6363                 return xml_node(_root).append_child(node_pcdata).internal_object();
6364         }
6365
6366         PUGI__FN xml_text::xml_text(): _root(0)
6367         {
6368         }
6369
6370         PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6371         {
6372         }
6373
6374         PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6375         {
6376                 return _data() ? unspecified_bool_xml_text : 0;
6377         }
6378
6379         PUGI__FN bool xml_text::operator!() const
6380         {
6381                 return !_data();
6382         }
6383
6384         PUGI__FN bool xml_text::empty() const
6385         {
6386                 return _data() == 0;
6387         }
6388
6389         PUGI__FN const char_t* xml_text::get() const
6390         {
6391                 xml_node_struct* d = _data();
6392
6393                 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6394         }
6395
6396         PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6397         {
6398                 xml_node_struct* d = _data();
6399
6400                 return (d && d->value) ? d->value + 0 : def;
6401         }
6402
6403         PUGI__FN int xml_text::as_int(int def) const
6404         {
6405                 xml_node_struct* d = _data();
6406
6407                 return (d && d->value) ? impl::get_value_int(d->value) : def;
6408         }
6409
6410         PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6411         {
6412                 xml_node_struct* d = _data();
6413
6414                 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6415         }
6416
6417         PUGI__FN double xml_text::as_double(double def) const
6418         {
6419                 xml_node_struct* d = _data();
6420
6421                 return (d && d->value) ? impl::get_value_double(d->value) : def;
6422         }
6423
6424         PUGI__FN float xml_text::as_float(float def) const
6425         {
6426                 xml_node_struct* d = _data();
6427
6428                 return (d && d->value) ? impl::get_value_float(d->value) : def;
6429         }
6430
6431         PUGI__FN bool xml_text::as_bool(bool def) const
6432         {
6433                 xml_node_struct* d = _data();
6434
6435                 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6436         }
6437
6438 #ifdef PUGIXML_HAS_LONG_LONG
6439         PUGI__FN long long xml_text::as_llong(long long def) const
6440         {
6441                 xml_node_struct* d = _data();
6442
6443                 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6444         }
6445
6446         PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6447         {
6448                 xml_node_struct* d = _data();
6449
6450                 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6451         }
6452 #endif
6453
6454         PUGI__FN bool xml_text::set(const char_t* rhs)
6455         {
6456                 xml_node_struct* dn = _data_new();
6457
6458                 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6459         }
6460
6461         PUGI__FN bool xml_text::set(int rhs)
6462         {
6463                 xml_node_struct* dn = _data_new();
6464
6465                 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6466         }
6467
6468         PUGI__FN bool xml_text::set(unsigned int rhs)
6469         {
6470                 xml_node_struct* dn = _data_new();
6471
6472                 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6473         }
6474
6475         PUGI__FN bool xml_text::set(long rhs)
6476         {
6477                 xml_node_struct* dn = _data_new();
6478
6479                 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6480         }
6481
6482         PUGI__FN bool xml_text::set(unsigned long rhs)
6483         {
6484                 xml_node_struct* dn = _data_new();
6485
6486                 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6487         }
6488
6489         PUGI__FN bool xml_text::set(float rhs)
6490         {
6491                 xml_node_struct* dn = _data_new();
6492
6493                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6494         }
6495
6496         PUGI__FN bool xml_text::set(double rhs)
6497         {
6498                 xml_node_struct* dn = _data_new();
6499
6500                 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6501         }
6502
6503         PUGI__FN bool xml_text::set(bool rhs)
6504         {
6505                 xml_node_struct* dn = _data_new();
6506
6507                 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6508         }
6509
6510 #ifdef PUGIXML_HAS_LONG_LONG
6511         PUGI__FN bool xml_text::set(long long rhs)
6512         {
6513                 xml_node_struct* dn = _data_new();
6514
6515                 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6516         }
6517
6518         PUGI__FN bool xml_text::set(unsigned long long rhs)
6519         {
6520                 xml_node_struct* dn = _data_new();
6521
6522                 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6523         }
6524 #endif
6525
6526         PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6527         {
6528                 set(rhs);
6529                 return *this;
6530         }
6531
6532         PUGI__FN xml_text& xml_text::operator=(int rhs)
6533         {
6534                 set(rhs);
6535                 return *this;
6536         }
6537
6538         PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6539         {
6540                 set(rhs);
6541                 return *this;
6542         }
6543
6544         PUGI__FN xml_text& xml_text::operator=(long rhs)
6545         {
6546                 set(rhs);
6547                 return *this;
6548         }
6549
6550         PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6551         {
6552                 set(rhs);
6553                 return *this;
6554         }
6555
6556         PUGI__FN xml_text& xml_text::operator=(double rhs)
6557         {
6558                 set(rhs);
6559                 return *this;
6560         }
6561
6562         PUGI__FN xml_text& xml_text::operator=(float rhs)
6563         {
6564                 set(rhs);
6565                 return *this;
6566         }
6567
6568         PUGI__FN xml_text& xml_text::operator=(bool rhs)
6569         {
6570                 set(rhs);
6571                 return *this;
6572         }
6573
6574 #ifdef PUGIXML_HAS_LONG_LONG
6575         PUGI__FN xml_text& xml_text::operator=(long long rhs)
6576         {
6577                 set(rhs);
6578                 return *this;
6579         }
6580
6581         PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6582         {
6583                 set(rhs);
6584                 return *this;
6585         }
6586 #endif
6587
6588         PUGI__FN xml_node xml_text::data() const
6589         {
6590                 return xml_node(_data());
6591         }
6592
6593 #ifdef __BORLANDC__
6594         PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6595         {
6596                 return (bool)lhs && rhs;
6597         }
6598
6599         PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6600         {
6601                 return (bool)lhs || rhs;
6602         }
6603 #endif
6604
6605         PUGI__FN xml_node_iterator::xml_node_iterator()
6606         {
6607         }
6608
6609         PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6610         {
6611         }
6612
6613         PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6614         {
6615         }
6616
6617         PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6618         {
6619                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6620         }
6621
6622         PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6623         {
6624                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6625         }
6626
6627         PUGI__FN xml_node& xml_node_iterator::operator*() const
6628         {
6629                 assert(_wrap._root);
6630                 return _wrap;
6631         }
6632
6633         PUGI__FN xml_node* xml_node_iterator::operator->() const
6634         {
6635                 assert(_wrap._root);
6636                 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6637         }
6638
6639         PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6640         {
6641                 assert(_wrap._root);
6642                 _wrap._root = _wrap._root->next_sibling;
6643                 return *this;
6644         }
6645
6646         PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6647         {
6648                 xml_node_iterator temp = *this;
6649                 ++*this;
6650                 return temp;
6651         }
6652
6653         PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6654         {
6655                 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6656                 return *this;
6657         }
6658
6659         PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6660         {
6661                 xml_node_iterator temp = *this;
6662                 --*this;
6663                 return temp;
6664         }
6665
6666         PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6667         {
6668         }
6669
6670         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6671         {
6672         }
6673
6674         PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6675         {
6676         }
6677
6678         PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6679         {
6680                 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6681         }
6682
6683         PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6684         {
6685                 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6686         }
6687
6688         PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6689         {
6690                 assert(_wrap._attr);
6691                 return _wrap;
6692         }
6693
6694         PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6695         {
6696                 assert(_wrap._attr);
6697                 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6698         }
6699
6700         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6701         {
6702                 assert(_wrap._attr);
6703                 _wrap._attr = _wrap._attr->next_attribute;
6704                 return *this;
6705         }
6706
6707         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6708         {
6709                 xml_attribute_iterator temp = *this;
6710                 ++*this;
6711                 return temp;
6712         }
6713
6714         PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6715         {
6716                 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6717                 return *this;
6718         }
6719
6720         PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6721         {
6722                 xml_attribute_iterator temp = *this;
6723                 --*this;
6724                 return temp;
6725         }
6726
6727         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6728         {
6729         }
6730
6731         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6732         {
6733         }
6734
6735         PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6736         {
6737         }
6738
6739         PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6740         {
6741                 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6742         }
6743
6744         PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6745         {
6746                 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6747         }
6748
6749         PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6750         {
6751                 assert(_wrap._root);
6752                 return _wrap;
6753         }
6754
6755         PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6756         {
6757                 assert(_wrap._root);
6758                 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6759         }
6760
6761         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6762         {
6763                 assert(_wrap._root);
6764                 _wrap = _wrap.next_sibling(_name);
6765                 return *this;
6766         }
6767
6768         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6769         {
6770                 xml_named_node_iterator temp = *this;
6771                 ++*this;
6772                 return temp;
6773         }
6774
6775         PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6776         {
6777                 if (_wrap._root)
6778                         _wrap = _wrap.previous_sibling(_name);
6779                 else
6780                 {
6781                         _wrap = _parent.last_child();
6782
6783                         if (!impl::strequal(_wrap.name(), _name))
6784                                 _wrap = _wrap.previous_sibling(_name);
6785                 }
6786
6787                 return *this;
6788         }
6789
6790         PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6791         {
6792                 xml_named_node_iterator temp = *this;
6793                 --*this;
6794                 return temp;
6795         }
6796
6797         PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6798         {
6799         }
6800
6801         PUGI__FN xml_parse_result::operator bool() const
6802         {
6803                 return status == status_ok;
6804         }
6805
6806         PUGI__FN const char* xml_parse_result::description() const
6807         {
6808                 switch (status)
6809                 {
6810                 case status_ok: return "No error";
6811
6812                 case status_file_not_found: return "File was not found";
6813                 case status_io_error: return "Error reading from file/stream";
6814                 case status_out_of_memory: return "Could not allocate memory";
6815                 case status_internal_error: return "Internal error occurred";
6816
6817                 case status_unrecognized_tag: return "Could not determine tag type";
6818
6819                 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6820                 case status_bad_comment: return "Error parsing comment";
6821                 case status_bad_cdata: return "Error parsing CDATA section";
6822                 case status_bad_doctype: return "Error parsing document type declaration";
6823                 case status_bad_pcdata: return "Error parsing PCDATA section";
6824                 case status_bad_start_element: return "Error parsing start element tag";
6825                 case status_bad_attribute: return "Error parsing element attribute";
6826                 case status_bad_end_element: return "Error parsing end element tag";
6827                 case status_end_element_mismatch: return "Start-end tags mismatch";
6828
6829                 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6830
6831                 case status_no_document_element: return "No document element found";
6832
6833                 default: return "Unknown error";
6834                 }
6835         }
6836
6837         PUGI__FN xml_document::xml_document(): _buffer(0)
6838         {
6839                 _create();
6840         }
6841
6842         PUGI__FN xml_document::~xml_document()
6843         {
6844                 _destroy();
6845         }
6846
6847 #ifdef PUGIXML_HAS_MOVE
6848         PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
6849         {
6850                 _create();
6851                 _move(rhs);
6852         }
6853
6854         PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6855         {
6856                 if (this == &rhs) return *this;
6857
6858                 _destroy();
6859                 _create();
6860                 _move(rhs);
6861
6862                 return *this;
6863         }
6864 #endif
6865
6866         PUGI__FN void xml_document::reset()
6867         {
6868                 _destroy();
6869                 _create();
6870         }
6871
6872         PUGI__FN void xml_document::reset(const xml_document& proto)
6873         {
6874                 reset();
6875
6876                 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6877                         append_copy(cur);
6878         }
6879
6880         PUGI__FN void xml_document::_create()
6881         {
6882                 assert(!_root);
6883
6884         #ifdef PUGIXML_COMPACT
6885                 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
6886                 const size_t page_offset = sizeof(void*);
6887         #else
6888                 const size_t page_offset = 0;
6889         #endif
6890
6891                 // initialize sentinel page
6892                 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6893
6894                 // prepare page structure
6895                 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6896                 assert(page);
6897
6898                 page->busy_size = impl::xml_memory_page_size;
6899
6900                 // setup first page marker
6901         #ifdef PUGIXML_COMPACT
6902                 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
6903                 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6904                 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6905         #endif
6906
6907                 // allocate new root
6908                 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
6909                 _root->prev_sibling_c = _root;
6910
6911                 // setup sentinel page
6912                 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6913
6914                 // setup hash table pointer in allocator
6915         #ifdef PUGIXML_COMPACT
6916                 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
6917         #endif
6918
6919                 // verify the document allocation
6920                 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
6921         }
6922
6923         PUGI__FN void xml_document::_destroy()
6924         {
6925                 assert(_root);
6926
6927                 // destroy static storage
6928                 if (_buffer)
6929                 {
6930                         impl::xml_memory::deallocate(_buffer);
6931                         _buffer = 0;
6932                 }
6933
6934                 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6935                 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6936                 {
6937                         if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6938                 }
6939
6940                 // destroy dynamic storage, leave sentinel page (it's in static memory)
6941                 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6942                 assert(root_page && !root_page->prev);
6943                 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6944
6945                 for (impl::xml_memory_page* page = root_page->next; page; )
6946                 {
6947                         impl::xml_memory_page* next = page->next;
6948
6949                         impl::xml_allocator::deallocate_page(page);
6950
6951                         page = next;
6952                 }
6953
6954         #ifdef PUGIXML_COMPACT
6955                 // destroy hash table
6956                 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6957         #endif
6958
6959                 _root = 0;
6960         }
6961
6962 #ifdef PUGIXML_HAS_MOVE
6963         PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6964         {
6965                 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
6966                 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
6967
6968                 // save first child pointer for later; this needs hash access
6969                 xml_node_struct* other_first_child = other->first_child;
6970
6971         #ifdef PUGIXML_COMPACT
6972                 // reserve space for the hash table up front; this is the only operation that can fail
6973                 // if it does, we have no choice but to throw (if we have exceptions)
6974                 if (other_first_child)
6975                 {
6976                         size_t other_children = 0;
6977                         for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
6978                                 other_children++;
6979
6980                         // in compact mode, each pointer assignment could result in a hash table request
6981                         // during move, we have to relocate document first_child and parents of all children
6982                         // normally there's just one child and its parent has a pointerless encoding but
6983                         // we assume the worst here
6984                         if (!other->_hash->reserve(other_children + 1))
6985                         {
6986                         #ifdef PUGIXML_NO_EXCEPTIONS
6987                                 return;
6988                         #else
6989                                 throw std::bad_alloc();
6990                         #endif
6991                         }
6992                 }
6993         #endif
6994
6995                 // move allocation state
6996                 doc->_root = other->_root;
6997                 doc->_busy_size = other->_busy_size;
6998
6999                 // move buffer state
7000                 doc->buffer = other->buffer;
7001                 doc->extra_buffers = other->extra_buffers;
7002                 _buffer = rhs._buffer;
7003
7004         #ifdef PUGIXML_COMPACT
7005                 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7006                 doc->hash = other->hash;
7007                 doc->_hash = &doc->hash;
7008
7009                 // make sure we don't access other hash up until the end when we reinitialize other document
7010                 other->_hash = 0;
7011         #endif
7012
7013                 // move page structure
7014                 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7015                 assert(doc_page && !doc_page->prev && !doc_page->next);
7016
7017                 impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7018                 assert(other_page && !other_page->prev);
7019
7020                 // relink pages since root page is embedded into xml_document
7021                 if (impl::xml_memory_page* page = other_page->next)
7022                 {
7023                         assert(page->prev == other_page);
7024
7025                         page->prev = doc_page;
7026
7027                         doc_page->next = page;
7028                         other_page->next = 0;
7029                 }
7030
7031                 // make sure pages point to the correct document state
7032                 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7033                 {
7034                         assert(page->allocator == other);
7035
7036                         page->allocator = doc;
7037
7038                 #ifdef PUGIXML_COMPACT
7039                         // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7040                         if (page->compact_shared_parent == other)
7041                                 page->compact_shared_parent = doc;
7042                 #endif
7043                 }
7044
7045                 // move tree structure
7046                 assert(!doc->first_child);
7047
7048                 doc->first_child = other_first_child;
7049
7050                 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7051                 {
7052                 #ifdef PUGIXML_COMPACT
7053                         // most children will have migrated when we reassigned compact_shared_parent
7054                         assert(node->parent == other || node->parent == doc);
7055
7056                         node->parent = doc;
7057                 #else
7058                         assert(node->parent == other);
7059                         node->parent = doc;
7060                 #endif
7061                 }
7062
7063                 // reset other document
7064                 new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7065                 rhs._buffer = 0;
7066         }
7067 #endif
7068
7069 #ifndef PUGIXML_NO_STL
7070         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7071         {
7072                 reset();
7073
7074                 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7075         }
7076
7077         PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7078         {
7079                 reset();
7080
7081                 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7082         }
7083 #endif
7084
7085         PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7086         {
7087                 // Force native encoding (skip autodetection)
7088         #ifdef PUGIXML_WCHAR_MODE
7089                 xml_encoding encoding = encoding_wchar;
7090         #else
7091                 xml_encoding encoding = encoding_utf8;
7092         #endif
7093
7094                 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7095         }
7096
7097         PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7098         {
7099                 return load_string(contents, options);
7100         }
7101
7102         PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7103         {
7104                 reset();
7105
7106                 using impl::auto_deleter; // MSVC7 workaround
7107                 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
7108
7109                 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7110         }
7111
7112         PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7113         {
7114                 reset();
7115
7116                 using impl::auto_deleter; // MSVC7 workaround
7117                 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7118
7119                 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7120         }
7121
7122         PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7123         {
7124                 reset();
7125
7126                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7127         }
7128
7129         PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7130         {
7131                 reset();
7132
7133                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7134         }
7135
7136         PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7137         {
7138                 reset();
7139
7140                 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7141         }
7142
7143         PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7144         {
7145                 impl::xml_buffered_writer buffered_writer(writer, encoding);
7146
7147                 if ((flags & format_write_bom) && encoding != encoding_latin1)
7148                 {
7149                         // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7150                 #ifdef PUGIXML_WCHAR_MODE
7151                         unsigned int bom = 0xfeff;
7152                         buffered_writer.write(static_cast<wchar_t>(bom));
7153                 #else
7154                         buffered_writer.write('\xef', '\xbb', '\xbf');
7155                 #endif
7156                 }
7157
7158                 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7159                 {
7160                         buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7161                         if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7162                         buffered_writer.write('?', '>');
7163                         if (!(flags & format_raw)) buffered_writer.write('\n');
7164                 }
7165
7166                 impl::node_output(buffered_writer, _root, indent, flags, 0);
7167
7168                 buffered_writer.flush();
7169         }
7170
7171 #ifndef PUGIXML_NO_STL
7172         PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7173         {
7174                 xml_writer_stream writer(stream);
7175
7176                 save(writer, indent, flags, encoding);
7177         }
7178
7179         PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7180         {
7181                 xml_writer_stream writer(stream);
7182
7183                 save(writer, indent, flags, encoding_wchar);
7184         }
7185 #endif
7186
7187         PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7188         {
7189                 using impl::auto_deleter; // MSVC7 workaround
7190                 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7191
7192                 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7193         }
7194
7195         PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7196         {
7197                 using impl::auto_deleter; // MSVC7 workaround
7198                 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7199
7200                 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7201         }
7202
7203         PUGI__FN xml_node xml_document::document_element() const
7204         {
7205                 assert(_root);
7206
7207                 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7208                         if (PUGI__NODETYPE(i) == node_element)
7209                                 return xml_node(i);
7210
7211                 return xml_node();
7212         }
7213
7214 #ifndef PUGIXML_NO_STL
7215         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7216         {
7217                 assert(str);
7218
7219                 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7220         }
7221
7222         PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7223         {
7224                 return impl::as_utf8_impl(str.c_str(), str.size());
7225         }
7226
7227         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7228         {
7229                 assert(str);
7230
7231                 return impl::as_wide_impl(str, strlen(str));
7232         }
7233
7234         PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7235         {
7236                 return impl::as_wide_impl(str.c_str(), str.size());
7237         }
7238 #endif
7239
7240         PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7241         {
7242                 impl::xml_memory::allocate = allocate;
7243                 impl::xml_memory::deallocate = deallocate;
7244         }
7245
7246         PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7247         {
7248                 return impl::xml_memory::allocate;
7249         }
7250
7251         PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7252         {
7253                 return impl::xml_memory::deallocate;
7254         }
7255 }
7256
7257 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7258 namespace std
7259 {
7260         // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
7261         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7262         {
7263                 return std::bidirectional_iterator_tag();
7264         }
7265
7266         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7267         {
7268                 return std::bidirectional_iterator_tag();
7269         }
7270
7271         PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7272         {
7273                 return std::bidirectional_iterator_tag();
7274         }
7275 }
7276 #endif
7277
7278 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7279 namespace std
7280 {
7281         // Workarounds for (non-standard) iterator category detection
7282         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7283         {
7284                 return std::bidirectional_iterator_tag();
7285         }
7286
7287         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7288         {
7289                 return std::bidirectional_iterator_tag();
7290         }
7291
7292         PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7293         {
7294                 return std::bidirectional_iterator_tag();
7295         }
7296 }
7297 #endif
7298
7299 #ifndef PUGIXML_NO_XPATH
7300 // STL replacements
7301 PUGI__NS_BEGIN
// Function object that compares two values of the same type with operator==
struct equal_to
{
	template <typename T> bool operator()(const T& a, const T& b) const
	{
		const bool same = (a == b);

		return same;
	}
};
7309
// Function object that compares two values of the same type with operator!=
struct not_equal_to
{
	template <typename T> bool operator()(const T& a, const T& b) const
	{
		const bool different = (a != b);

		return different;
	}
};
7317
// Function object that compares two values of the same type with operator<
struct less
{
	template <typename T> bool operator()(const T& a, const T& b) const
	{
		const bool before = (a < b);

		return before;
	}
};
7325
// Function object that compares two values of the same type with operator<=
struct less_equal
{
	template <typename T> bool operator()(const T& a, const T& b) const
	{
		const bool not_after = (a <= b);

		return not_after;
	}
};
7333
// Exchanges two values through a temporary copy (copy construction plus two assignments)
template <typename T> void swap(T& lhs, T& rhs)
{
	T saved = lhs;

	lhs = rhs;
	rhs = saved;
}
7340
// Returns an iterator to the first element of [begin, end) for which no other element
// compares smaller under pred (pred(a, b) is true when a orders before b).
// For an empty range, end (== begin) is returned, matching std::min_element.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// an empty range has no minimum; return before forming begin + 1, which would
	// already be past end and make the loop below read out of bounds
	if (begin == end)
		return begin;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
7351
// Reverses [begin, end) in place by swapping the outermost pair and moving inwards
template <typename I> void reverse(I begin, I end)
{
	while (end - begin > 1)
	{
		--end;
		swap(*begin, *end);
		++begin;
	}
}
7356
// Collapses each run of consecutive equal elements in [begin, end) to its first element
// and returns the new past-the-end iterator. Elements are compared with operator!=.
template <typename I> I unique(I begin, I end)
{
	// fast skip: advance over the leading part where neighbours already differ
	while (end - begin > 1 && *begin != *(begin + 1))
		++begin;

	// only an empty range gets here with nothing left to inspect
	if (begin == end)
		return begin;

	// last element that is kept in the output
	I kept = begin;

	// copy every element that differs from the last kept one right behind it
	for (I read = begin + 1; read != end; ++read)
	{
		if (*read != *kept)
		{
			++kept;
			*kept = *read;
		}
	}

	// kept points to a live element, so the end is one past it
	return kept + 1;
}
7379
// Sorts [begin, end) in place with insertion sort; pred(a, b) is true when a must
// come before b. Equal elements keep their relative order.
template <typename T, typename Pred> void insertion_sort(T* begin, T* end, const Pred& pred)
{
	if (begin == end)
		return;

	for (T* cur = begin + 1; cur != end; ++cur)
	{
		// take the element out, leaving a free slot at its position
		T pending = *cur;
		T* slot = cur;

		// shift larger predecessors right until the slot reaches its final place
		for (; slot > begin && pred(pending, *(slot - 1)); --slot)
			*slot = *(slot - 1);

		*slot = pending;
	}
}
7401
// Returns whichever of the three iterators refers to the median value under pred.
// Only the local iterator copies are reordered; the referenced elements are not touched.
template <typename I, typename Pred> I median3(I first, I middle, I last, const Pred& pred)
{
	// order (first, middle)
	if (pred(*middle, *first))
	{
		I tmp = middle;
		middle = first;
		first = tmp;
	}

	// order (middle, last); middle may now hold the smallest of the three
	if (pred(*last, *middle))
	{
		I tmp = last;
		last = middle;
		middle = tmp;
	}

	// re-establish (first, middle) after the previous exchange
	if (pred(*middle, *first))
	{
		I tmp = middle;
		middle = first;
		first = tmp;
	}

	return middle;
}
7410
// Three-way partition of [begin, end) around pivot.
// On return the range is ordered as: elements for which pred(x, pivot) holds, then
// elements equal to pivot (operator==), then the rest. *out_eqbeg / *out_eqend
// receive the bounds of the middle (equal) group.
template <typename T, typename Pred> void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
{
	// invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
	T* equal_end = begin;
	T* scan = begin;
	T* greater_begin = end;

	while (scan < greater_begin)
	{
		if (pred(*scan, pivot))
		{
			// smaller than pivot: already in the right group
			++scan;
		}
		else if (*scan == pivot)
		{
			// equal to pivot: park it at the front for now
			swap(*equal_end, *scan);
			++equal_end;
			++scan;
		}
		else
		{
			// larger than pivot: move it to the back and re-examine the swapped-in element
			--greater_begin;
			swap(*scan, *greater_begin);
		}
	}

	// we now have just 3 groups: = < >; move equal elements to the middle
	T* middle_begin = greater_begin;

	for (T* it = begin; it != equal_end; ++it)
	{
		--middle_begin;
		swap(*it, *middle_begin);
	}

	*out_eqbeg = middle_begin;
	*out_eqend = greater_begin;
}
7437
// Sorts [begin, end) in place under pred.
// Ranges longer than 16 elements are repeatedly split with a median-of-three pivot and
// a three-way partition; the smaller side is handled by a recursive call and the larger
// side by the next loop iteration. Whatever remains is finished by insertion_sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	// sort large chunks
	while (end - begin > 16)
	{
		// pick the pivot as the median of the first, middle and last element
		I center = begin + (end - begin) / 2;
		I pivot = median3(begin, center, end - 1, pred);

		// partition in three chunks (< = >)
		I equal_begin, equal_end;
		partition3(begin, end, *pivot, pred, &equal_begin, &equal_end);

		const bool left_is_larger = (equal_begin - begin) > (end - equal_end);

		// recurse into the smaller side, keep looping on the larger one
		if (left_is_larger)
		{
			sort(equal_end, end, pred);
			end = equal_begin;
		}
		else
		{
			sort(begin, equal_begin, pred);
			begin = equal_end;
		}
	}

	// insertion sort small chunk
	insertion_sort(begin, end, pred);
}
7467 PUGI__NS_END
7468
7469 // Allocator used for AST and evaluation stacks
7470 PUGI__NS_BEGIN
// Size in bytes of the data area of a default XPath memory block;
// can be overridden at build time with PUGIXML_MEMORY_XPATH_PAGE_SIZE
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif

// Allocation granularity: the larger of sizeof(double) and sizeof(void*)
static const uintptr_t xpath_memory_block_alignment = (sizeof(void*) < sizeof(double)) ? sizeof(double) : sizeof(void*);

// One block in a singly linked chain of XPath allocator memory
struct xpath_memory_block
{
	// next block in the chain (0 terminates the chain)
	xpath_memory_block* next;

	// number of usable bytes in data; compared against the requested size by xpath_allocator
	size_t capacity;

	// payload; the double member shares storage with data so the payload is aligned for double
	union
	{
		char data[xpath_memory_page_size];
		double alignment;
	};
};
7492
7493         struct xpath_allocator
7494         {
7495                 xpath_memory_block* _root;
7496                 size_t _root_size;
7497                 bool* _error;
7498
7499                 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7500                 {
7501                 }
7502
7503                 void* allocate(size_t size)
7504                 {
7505                         // round size up to block alignment boundary
7506                         size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7507
7508                         if (_root_size + size <= _root->capacity)
7509                         {
7510                                 void* buf = &_root->data[0] + _root_size;
7511                                 _root_size += size;
7512                                 return buf;
7513                         }
7514                         else
7515                         {
7516                                 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7517                                 size_t block_capacity_base = sizeof(_root->data);
7518                                 size_t block_capacity_req = size + block_capacity_base / 4;
7519                                 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7520
7521                                 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7522
7523                                 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7524                                 if (!block)
7525                                 {
7526                                         if (_error) *_error = true;
7527                                         return 0;
7528                                 }
7529
7530                                 block->next = _root;
7531                                 block->capacity = block_capacity;
7532
7533                                 _root = block;
7534                                 _root_size = size;
7535
7536                                 return block->data;
7537                         }
7538                 }
7539
7540                 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7541                 {
7542                         // round size up to block alignment boundary
7543                         old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7544                         new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7545
7546                         // we can only reallocate the last object
7547                         assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7548
7549                         // try to reallocate the object inplace
7550                         if (ptr && _root_size - old_size + new_size <= _root->capacity)
7551                         {
7552                                 _root_size = _root_size - old_size + new_size;
7553                                 return ptr;
7554                         }
7555
7556                         // allocate a new block
7557                         void* result = allocate(new_size);
7558                         if (!result) return 0;
7559
7560                         // we have a new block
7561                         if (ptr)
7562                         {
7563                                 // copy old data (we only support growing)
7564                                 assert(new_size >= old_size);
7565                                 memcpy(result, ptr, old_size);
7566
7567                                 // free the previous page if it had no other objects
7568                                 assert(_root->data == result);
7569                                 assert(_root->next);
7570
7571                                 if (_root->next->data == ptr)
7572                                 {
7573                                         // deallocate the whole page, unless it was the first one
7574                                         xpath_memory_block* next = _root->next->next;
7575
7576                                         if (next)
7577                                         {
7578                                                 xml_memory::deallocate(_root->next);
7579                                                 _root->next = next;
7580                                         }
7581                                 }
7582                         }
7583
7584                         return result;
7585                 }
7586
7587                 void revert(const xpath_allocator& state)
7588                 {
7589                         // free all new pages
7590                         xpath_memory_block* cur = _root;
7591
7592                         while (cur != state._root)
7593                         {
7594                                 xpath_memory_block* next = cur->next;
7595
7596                                 xml_memory::deallocate(cur);
7597
7598                                 cur = next;
7599                         }
7600
7601                         // restore state
7602                         _root = state._root;
7603                         _root_size = state._root_size;
7604                 }
7605
7606                 void release()
7607                 {
7608                         xpath_memory_block* cur = _root;
7609                         assert(cur);
7610
7611                         while (cur->next)
7612                         {
7613                                 xpath_memory_block* next = cur->next;
7614
7615                                 xml_memory::deallocate(cur);
7616
7617                                 cur = next;
7618                         }
7619                 }
7620         };
7621
7622         struct xpath_allocator_capture
7623         {
7624                 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7625                 {
7626                 }
7627
7628                 ~xpath_allocator_capture()
7629                 {
7630                         _target->revert(_state);
7631                 }
7632
7633                 xpath_allocator* _target;
7634                 xpath_allocator _state;
7635         };
7636
7637         struct xpath_stack
7638         {
7639                 xpath_allocator* result;
7640                 xpath_allocator* temp;
7641         };
7642
7643         struct xpath_stack_data
7644         {
7645                 xpath_memory_block blocks[2];
7646                 xpath_allocator result;
7647                 xpath_allocator temp;
7648                 xpath_stack stack;
7649                 bool oom;
7650
7651                 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7652                 {
7653                         blocks[0].next = blocks[1].next = 0;
7654                         blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7655
7656                         stack.result = &result;
7657                         stack.temp = &temp;
7658                 }
7659
7660                 ~xpath_stack_data()
7661                 {
7662                         result.release();
7663                         temp.release();
7664                 }
7665         };
7666 PUGI__NS_END
7667
7668 // String class
7669 PUGI__NS_BEGIN
7670         class xpath_string
7671         {
7672                 const char_t* _buffer;
7673                 bool _uses_heap;
7674                 size_t _length_heap;
7675
7676                 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7677                 {
7678                         char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7679                         if (!result) return 0;
7680
7681                         memcpy(result, string, length * sizeof(char_t));
7682                         result[length] = 0;
7683
7684                         return result;
7685                 }
7686
7687                 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7688                 {
7689                 }
7690
7691         public:
7692                 static xpath_string from_const(const char_t* str)
7693                 {
7694                         return xpath_string(str, false, 0);
7695                 }
7696
7697                 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7698                 {
7699                         assert(begin <= end && *end == 0);
7700
7701                         return xpath_string(begin, true, static_cast<size_t>(end - begin));
7702                 }
7703
7704                 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7705                 {
7706                         assert(begin <= end);
7707
7708                         if (begin == end)
7709                                 return xpath_string();
7710
7711                         size_t length = static_cast<size_t>(end - begin);
7712                         const char_t* data = duplicate_string(begin, length, alloc);
7713
7714                         return data ? xpath_string(data, true, length) : xpath_string();
7715                 }
7716
7717                 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7718                 {
7719                 }
7720
7721                 void append(const xpath_string& o, xpath_allocator* alloc)
7722                 {
7723                         // skip empty sources
7724                         if (!*o._buffer) return;
7725
7726                         // fast append for constant empty target and constant source
7727                         if (!*_buffer && !_uses_heap && !o._uses_heap)
7728                         {
7729                                 _buffer = o._buffer;
7730                         }
7731                         else
7732                         {
7733                                 // need to make heap copy
7734                                 size_t target_length = length();
7735                                 size_t source_length = o.length();
7736                                 size_t result_length = target_length + source_length;
7737
7738                                 // allocate new buffer
7739                                 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7740                                 if (!result) return;
7741
7742                                 // append first string to the new buffer in case there was no reallocation
7743                                 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7744
7745                                 // append second string to the new buffer
7746                                 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7747                                 result[result_length] = 0;
7748
7749                                 // finalize
7750                                 _buffer = result;
7751                                 _uses_heap = true;
7752                                 _length_heap = result_length;
7753                         }
7754                 }
7755
7756                 const char_t* c_str() const
7757                 {
7758                         return _buffer;
7759                 }
7760
7761                 size_t length() const
7762                 {
7763                         return _uses_heap ? _length_heap : strlength(_buffer);
7764                 }
7765
7766                 char_t* data(xpath_allocator* alloc)
7767                 {
7768                         // make private heap copy
7769                         if (!_uses_heap)
7770                         {
7771                                 size_t length_ = strlength(_buffer);
7772                                 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7773
7774                                 if (!data_) return 0;
7775
7776                                 _buffer = data_;
7777                                 _uses_heap = true;
7778                                 _length_heap = length_;
7779                         }
7780
7781                         return const_cast<char_t*>(_buffer);
7782                 }
7783
7784                 bool empty() const
7785                 {
7786                         return *_buffer == 0;
7787                 }
7788
7789                 bool operator==(const xpath_string& o) const
7790                 {
7791                         return strequal(_buffer, o._buffer);
7792                 }
7793
7794                 bool operator!=(const xpath_string& o) const
7795                 {
7796                         return !strequal(_buffer, o._buffer);
7797                 }
7798
7799                 bool uses_heap() const
7800                 {
7801                         return _uses_heap;
7802                 }
7803         };
7804 PUGI__NS_END
7805
7806 PUGI__NS_BEGIN
7807         PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7808         {
7809                 while (*pattern && *string == *pattern)
7810                 {
7811                         string++;
7812                         pattern++;
7813                 }
7814
7815                 return *pattern == 0;
7816         }
7817
7818         PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7819         {
7820         #ifdef PUGIXML_WCHAR_MODE
7821                 return wcschr(s, c);
7822         #else
7823                 return strchr(s, c);
7824         #endif
7825         }
7826
7827         PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7828         {
7829         #ifdef PUGIXML_WCHAR_MODE
7830                 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7831                 return (*p == 0) ? s : wcsstr(s, p);
7832         #else
7833                 return strstr(s, p);
7834         #endif
7835         }
7836
7837         // Converts symbol to lower case, if it is an ASCII one
7838         PUGI__FN char_t tolower_ascii(char_t ch)
7839         {
7840                 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7841         }
7842
7843         PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7844         {
7845                 if (na.attribute())
7846                         return xpath_string::from_const(na.attribute().value());
7847                 else
7848                 {
7849                         xml_node n = na.node();
7850
7851                         switch (n.type())
7852                         {
7853                         case node_pcdata:
7854                         case node_cdata:
7855                         case node_comment:
7856                         case node_pi:
7857                                 return xpath_string::from_const(n.value());
7858
7859                         case node_document:
7860                         case node_element:
7861                         {
7862                                 xpath_string result;
7863
7864                                 // element nodes can have value if parse_embed_pcdata was used
7865                                 if (n.value()[0])
7866                                         result.append(xpath_string::from_const(n.value()), alloc);
7867
7868                                 xml_node cur = n.first_child();
7869
7870                                 while (cur && cur != n)
7871                                 {
7872                                         if (cur.type() == node_pcdata || cur.type() == node_cdata)
7873                                                 result.append(xpath_string::from_const(cur.value()), alloc);
7874
7875                                         if (cur.first_child())
7876                                                 cur = cur.first_child();
7877                                         else if (cur.next_sibling())
7878                                                 cur = cur.next_sibling();
7879                                         else
7880                                         {
7881                                                 while (!cur.next_sibling() && cur != n)
7882                                                         cur = cur.parent();
7883
7884                                                 if (cur != n) cur = cur.next_sibling();
7885                                         }
7886                                 }
7887
7888                                 return result;
7889                         }
7890
7891                         default:
7892                                 return xpath_string();
7893                         }
7894                 }
7895         }
7896
7897         PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7898         {
7899                 assert(ln->parent == rn->parent);
7900
7901                 // there is no common ancestor (the shared parent is null), nodes are from different documents
7902                 if (!ln->parent) return ln < rn;
7903
7904                 // determine sibling order
7905                 xml_node_struct* ls = ln;
7906                 xml_node_struct* rs = rn;
7907
7908                 while (ls && rs)
7909                 {
7910                         if (ls == rn) return true;
7911                         if (rs == ln) return false;
7912
7913                         ls = ls->next_sibling;
7914                         rs = rs->next_sibling;
7915                 }
7916
7917                 // if rn sibling chain ended ln must be before rn
7918                 return !rs;
7919         }
7920
7921         PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7922         {
7923                 // find common ancestor at the same depth, if any
7924                 xml_node_struct* lp = ln;
7925                 xml_node_struct* rp = rn;
7926
7927                 while (lp && rp && lp->parent != rp->parent)
7928                 {
7929                         lp = lp->parent;
7930                         rp = rp->parent;
7931                 }
7932
7933                 // parents are the same!
7934                 if (lp && rp) return node_is_before_sibling(lp, rp);
7935
7936                 // nodes are at different depths, need to normalize heights
7937                 bool left_higher = !lp;
7938
7939                 while (lp)
7940                 {
7941                         lp = lp->parent;
7942                         ln = ln->parent;
7943                 }
7944
7945                 while (rp)
7946                 {
7947                         rp = rp->parent;
7948                         rn = rn->parent;
7949                 }
7950
7951                 // one node is the ancestor of the other
7952                 if (ln == rn) return left_higher;
7953
7954                 // find common ancestor... again
7955                 while (ln->parent != rn->parent)
7956                 {
7957                         ln = ln->parent;
7958                         rn = rn->parent;
7959                 }
7960
7961                 return node_is_before_sibling(ln, rn);
7962         }
7963
7964         PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7965         {
7966                 while (node && node != parent) node = node->parent;
7967
7968                 return parent && node == parent;
7969         }
7970
7971         PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7972         {
7973                 xml_node_struct* node = xnode.node().internal_object();
7974
7975                 if (node)
7976                 {
7977                         if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7978                         {
7979                                 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7980                                 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7981                         }
7982
7983                         return 0;
7984                 }
7985
7986                 xml_attribute_struct* attr = xnode.attribute().internal_object();
7987
7988                 if (attr)
7989                 {
7990                         if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7991                         {
7992                                 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7993                                 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7994                         }
7995
7996                         return 0;
7997                 }
7998
7999                 return 0;
8000         }
8001
8002         struct document_order_comparator
8003         {
8004                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8005                 {
8006                         // optimized document order based check
8007                         const void* lo = document_buffer_order(lhs);
8008                         const void* ro = document_buffer_order(rhs);
8009
8010                         if (lo && ro) return lo < ro;
8011
8012                         // slow comparison
8013                         xml_node ln = lhs.node(), rn = rhs.node();
8014
8015                         // compare attributes
8016                         if (lhs.attribute() && rhs.attribute())
8017                         {
8018                                 // shared parent
8019                                 if (lhs.parent() == rhs.parent())
8020                                 {
8021                                         // determine sibling order
8022                                         for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8023                                                 if (a == rhs.attribute())
8024                                                         return true;
8025
8026                                         return false;
8027                                 }
8028
8029                                 // compare attribute parents
8030                                 ln = lhs.parent();
8031                                 rn = rhs.parent();
8032                         }
8033                         else if (lhs.attribute())
8034                         {
8035                                 // attributes go after the parent element
8036                                 if (lhs.parent() == rhs.node()) return false;
8037
8038                                 ln = lhs.parent();
8039                         }
8040                         else if (rhs.attribute())
8041                         {
8042                                 // attributes go after the parent element
8043                                 if (rhs.parent() == lhs.node()) return true;
8044
8045                                 rn = rhs.parent();
8046                         }
8047
8048                         if (ln == rn) return false;
8049
8050                         if (!ln || !rn) return ln < rn;
8051
8052                         return node_is_before(ln.internal_object(), rn.internal_object());
8053                 }
8054         };
8055
8056         struct duplicate_comparator
8057         {
8058                 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8059                 {
8060                         if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
8061                         else return rhs.attribute() ? false : lhs.node() < rhs.node();
8062                 }
8063         };
8064
        // Produces a NaN value of type double.
        PUGI__FN double gen_nan()
        {
        // taken when the implementation advertises IEC 559 (IEEE 754) arithmetic or float has the radix,
        // exponent range and mantissa width of an IEEE single-precision number
        #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
                PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
                typedef uint32_t UI; // BCC5 workaround
                union { float f; UI i; } u;
                // 0x7fc00000: all exponent bits and the top mantissa bit set, i.e. a single-precision quiet NaN
                u.i = 0x7fc00000;
                // the float is widened to double on return
                return u.f;
        #else
                // fallback: compute 0/0 at run time
                // NOTE(review): volatile presumably keeps the compiler from folding or rejecting the division - confirm
                const volatile double zero = 0.0;
                return zero / zero;
        #endif
        }
8079
        // Tells whether value is NaN, using whichever facility the toolchain offers.
        PUGI__FN bool is_nan(double value)
        {
        #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
                // _isnan returns an int; !! converts it to bool
                return !!_isnan(value);
        #elif defined(fpclassify) && defined(FP_NAN)
                // only used when fpclassify is available as a macro
                return fpclassify(value) == FP_NAN;
        #else
                // fallback: NaN is the only value that compares unequal to itself
                // NOTE(review): volatile presumably prevents the comparison from being optimized away - confirm
                const volatile double v = value;
                return v != v;
        #endif
        }
8092
        // Returns the fixed string for numbers that are not formatted digit by digit:
        // "NaN", "Infinity", "-Infinity" and "0" (any value comparing equal to zero).
        // Returns 0 for every other number, which tells the caller to format it normally.
        PUGI__FN const char_t* convert_number_to_string_special(double value)
        {
        #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
                // finite values: only zero is special
                if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
                if (_isnan(value)) return PUGIXML_TEXT("NaN");
                // neither finite nor NaN: an infinity, distinguished by sign
                return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
        #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
                switch (fpclassify(value))
                {
                case FP_NAN:
                        return PUGIXML_TEXT("NaN");

                case FP_INFINITE:
                        return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");

                case FP_ZERO:
                        return PUGIXML_TEXT("0");

                default:
                        // ordinary number, no special string
                        return 0;
                }
        #else
                // fallback: classify with plain comparisons
                const volatile double v = value;

                if (v == 0) return PUGIXML_TEXT("0");
                // NaN is the only value that compares unequal to itself
                if (v != v) return PUGIXML_TEXT("NaN");
                // zero and NaN were handled above, so doubling leaves only an infinity unchanged
                if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
                return 0;
        #endif
        }
8124
8125         PUGI__FN bool convert_number_to_boolean(double value)
8126         {
8127                 return (value != 0 && !is_nan(value));
8128         }
8129
8130         PUGI__FN void truncate_zeros(char* begin, char* end)
8131         {
8132                 while (begin != end && end[-1] == '0') end--;
8133
8134                 *end = 0;
8135         }
8136
        // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
        // - buffer receives the digit characters (scratch storage owned by the caller)
        // - *out_mantissa is set to the first digit inside buffer, zero-terminated, trailing zeros removed
        // - *out_exponent is set to the decimal exponent that goes with the 0.xxxxx form
        // the sign of value is not part of the output
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
        PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
        {
                // get base values (the sign is retrieved but not used here)
                int sign, exponent;
                _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);

                // truncate redundant zeros
                truncate_zeros(buffer, buffer + strlen(buffer));

                // fill results
                *out_mantissa = buffer;
                *out_exponent = exponent;
        }
#else
        PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
        {
                // get a scientific notation value with IEEE DBL_DIG decimals
                PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);

                // get the exponent (possibly negative)
                char* exponent_string = strchr(buffer, 'e');
                assert(exponent_string);

                int exponent = atoi(exponent_string + 1);

                // extract mantissa string: skip sign
                char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
                // expects the form d.ddd with a non-zero leading digit
                assert(mantissa[0] != '0' && mantissa[1] == '.');

                // divide mantissa by 10 to eliminate integer part:
                // the leading digit overwrites the decimal point, the string then starts one character later
                // and the exponent grows by one to compensate
                mantissa[1] = mantissa[0];
                mantissa++;
                exponent++;

                // remove extra mantissa digits and zero-terminate mantissa
                truncate_zeros(mantissa, exponent_string);

                // fill results
                *out_mantissa = mantissa;
                *out_exponent = exponent;
        }
#endif
8181
8182         PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8183         {
8184                 // try special number conversion
8185                 const char_t* special = convert_number_to_string_special(value);
8186                 if (special) return xpath_string::from_const(special);
8187
8188                 // get mantissa + exponent form
8189                 char mantissa_buffer[32];
8190
8191                 char* mantissa;
8192                 int exponent;
8193                 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8194
8195                 // allocate a buffer of suitable length for the number
8196                 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8197                 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8198                 if (!result) return xpath_string();
8199
8200                 // make the number!
8201                 char_t* s = result;
8202
8203                 // sign
8204                 if (value < 0) *s++ = '-';
8205
8206                 // integer part
8207                 if (exponent <= 0)
8208                 {
8209                         *s++ = '0';
8210                 }
8211                 else
8212                 {
8213                         while (exponent > 0)
8214                         {
8215                                 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8216                                 *s++ = *mantissa ? *mantissa++ : '0';
8217                                 exponent--;
8218                         }
8219                 }
8220
8221                 // fractional part
8222                 if (*mantissa)
8223                 {
8224                         // decimal point
8225                         *s++ = '.';
8226
8227                         // extra zeroes from negative exponent
8228                         while (exponent < 0)
8229                         {
8230                                 *s++ = '0';
8231                                 exponent++;
8232                         }
8233
8234                         // extra mantissa digits
8235                         while (*mantissa)
8236                         {
8237                                 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8238                                 *s++ = *mantissa++;
8239                         }
8240                 }
8241
8242                 // zero-terminate
8243                 assert(s < result + result_size);
8244                 *s = 0;
8245
8246                 return xpath_string::from_heap_preallocated(result, s);
8247         }
8248
8249         PUGI__FN bool check_string_to_number_format(const char_t* string)
8250         {
8251                 // parse leading whitespace
8252                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8253
8254                 // parse sign
8255                 if (*string == '-') ++string;
8256
8257                 if (!*string) return false;
8258
8259                 // if there is no integer part, there should be a decimal part with at least one digit
8260                 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8261
8262                 // parse integer part
8263                 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8264
8265                 // parse decimal part
8266                 if (*string == '.')
8267                 {
8268                         ++string;
8269
8270                         while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8271                 }
8272
8273                 // parse trailing whitespace
8274                 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8275
8276                 return *string == 0;
8277         }
8278
8279         PUGI__FN double convert_string_to_number(const char_t* string)
8280         {
8281                 // check string format
8282                 if (!check_string_to_number_format(string)) return gen_nan();
8283
8284                 // parse string
8285         #ifdef PUGIXML_WCHAR_MODE
8286                 return wcstod(string, 0);
8287         #else
8288                 return strtod(string, 0);
8289         #endif
8290         }
8291
8292         PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8293         {
8294                 size_t length = static_cast<size_t>(end - begin);
8295                 char_t* scratch = buffer;
8296
8297                 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8298                 {
8299                         // need to make dummy on-heap copy
8300                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8301                         if (!scratch) return false;
8302                 }
8303
8304                 // copy string to zero-terminated buffer and perform conversion
8305                 memcpy(scratch, begin, length * sizeof(char_t));
8306                 scratch[length] = 0;
8307
8308                 *out_result = convert_string_to_number(scratch);
8309
8310                 // free dummy buffer
8311                 if (scratch != buffer) xml_memory::deallocate(scratch);
8312
8313                 return true;
8314         }
8315
8316         PUGI__FN double round_nearest(double value)
8317         {
8318                 return floor(value + 0.5);
8319         }
8320
8321         PUGI__FN double round_nearest_nzero(double value)
8322         {
8323                 // same as round_nearest, but returns -0 for [-0.5, -0]
8324                 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8325                 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8326         }
8327
8328         PUGI__FN const char_t* qualified_name(const xpath_node& node)
8329         {
8330                 return node.attribute() ? node.attribute().name() : node.node().name();
8331         }
8332
8333         PUGI__FN const char_t* local_name(const xpath_node& node)
8334         {
8335                 const char_t* name = qualified_name(node);
8336                 const char_t* p = find_char(name, ':');
8337
8338                 return p ? p + 1 : name;
8339         }
8340
8341         struct namespace_uri_predicate
8342         {
8343                 const char_t* prefix;
8344                 size_t prefix_length;
8345
8346                 namespace_uri_predicate(const char_t* name)
8347                 {
8348                         const char_t* pos = find_char(name, ':');
8349
8350                         prefix = pos ? name : 0;
8351                         prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8352                 }
8353
8354                 bool operator()(xml_attribute a) const
8355                 {
8356                         const char_t* name = a.name();
8357
8358                         if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8359
8360                         return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8361                 }
8362         };
8363
8364         PUGI__FN const char_t* namespace_uri(xml_node node)
8365         {
8366                 namespace_uri_predicate pred = node.name();
8367
8368                 xml_node p = node;
8369
8370                 while (p)
8371                 {
8372                         xml_attribute a = p.find_attribute(pred);
8373
8374                         if (a) return a.value();
8375
8376                         p = p.parent();
8377                 }
8378
8379                 return PUGIXML_TEXT("");
8380         }
8381
8382         PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8383         {
8384                 namespace_uri_predicate pred = attr.name();
8385
8386                 // Default namespace does not apply to attributes
8387                 if (!pred.prefix) return PUGIXML_TEXT("");
8388
8389                 xml_node p = parent;
8390
8391                 while (p)
8392                 {
8393                         xml_attribute a = p.find_attribute(pred);
8394
8395                         if (a) return a.value();
8396
8397                         p = p.parent();
8398                 }
8399
8400                 return PUGIXML_TEXT("");
8401         }
8402
8403         PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8404         {
8405                 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8406         }
8407
8408         PUGI__FN char_t* normalize_space(char_t* buffer)
8409         {
8410                 char_t* write = buffer;
8411
8412                 for (char_t* it = buffer; *it; )
8413                 {
8414                         char_t ch = *it++;
8415
8416                         if (PUGI__IS_CHARTYPE(ch, ct_space))
8417                         {
8418                                 // replace whitespace sequence with single space
8419                                 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8420
8421                                 // avoid leading spaces
8422                                 if (write != buffer) *write++ = ' ';
8423                         }
8424                         else *write++ = ch;
8425                 }
8426
8427                 // remove trailing space
8428                 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8429
8430                 // zero-terminate
8431                 *write = 0;
8432
8433                 return write;
8434         }
8435
8436         PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8437         {
8438                 char_t* write = buffer;
8439
8440                 while (*buffer)
8441                 {
8442                         PUGI__DMC_VOLATILE char_t ch = *buffer++;
8443
8444                         const char_t* pos = find_char(from, ch);
8445
8446                         if (!pos)
8447                                 *write++ = ch; // do not process
8448                         else if (static_cast<size_t>(pos - from) < to_length)
8449                                 *write++ = to[pos - from]; // replace
8450                 }
8451
8452                 // zero-terminate
8453                 *write = 0;
8454
8455                 return write;
8456         }
8457
8458         PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8459         {
8460                 unsigned char table[128] = {0};
8461
8462                 while (*from)
8463                 {
8464                         unsigned int fc = static_cast<unsigned int>(*from);
8465                         unsigned int tc = static_cast<unsigned int>(*to);
8466
8467                         if (fc >= 128 || tc >= 128)
8468                                 return 0;
8469
8470                         // code=128 means "skip character"
8471                         if (!table[fc])
8472                                 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8473
8474                         from++;
8475                         if (tc) to++;
8476                 }
8477
8478                 for (int i = 0; i < 128; ++i)
8479                         if (!table[i])
8480                                 table[i] = static_cast<unsigned char>(i);
8481
8482                 void* result = alloc->allocate(sizeof(table));
8483                 if (!result) return 0;
8484
8485                 memcpy(result, table, sizeof(table));
8486
8487                 return static_cast<unsigned char*>(result);
8488         }
8489
8490         PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8491         {
8492                 char_t* write = buffer;
8493
8494                 while (*buffer)
8495                 {
8496                         char_t ch = *buffer++;
8497                         unsigned int index = static_cast<unsigned int>(ch);
8498
8499                         if (index < 128)
8500                         {
8501                                 unsigned char code = table[index];
8502
8503                                 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8504                                 // this code skips these characters without extra branches
8505                                 *write = static_cast<char_t>(code);
8506                                 write += 1 - (code >> 7);
8507                         }
8508                         else
8509                         {
8510                                 *write++ = ch;
8511                         }
8512                 }
8513
8514                 // zero-terminate
8515                 *write = 0;
8516
8517                 return write;
8518         }
8519
8520         inline bool is_xpath_attribute(const char_t* name)
8521         {
8522                 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8523         }
8524
8525         struct xpath_variable_boolean: xpath_variable
8526         {
8527                 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8528                 {
8529                 }
8530
8531                 bool value;
8532                 char_t name[1];
8533         };
8534
8535         struct xpath_variable_number: xpath_variable
8536         {
8537                 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8538                 {
8539                 }
8540
8541                 double value;
8542                 char_t name[1];
8543         };
8544
8545         struct xpath_variable_string: xpath_variable
8546         {
8547                 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8548                 {
8549                 }
8550
8551                 ~xpath_variable_string()
8552                 {
8553                         if (value) xml_memory::deallocate(value);
8554                 }
8555
8556                 char_t* value;
8557                 char_t name[1];
8558         };
8559
8560         struct xpath_variable_node_set: xpath_variable
8561         {
8562                 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8563                 {
8564                 }
8565
8566                 xpath_node_set value;
8567                 char_t name[1];
8568         };
8569
        // Default-constructed, immutable node set with internal linkage.
        // NOTE(review): presumably handed out where a node-set reference is needed but no real value exists - confirm against its users
        static const xpath_node_set dummy_node_set;
8571
8572         PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8573         {
8574                 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8575                 unsigned int result = 0;
8576
8577                 while (*str)
8578                 {
8579                         result += static_cast<unsigned int>(*str++);
8580                         result += result << 10;
8581                         result ^= result >> 6;
8582                 }
8583
8584                 result += result << 3;
8585                 result ^= result >> 11;
8586                 result += result << 15;
8587
8588                 return result;
8589         }
8590
8591         template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8592         {
8593                 size_t length = strlength(name);
8594                 if (length == 0) return 0; // empty variable names are invalid
8595
8596                 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8597                 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8598                 if (!memory) return 0;
8599
8600                 T* result = new (memory) T();
8601
8602                 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8603
8604                 return result;
8605         }
8606
8607         PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8608         {
8609                 switch (type)
8610                 {
8611                 case xpath_type_node_set:
8612                         return new_xpath_variable<xpath_variable_node_set>(name);
8613
8614                 case xpath_type_number:
8615                         return new_xpath_variable<xpath_variable_number>(name);
8616
8617                 case xpath_type_string:
8618                         return new_xpath_variable<xpath_variable_string>(name);
8619
8620                 case xpath_type_boolean:
8621                         return new_xpath_variable<xpath_variable_boolean>(name);
8622
8623                 default:
8624                         return 0;
8625                 }
8626         }
8627
8628         template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8629         {
8630                 var->~T();
8631                 xml_memory::deallocate(var);
8632         }
8633
8634         PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8635         {
8636                 switch (type)
8637                 {
8638                 case xpath_type_node_set:
8639                         delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8640                         break;
8641
8642                 case xpath_type_number:
8643                         delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8644                         break;
8645
8646                 case xpath_type_string:
8647                         delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8648                         break;
8649
8650                 case xpath_type_boolean:
8651                         delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8652                         break;
8653
8654                 default:
8655                         assert(false && "Invalid variable type"); // unreachable
8656                 }
8657         }
8658
8659         PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8660         {
8661                 switch (rhs->type())
8662                 {
8663                 case xpath_type_node_set:
8664                         return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8665
8666                 case xpath_type_number:
8667                         return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8668
8669                 case xpath_type_string:
8670                         return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8671
8672                 case xpath_type_boolean:
8673                         return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8674
8675                 default:
8676                         assert(false && "Invalid variable type"); // unreachable
8677                         return false;
8678                 }
8679         }
8680
8681         PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8682         {
8683                 size_t length = static_cast<size_t>(end - begin);
8684                 char_t* scratch = buffer;
8685
8686                 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8687                 {
8688                         // need to make dummy on-heap copy
8689                         scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8690                         if (!scratch) return false;
8691                 }
8692
8693                 // copy string to zero-terminated buffer and perform lookup
8694                 memcpy(scratch, begin, length * sizeof(char_t));
8695                 scratch[length] = 0;
8696
8697                 *out_result = set->get(scratch);
8698
8699                 // free dummy buffer
8700                 if (scratch != buffer) xml_memory::deallocate(scratch);
8701
8702                 return true;
8703         }
8704 PUGI__NS_END
8705
8706 // Internal node set class
8707 PUGI__NS_BEGIN
8708         PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8709         {
8710                 if (end - begin < 2)
8711                         return xpath_node_set::type_sorted;
8712
8713                 document_order_comparator cmp;
8714
8715                 bool first = cmp(begin[0], begin[1]);
8716
8717                 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8718                         if (cmp(it[0], it[1]) != first)
8719                                 return xpath_node_set::type_unsorted;
8720
8721                 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8722         }
8723
8724         PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8725         {
8726                 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8727
8728                 if (type == xpath_node_set::type_unsorted)
8729                 {
8730                         xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8731
8732                         if (sorted == xpath_node_set::type_unsorted)
8733                         {
8734                                 sort(begin, end, document_order_comparator());
8735
8736                                 type = xpath_node_set::type_sorted;
8737                         }
8738                         else
8739                                 type = sorted;
8740                 }
8741
8742                 if (type != order) reverse(begin, end);
8743
8744                 return order;
8745         }
8746
8747         PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8748         {
8749                 if (begin == end) return xpath_node();
8750
8751                 switch (type)
8752                 {
8753                 case xpath_node_set::type_sorted:
8754                         return *begin;
8755
8756                 case xpath_node_set::type_sorted_reverse:
8757                         return *(end - 1);
8758
8759                 case xpath_node_set::type_unsorted:
8760                         return *min_element(begin, end, document_order_comparator());
8761
8762                 default:
8763                         assert(false && "Invalid node set type"); // unreachable
8764                         return xpath_node();
8765                 }
8766         }
8767
8768         class xpath_node_set_raw
8769         {
8770                 xpath_node_set::type_t _type;
8771
8772                 xpath_node* _begin;
8773                 xpath_node* _end;
8774                 xpath_node* _eos;
8775
8776         public:
8777                 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8778                 {
8779                 }
8780
8781                 xpath_node* begin() const
8782                 {
8783                         return _begin;
8784                 }
8785
8786                 xpath_node* end() const
8787                 {
8788                         return _end;
8789                 }
8790
8791                 bool empty() const
8792                 {
8793                         return _begin == _end;
8794                 }
8795
8796                 size_t size() const
8797                 {
8798                         return static_cast<size_t>(_end - _begin);
8799                 }
8800
8801                 xpath_node first() const
8802                 {
8803                         return xpath_first(_begin, _end, _type);
8804                 }
8805
8806                 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8807
8808                 void push_back(const xpath_node& node, xpath_allocator* alloc)
8809                 {
8810                         if (_end != _eos)
8811                                 *_end++ = node;
8812                         else
8813                                 push_back_grow(node, alloc);
8814                 }
8815
8816                 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8817                 {
8818                         if (begin_ == end_) return;
8819
8820                         size_t size_ = static_cast<size_t>(_end - _begin);
8821                         size_t capacity = static_cast<size_t>(_eos - _begin);
8822                         size_t count = static_cast<size_t>(end_ - begin_);
8823
8824                         if (size_ + count > capacity)
8825                         {
8826                                 // reallocate the old array or allocate a new one
8827                                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8828                                 if (!data) return;
8829
8830                                 // finalize
8831                                 _begin = data;
8832                                 _end = data + size_;
8833                                 _eos = data + size_ + count;
8834                         }
8835
8836                         memcpy(_end, begin_, count * sizeof(xpath_node));
8837                         _end += count;
8838                 }
8839
8840                 void sort_do()
8841                 {
8842                         _type = xpath_sort(_begin, _end, _type, false);
8843                 }
8844
8845                 void truncate(xpath_node* pos)
8846                 {
8847                         assert(_begin <= pos && pos <= _end);
8848
8849                         _end = pos;
8850                 }
8851
8852                 void remove_duplicates()
8853                 {
8854                         if (_type == xpath_node_set::type_unsorted)
8855                                 sort(_begin, _end, duplicate_comparator());
8856
8857                         _end = unique(_begin, _end);
8858                 }
8859
8860                 xpath_node_set::type_t type() const
8861                 {
8862                         return _type;
8863                 }
8864
8865                 void set_type(xpath_node_set::type_t value)
8866                 {
8867                         _type = value;
8868                 }
8869         };
8870
8871         PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8872         {
8873                 size_t capacity = static_cast<size_t>(_eos - _begin);
8874
8875                 // get new capacity (1.5x rule)
8876                 size_t new_capacity = capacity + capacity / 2 + 1;
8877
8878                 // reallocate the old array or allocate a new one
8879                 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8880                 if (!data) return;
8881
8882                 // finalize
8883                 _begin = data;
8884                 _end = data + capacity;
8885                 _eos = data + new_capacity;
8886
8887                 // push
8888                 *_end++ = node;
8889         }
8890 PUGI__NS_END
8891
8892 PUGI__NS_BEGIN
8893         struct xpath_context
8894         {
8895                 xpath_node n;
8896                 size_t position, size;
8897
8898                 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8899                 {
8900                 }
8901         };
8902
8903         enum lexeme_t
8904         {
8905                 lex_none = 0,
8906                 lex_equal,
8907                 lex_not_equal,
8908                 lex_less,
8909                 lex_greater,
8910                 lex_less_or_equal,
8911                 lex_greater_or_equal,
8912                 lex_plus,
8913                 lex_minus,
8914                 lex_multiply,
8915                 lex_union,
8916                 lex_var_ref,
8917                 lex_open_brace,
8918                 lex_close_brace,
8919                 lex_quoted_string,
8920                 lex_number,
8921                 lex_slash,
8922                 lex_double_slash,
8923                 lex_open_square_brace,
8924                 lex_close_square_brace,
8925                 lex_string,
8926                 lex_comma,
8927                 lex_axis_attribute,
8928                 lex_dot,
8929                 lex_double_dot,
8930                 lex_double_colon,
8931                 lex_eof
8932         };
8933
8934         struct xpath_lexer_string
8935         {
8936                 const char_t* begin;
8937                 const char_t* end;
8938
8939                 xpath_lexer_string(): begin(0), end(0)
8940                 {
8941                 }
8942
8943                 bool operator==(const char_t* other) const
8944                 {
8945                         size_t length = static_cast<size_t>(end - begin);
8946
8947                         return strequalrange(other, begin, length);
8948                 }
8949         };
8950
8951         class xpath_lexer
8952         {
8953                 const char_t* _cur;
8954                 const char_t* _cur_lexeme_pos;
8955                 xpath_lexer_string _cur_lexeme_contents;
8956
8957                 lexeme_t _cur_lexeme;
8958
8959         public:
8960                 explicit xpath_lexer(const char_t* query): _cur(query)
8961                 {
8962                         next();
8963                 }
8964
8965                 const char_t* state() const
8966                 {
8967                         return _cur;
8968                 }
8969
8970                 void next()
8971                 {
8972                         const char_t* cur = _cur;
8973
8974                         while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8975
8976                         // save lexeme position for error reporting
8977                         _cur_lexeme_pos = cur;
8978
8979                         switch (*cur)
8980                         {
8981                         case 0:
8982                                 _cur_lexeme = lex_eof;
8983                                 break;
8984
8985                         case '>':
8986                                 if (*(cur+1) == '=')
8987                                 {
8988                                         cur += 2;
8989                                         _cur_lexeme = lex_greater_or_equal;
8990                                 }
8991                                 else
8992                                 {
8993                                         cur += 1;
8994                                         _cur_lexeme = lex_greater;
8995                                 }
8996                                 break;
8997
8998                         case '<':
8999                                 if (*(cur+1) == '=')
9000                                 {
9001                                         cur += 2;
9002                                         _cur_lexeme = lex_less_or_equal;
9003                                 }
9004                                 else
9005                                 {
9006                                         cur += 1;
9007                                         _cur_lexeme = lex_less;
9008                                 }
9009                                 break;
9010
9011                         case '!':
9012                                 if (*(cur+1) == '=')
9013                                 {
9014                                         cur += 2;
9015                                         _cur_lexeme = lex_not_equal;
9016                                 }
9017                                 else
9018                                 {
9019                                         _cur_lexeme = lex_none;
9020                                 }
9021                                 break;
9022
9023                         case '=':
9024                                 cur += 1;
9025                                 _cur_lexeme = lex_equal;
9026
9027                                 break;
9028
9029                         case '+':
9030                                 cur += 1;
9031                                 _cur_lexeme = lex_plus;
9032
9033                                 break;
9034
9035                         case '-':
9036                                 cur += 1;
9037                                 _cur_lexeme = lex_minus;
9038
9039                                 break;
9040
9041                         case '*':
9042                                 cur += 1;
9043                                 _cur_lexeme = lex_multiply;
9044
9045                                 break;
9046
9047                         case '|':
9048                                 cur += 1;
9049                                 _cur_lexeme = lex_union;
9050
9051                                 break;
9052
9053                         case '$':
9054                                 cur += 1;
9055
9056                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9057                                 {
9058                                         _cur_lexeme_contents.begin = cur;
9059
9060                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9061
9062                                         if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9063                                         {
9064                                                 cur++; // :
9065
9066                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9067                                         }
9068
9069                                         _cur_lexeme_contents.end = cur;
9070
9071                                         _cur_lexeme = lex_var_ref;
9072                                 }
9073                                 else
9074                                 {
9075                                         _cur_lexeme = lex_none;
9076                                 }
9077
9078                                 break;
9079
9080                         case '(':
9081                                 cur += 1;
9082                                 _cur_lexeme = lex_open_brace;
9083
9084                                 break;
9085
9086                         case ')':
9087                                 cur += 1;
9088                                 _cur_lexeme = lex_close_brace;
9089
9090                                 break;
9091
9092                         case '[':
9093                                 cur += 1;
9094                                 _cur_lexeme = lex_open_square_brace;
9095
9096                                 break;
9097
9098                         case ']':
9099                                 cur += 1;
9100                                 _cur_lexeme = lex_close_square_brace;
9101
9102                                 break;
9103
9104                         case ',':
9105                                 cur += 1;
9106                                 _cur_lexeme = lex_comma;
9107
9108                                 break;
9109
9110                         case '/':
9111                                 if (*(cur+1) == '/')
9112                                 {
9113                                         cur += 2;
9114                                         _cur_lexeme = lex_double_slash;
9115                                 }
9116                                 else
9117                                 {
9118                                         cur += 1;
9119                                         _cur_lexeme = lex_slash;
9120                                 }
9121                                 break;
9122
9123                         case '.':
9124                                 if (*(cur+1) == '.')
9125                                 {
9126                                         cur += 2;
9127                                         _cur_lexeme = lex_double_dot;
9128                                 }
9129                                 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9130                                 {
9131                                         _cur_lexeme_contents.begin = cur; // .
9132
9133                                         ++cur;
9134
9135                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9136
9137                                         _cur_lexeme_contents.end = cur;
9138
9139                                         _cur_lexeme = lex_number;
9140                                 }
9141                                 else
9142                                 {
9143                                         cur += 1;
9144                                         _cur_lexeme = lex_dot;
9145                                 }
9146                                 break;
9147
9148                         case '@':
9149                                 cur += 1;
9150                                 _cur_lexeme = lex_axis_attribute;
9151
9152                                 break;
9153
9154                         case '"':
9155                         case '\'':
9156                         {
9157                                 char_t terminator = *cur;
9158
9159                                 ++cur;
9160
9161                                 _cur_lexeme_contents.begin = cur;
9162                                 while (*cur && *cur != terminator) cur++;
9163                                 _cur_lexeme_contents.end = cur;
9164
9165                                 if (!*cur)
9166                                         _cur_lexeme = lex_none;
9167                                 else
9168                                 {
9169                                         cur += 1;
9170                                         _cur_lexeme = lex_quoted_string;
9171                                 }
9172
9173                                 break;
9174                         }
9175
9176                         case ':':
9177                                 if (*(cur+1) == ':')
9178                                 {
9179                                         cur += 2;
9180                                         _cur_lexeme = lex_double_colon;
9181                                 }
9182                                 else
9183                                 {
9184                                         _cur_lexeme = lex_none;
9185                                 }
9186                                 break;
9187
9188                         default:
9189                                 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9190                                 {
9191                                         _cur_lexeme_contents.begin = cur;
9192
9193                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9194
9195                                         if (*cur == '.')
9196                                         {
9197                                                 cur++;
9198
9199                                                 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9200                                         }
9201
9202                                         _cur_lexeme_contents.end = cur;
9203
9204                                         _cur_lexeme = lex_number;
9205                                 }
9206                                 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9207                                 {
9208                                         _cur_lexeme_contents.begin = cur;
9209
9210                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9211
9212                                         if (cur[0] == ':')
9213                                         {
9214                                                 if (cur[1] == '*') // namespace test ncname:*
9215                                                 {
9216                                                         cur += 2; // :*
9217                                                 }
9218                                                 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9219                                                 {
9220                                                         cur++; // :
9221
9222                                                         while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9223                                                 }
9224                                         }
9225
9226                                         _cur_lexeme_contents.end = cur;
9227
9228                                         _cur_lexeme = lex_string;
9229                                 }
9230                                 else
9231                                 {
9232                                         _cur_lexeme = lex_none;
9233                                 }
9234                         }
9235
9236                         _cur = cur;
9237                 }
9238
9239                 lexeme_t current() const
9240                 {
9241                         return _cur_lexeme;
9242                 }
9243
9244                 const char_t* current_pos() const
9245                 {
9246                         return _cur_lexeme_pos;
9247                 }
9248
9249                 const xpath_lexer_string& contents() const
9250                 {
9251                         assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9252
9253                         return _cur_lexeme_contents;
9254                 }
9255         };
9256
9257         enum ast_type_t
9258         {
9259                 ast_unknown,
9260                 ast_op_or,                                              // left or right
9261                 ast_op_and,                                             // left and right
9262                 ast_op_equal,                                   // left = right
9263                 ast_op_not_equal,                               // left != right
9264                 ast_op_less,                                    // left < right
9265                 ast_op_greater,                                 // left > right
9266                 ast_op_less_or_equal,                   // left <= right
9267                 ast_op_greater_or_equal,                // left >= right
9268                 ast_op_add,                                             // left + right
9269                 ast_op_subtract,                                // left - right
9270                 ast_op_multiply,                                // left * right
9271                 ast_op_divide,                                  // left / right
9272                 ast_op_mod,                                             // left % right
9273                 ast_op_negate,                                  // left - right
9274                 ast_op_union,                                   // left | right
9275                 ast_predicate,                                  // apply predicate to set; next points to next predicate
9276                 ast_filter,                                             // select * from left where right
9277                 ast_string_constant,                    // string constant
9278                 ast_number_constant,                    // number constant
9279                 ast_variable,                                   // variable
9280                 ast_func_last,                                  // last()
9281                 ast_func_position,                              // position()
9282                 ast_func_count,                                 // count(left)
9283                 ast_func_id,                                    // id(left)
9284                 ast_func_local_name_0,                  // local-name()
9285                 ast_func_local_name_1,                  // local-name(left)
9286                 ast_func_namespace_uri_0,               // namespace-uri()
9287                 ast_func_namespace_uri_1,               // namespace-uri(left)
9288                 ast_func_name_0,                                // name()
9289                 ast_func_name_1,                                // name(left)
9290                 ast_func_string_0,                              // string()
9291                 ast_func_string_1,                              // string(left)
9292                 ast_func_concat,                                // concat(left, right, siblings)
9293                 ast_func_starts_with,                   // starts_with(left, right)
9294                 ast_func_contains,                              // contains(left, right)
9295                 ast_func_substring_before,              // substring-before(left, right)
9296                 ast_func_substring_after,               // substring-after(left, right)
9297                 ast_func_substring_2,                   // substring(left, right)
9298                 ast_func_substring_3,                   // substring(left, right, third)
9299                 ast_func_string_length_0,               // string-length()
9300                 ast_func_string_length_1,               // string-length(left)
9301                 ast_func_normalize_space_0,             // normalize-space()
9302                 ast_func_normalize_space_1,             // normalize-space(left)
9303                 ast_func_translate,                             // translate(left, right, third)
9304                 ast_func_boolean,                               // boolean(left)
9305                 ast_func_not,                                   // not(left)
9306                 ast_func_true,                                  // true()
9307                 ast_func_false,                                 // false()
9308                 ast_func_lang,                                  // lang(left)
9309                 ast_func_number_0,                              // number()
9310                 ast_func_number_1,                              // number(left)
9311                 ast_func_sum,                                   // sum(left)
9312                 ast_func_floor,                                 // floor(left)
9313                 ast_func_ceiling,                               // ceiling(left)
9314                 ast_func_round,                                 // round(left)
9315                 ast_step,                                               // process set left with step
9316                 ast_step_root,                                  // select root node
9317
9318                 ast_opt_translate_table,                // translate(left, right, third) where right/third are constants
9319                 ast_opt_compare_attribute               // @name = 'string'
9320         };
9321
9322         enum axis_t
9323         {
9324                 axis_ancestor,
9325                 axis_ancestor_or_self,
9326                 axis_attribute,
9327                 axis_child,
9328                 axis_descendant,
9329                 axis_descendant_or_self,
9330                 axis_following,
9331                 axis_following_sibling,
9332                 axis_namespace,
9333                 axis_parent,
9334                 axis_preceding,
9335                 axis_preceding_sibling,
9336                 axis_self
9337         };
9338
9339         enum nodetest_t
9340         {
9341                 nodetest_none,
9342                 nodetest_name,
9343                 nodetest_type_node,
9344                 nodetest_type_comment,
9345                 nodetest_type_pi,
9346                 nodetest_type_text,
9347                 nodetest_pi,
9348                 nodetest_all,
9349                 nodetest_all_in_namespace
9350         };
9351
9352         enum predicate_t
9353         {
9354                 predicate_default,
9355                 predicate_posinv,
9356                 predicate_constant,
9357                 predicate_constant_one
9358         };
9359
9360         enum nodeset_eval_t
9361         {
9362                 nodeset_eval_all,
9363                 nodeset_eval_any,
9364                 nodeset_eval_first
9365         };
9366
9367         template <axis_t N> struct axis_to_type
9368         {
9369                 static const axis_t axis;
9370         };
9371
9372         template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9373
9374         class xpath_ast_node
9375         {
9376         private:
9377                 // node type
9378                 char _type;
9379                 char _rettype;
9380
9381                 // for ast_step
9382                 char _axis;
9383
9384                 // for ast_step/ast_predicate/ast_filter
9385                 char _test;
9386
9387                 // tree node structure
9388                 xpath_ast_node* _left;
9389                 xpath_ast_node* _right;
9390                 xpath_ast_node* _next;
9391
9392                 union
9393                 {
9394                         // value for ast_string_constant
9395                         const char_t* string;
9396                         // value for ast_number_constant
9397                         double number;
9398                         // variable for ast_variable
9399                         xpath_variable* variable;
9400                         // node test for ast_step (node name/namespace/node type/pi target)
9401                         const char_t* nodetest;
9402                         // table for ast_opt_translate_table
9403                         const unsigned char* table;
9404                 } _data;
9405
9406                 xpath_ast_node(const xpath_ast_node&);
9407                 xpath_ast_node& operator=(const xpath_ast_node&);
9408
9409                 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9410                 {
9411                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9412
9413                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9414                         {
9415                                 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9416                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9417                                 else if (lt == xpath_type_number || rt == xpath_type_number)
9418                                         return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9419                                 else if (lt == xpath_type_string || rt == xpath_type_string)
9420                                 {
9421                                         xpath_allocator_capture cr(stack.result);
9422
9423                                         xpath_string ls = lhs->eval_string(c, stack);
9424                                         xpath_string rs = rhs->eval_string(c, stack);
9425
9426                                         return comp(ls, rs);
9427                                 }
9428                         }
9429                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9430                         {
9431                                 xpath_allocator_capture cr(stack.result);
9432
9433                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9434                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9435
9436                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9437                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9438                                         {
9439                                                 xpath_allocator_capture cri(stack.result);
9440
9441                                                 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9442                                                         return true;
9443                                         }
9444
9445                                 return false;
9446                         }
9447                         else
9448                         {
9449                                 if (lt == xpath_type_node_set)
9450                                 {
9451                                         swap(lhs, rhs);
9452                                         swap(lt, rt);
9453                                 }
9454
9455                                 if (lt == xpath_type_boolean)
9456                                         return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9457                                 else if (lt == xpath_type_number)
9458                                 {
9459                                         xpath_allocator_capture cr(stack.result);
9460
9461                                         double l = lhs->eval_number(c, stack);
9462                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9463
9464                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9465                                         {
9466                                                 xpath_allocator_capture cri(stack.result);
9467
9468                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9469                                                         return true;
9470                                         }
9471
9472                                         return false;
9473                                 }
9474                                 else if (lt == xpath_type_string)
9475                                 {
9476                                         xpath_allocator_capture cr(stack.result);
9477
9478                                         xpath_string l = lhs->eval_string(c, stack);
9479                                         xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9480
9481                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9482                                         {
9483                                                 xpath_allocator_capture cri(stack.result);
9484
9485                                                 if (comp(l, string_value(*ri, stack.result)))
9486                                                         return true;
9487                                         }
9488
9489                                         return false;
9490                                 }
9491                         }
9492
9493                         assert(false && "Wrong types"); // unreachable
9494                         return false;
9495                 }
9496
9497                 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9498                 {
9499                         return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9500                 }
9501
9502                 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9503                 {
9504                         xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9505
9506                         if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9507                                 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9508                         else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9509                         {
9510                                 xpath_allocator_capture cr(stack.result);
9511
9512                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9513                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9514
9515                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9516                                 {
9517                                         xpath_allocator_capture cri(stack.result);
9518
9519                                         double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9520
9521                                         for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9522                                         {
9523                                                 xpath_allocator_capture crii(stack.result);
9524
9525                                                 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9526                                                         return true;
9527                                         }
9528                                 }
9529
9530                                 return false;
9531                         }
9532                         else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9533                         {
9534                                 xpath_allocator_capture cr(stack.result);
9535
9536                                 double l = lhs->eval_number(c, stack);
9537                                 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9538
9539                                 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9540                                 {
9541                                         xpath_allocator_capture cri(stack.result);
9542
9543                                         if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9544                                                 return true;
9545                                 }
9546
9547                                 return false;
9548                         }
9549                         else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9550                         {
9551                                 xpath_allocator_capture cr(stack.result);
9552
9553                                 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9554                                 double r = rhs->eval_number(c, stack);
9555
9556                                 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9557                                 {
9558                                         xpath_allocator_capture cri(stack.result);
9559
9560                                         if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9561                                                 return true;
9562                                 }
9563
9564                                 return false;
9565                         }
9566                         else
9567                         {
9568                                 assert(false && "Wrong types"); // unreachable
9569                                 return false;
9570                         }
9571                 }
9572
9573                 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9574                 {
9575                         assert(ns.size() >= first);
9576                         assert(expr->rettype() != xpath_type_number);
9577
9578                         size_t i = 1;
9579                         size_t size = ns.size() - first;
9580
9581                         xpath_node* last = ns.begin() + first;
9582
9583                         // remove_if... or well, sort of
9584                         for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9585                         {
9586                                 xpath_context c(*it, i, size);
9587
9588                                 if (expr->eval_boolean(c, stack))
9589                                 {
9590                                         *last++ = *it;
9591
9592                                         if (once) break;
9593                                 }
9594                         }
9595
9596                         ns.truncate(last);
9597                 }
9598
9599                 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9600                 {
9601                         assert(ns.size() >= first);
9602                         assert(expr->rettype() == xpath_type_number);
9603
9604                         size_t i = 1;
9605                         size_t size = ns.size() - first;
9606
9607                         xpath_node* last = ns.begin() + first;
9608
9609                         // remove_if... or well, sort of
9610                         for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9611                         {
9612                                 xpath_context c(*it, i, size);
9613
9614                                 if (expr->eval_number(c, stack) == i)
9615                                 {
9616                                         *last++ = *it;
9617
9618                                         if (once) break;
9619                                 }
9620                         }
9621
9622                         ns.truncate(last);
9623                 }
9624
9625                 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9626                 {
9627                         assert(ns.size() >= first);
9628                         assert(expr->rettype() == xpath_type_number);
9629
9630                         size_t size = ns.size() - first;
9631
9632                         xpath_node* last = ns.begin() + first;
9633
9634                         xpath_context c(xpath_node(), 1, size);
9635
9636                         double er = expr->eval_number(c, stack);
9637
9638                         if (er >= 1.0 && er <= size)
9639                         {
9640                                 size_t eri = static_cast<size_t>(er);
9641
9642                                 if (er == eri)
9643                                 {
9644                                         xpath_node r = last[eri - 1];
9645
9646                                         *last++ = r;
9647                                 }
9648                         }
9649
9650                         ns.truncate(last);
9651                 }
9652
9653                 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9654                 {
9655                         if (ns.size() == first) return;
9656
9657                         assert(_type == ast_filter || _type == ast_predicate);
9658
9659                         if (_test == predicate_constant || _test == predicate_constant_one)
9660                                 apply_predicate_number_const(ns, first, _right, stack);
9661                         else if (_right->rettype() == xpath_type_number)
9662                                 apply_predicate_number(ns, first, _right, stack, once);
9663                         else
9664                                 apply_predicate_boolean(ns, first, _right, stack, once);
9665                 }
9666
9667                 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9668                 {
9669                         if (ns.size() == first) return;
9670
9671                         bool last_once = eval_once(ns.type(), eval);
9672
9673                         for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9674                                 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9675                 }
9676
9677                 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9678                 {
9679                         assert(a);
9680
9681                         const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9682
9683                         switch (_test)
9684                         {
9685                         case nodetest_name:
9686                                 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9687                                 {
9688                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9689                                         return true;
9690                                 }
9691                                 break;
9692
9693                         case nodetest_type_node:
9694                         case nodetest_all:
9695                                 if (is_xpath_attribute(name))
9696                                 {
9697                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9698                                         return true;
9699                                 }
9700                                 break;
9701
9702                         case nodetest_all_in_namespace:
9703                                 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9704                                 {
9705                                         ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9706                                         return true;
9707                                 }
9708                                 break;
9709
9710                         default:
9711                                 ;
9712                         }
9713
9714                         return false;
9715                 }
9716
9717                 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9718                 {
9719                         assert(n);
9720
9721                         xml_node_type type = PUGI__NODETYPE(n);
9722
9723                         switch (_test)
9724                         {
9725                         case nodetest_name:
9726                                 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9727                                 {
9728                                         ns.push_back(xml_node(n), alloc);
9729                                         return true;
9730                                 }
9731                                 break;
9732
9733                         case nodetest_type_node:
9734                                 ns.push_back(xml_node(n), alloc);
9735                                 return true;
9736
9737                         case nodetest_type_comment:
9738                                 if (type == node_comment)
9739                                 {
9740                                         ns.push_back(xml_node(n), alloc);
9741                                         return true;
9742                                 }
9743                                 break;
9744
9745                         case nodetest_type_text:
9746                                 if (type == node_pcdata || type == node_cdata)
9747                                 {
9748                                         ns.push_back(xml_node(n), alloc);
9749                                         return true;
9750                                 }
9751                                 break;
9752
9753                         case nodetest_type_pi:
9754                                 if (type == node_pi)
9755                                 {
9756                                         ns.push_back(xml_node(n), alloc);
9757                                         return true;
9758                                 }
9759                                 break;
9760
9761                         case nodetest_pi:
9762                                 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9763                                 {
9764                                         ns.push_back(xml_node(n), alloc);
9765                                         return true;
9766                                 }
9767                                 break;
9768
9769                         case nodetest_all:
9770                                 if (type == node_element)
9771                                 {
9772                                         ns.push_back(xml_node(n), alloc);
9773                                         return true;
9774                                 }
9775                                 break;
9776
9777                         case nodetest_all_in_namespace:
9778                                 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9779                                 {
9780                                         ns.push_back(xml_node(n), alloc);
9781                                         return true;
9782                                 }
9783                                 break;
9784
9785                         default:
9786                                 assert(false && "Unknown axis"); // unreachable
9787                         }
9788
9789                         return false;
9790                 }
9791
9792                 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9793                 {
9794                         const axis_t axis = T::axis;
9795
9796                         switch (axis)
9797                         {
9798                         case axis_attribute:
9799                         {
9800                                 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9801                                         if (step_push(ns, a, n, alloc) & once)
9802                                                 return;
9803
9804                                 break;
9805                         }
9806
9807                         case axis_child:
9808                         {
9809                                 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9810                                         if (step_push(ns, c, alloc) & once)
9811                                                 return;
9812
9813                                 break;
9814                         }
9815
9816                         case axis_descendant:
9817                         case axis_descendant_or_self:
9818                         {
9819                                 if (axis == axis_descendant_or_self)
9820                                         if (step_push(ns, n, alloc) & once)
9821                                                 return;
9822
9823                                 xml_node_struct* cur = n->first_child;
9824
9825                                 while (cur)
9826                                 {
9827                                         if (step_push(ns, cur, alloc) & once)
9828                                                 return;
9829
9830                                         if (cur->first_child)
9831                                                 cur = cur->first_child;
9832                                         else
9833                                         {
9834                                                 while (!cur->next_sibling)
9835                                                 {
9836                                                         cur = cur->parent;
9837
9838                                                         if (cur == n) return;
9839                                                 }
9840
9841                                                 cur = cur->next_sibling;
9842                                         }
9843                                 }
9844
9845                                 break;
9846                         }
9847
9848                         case axis_following_sibling:
9849                         {
9850                                 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9851                                         if (step_push(ns, c, alloc) & once)
9852                                                 return;
9853
9854                                 break;
9855                         }
9856
9857                         case axis_preceding_sibling:
9858                         {
9859                                 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9860                                         if (step_push(ns, c, alloc) & once)
9861                                                 return;
9862
9863                                 break;
9864                         }
9865
9866                         case axis_following:
9867                         {
9868                                 xml_node_struct* cur = n;
9869
9870                                 // exit from this node so that we don't include descendants
9871                                 while (!cur->next_sibling)
9872                                 {
9873                                         cur = cur->parent;
9874
9875                                         if (!cur) return;
9876                                 }
9877
9878                                 cur = cur->next_sibling;
9879
9880                                 while (cur)
9881                                 {
9882                                         if (step_push(ns, cur, alloc) & once)
9883                                                 return;
9884
9885                                         if (cur->first_child)
9886                                                 cur = cur->first_child;
9887                                         else
9888                                         {
9889                                                 while (!cur->next_sibling)
9890                                                 {
9891                                                         cur = cur->parent;
9892
9893                                                         if (!cur) return;
9894                                                 }
9895
9896                                                 cur = cur->next_sibling;
9897                                         }
9898                                 }
9899
9900                                 break;
9901                         }
9902
9903                         case axis_preceding:
9904                         {
9905                                 xml_node_struct* cur = n;
9906
9907                                 // exit from this node so that we don't include descendants
9908                                 while (!cur->prev_sibling_c->next_sibling)
9909                                 {
9910                                         cur = cur->parent;
9911
9912                                         if (!cur) return;
9913                                 }
9914
9915                                 cur = cur->prev_sibling_c;
9916
9917                                 while (cur)
9918                                 {
9919                                         if (cur->first_child)
9920                                                 cur = cur->first_child->prev_sibling_c;
9921                                         else
9922                                         {
9923                                                 // leaf node, can't be ancestor
9924                                                 if (step_push(ns, cur, alloc) & once)
9925                                                         return;
9926
9927                                                 while (!cur->prev_sibling_c->next_sibling)
9928                                                 {
9929                                                         cur = cur->parent;
9930
9931                                                         if (!cur) return;
9932
9933                                                         if (!node_is_ancestor(cur, n))
9934                                                                 if (step_push(ns, cur, alloc) & once)
9935                                                                         return;
9936                                                 }
9937
9938                                                 cur = cur->prev_sibling_c;
9939                                         }
9940                                 }
9941
9942                                 break;
9943                         }
9944
9945                         case axis_ancestor:
9946                         case axis_ancestor_or_self:
9947                         {
9948                                 if (axis == axis_ancestor_or_self)
9949                                         if (step_push(ns, n, alloc) & once)
9950                                                 return;
9951
9952                                 xml_node_struct* cur = n->parent;
9953
9954                                 while (cur)
9955                                 {
9956                                         if (step_push(ns, cur, alloc) & once)
9957                                                 return;
9958
9959                                         cur = cur->parent;
9960                                 }
9961
9962                                 break;
9963                         }
9964
9965                         case axis_self:
9966                         {
9967                                 step_push(ns, n, alloc);
9968
9969                                 break;
9970                         }
9971
9972                         case axis_parent:
9973                         {
9974                                 if (n->parent)
9975                                         step_push(ns, n->parent, alloc);
9976
9977                                 break;
9978                         }
9979
9980                         default:
9981                                 assert(false && "Unimplemented axis"); // unreachable
9982                         }
9983                 }
9984
9985                 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9986                 {
9987                         const axis_t axis = T::axis;
9988
9989                         switch (axis)
9990                         {
9991                         case axis_ancestor:
9992                         case axis_ancestor_or_self:
9993                         {
9994                                 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9995                                         if (step_push(ns, a, p, alloc) & once)
9996                                                 return;
9997
9998                                 xml_node_struct* cur = p;
9999
10000                                 while (cur)
10001                                 {
10002                                         if (step_push(ns, cur, alloc) & once)
10003                                                 return;
10004
10005                                         cur = cur->parent;
10006                                 }
10007
10008                                 break;
10009                         }
10010
10011                         case axis_descendant_or_self:
10012                         case axis_self:
10013                         {
10014                                 if (_test == nodetest_type_node) // reject attributes based on principal node type test
10015                                         step_push(ns, a, p, alloc);
10016
10017                                 break;
10018                         }
10019
10020                         case axis_following:
10021                         {
10022                                 xml_node_struct* cur = p;
10023
10024                                 while (cur)
10025                                 {
10026                                         if (cur->first_child)
10027                                                 cur = cur->first_child;
10028                                         else
10029                                         {
10030                                                 while (!cur->next_sibling)
10031                                                 {
10032                                                         cur = cur->parent;
10033
10034                                                         if (!cur) return;
10035                                                 }
10036
10037                                                 cur = cur->next_sibling;
10038                                         }
10039
10040                                         if (step_push(ns, cur, alloc) & once)
10041                                                 return;
10042                                 }
10043
10044                                 break;
10045                         }
10046
10047                         case axis_parent:
10048                         {
10049                                 step_push(ns, p, alloc);
10050
10051                                 break;
10052                         }
10053
10054                         case axis_preceding:
10055                         {
10056                                 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10057                                 step_fill(ns, p, alloc, once, v);
10058                                 break;
10059                         }
10060
10061                         default:
10062                                 assert(false && "Unimplemented axis"); // unreachable
10063                         }
10064                 }
10065
10066                 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10067                 {
10068                         const axis_t axis = T::axis;
10069                         const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10070
10071                         if (xn.node())
10072                                 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10073                         else if (axis_has_attributes && xn.attribute() && xn.parent())
10074                                 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10075                 }
10076
10077                 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10078                 {
10079                         const axis_t axis = T::axis;
10080                         const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10081                         const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10082
10083                         bool once =
10084                                 (axis == axis_attribute && _test == nodetest_name) ||
10085                                 (!_right && eval_once(axis_type, eval)) ||
10086                                 (_right && !_right->_next && _right->_test == predicate_constant_one);
10087
10088                         xpath_node_set_raw ns;
10089                         ns.set_type(axis_type);
10090
10091                         if (_left)
10092                         {
10093                                 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10094
10095                                 // self axis preserves the original order
10096                                 if (axis == axis_self) ns.set_type(s.type());
10097
10098                                 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10099                                 {
10100                                         size_t size = ns.size();
10101
10102                                         // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10103                                         if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10104
10105                                         step_fill(ns, *it, stack.result, once, v);
10106                                         if (_right) apply_predicates(ns, size, stack, eval);
10107                                 }
10108                         }
10109                         else
10110                         {
10111                                 step_fill(ns, c.n, stack.result, once, v);
10112                                 if (_right) apply_predicates(ns, 0, stack, eval);
10113                         }
10114
10115                         // child, attribute and self axes always generate unique set of nodes
10116                         // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10117                         if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10118                                 ns.remove_duplicates();
10119
10120                         return ns;
10121                 }
10122
10123         public:
10124                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10125                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10126                 {
10127                         assert(type == ast_string_constant);
10128                         _data.string = value;
10129                 }
10130
10131                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10132                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10133                 {
10134                         assert(type == ast_number_constant);
10135                         _data.number = value;
10136                 }
10137
10138                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10139                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10140                 {
10141                         assert(type == ast_variable);
10142                         _data.variable = value;
10143                 }
10144
10145                 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10146                         _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10147                 {
10148                 }
10149
10150                 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10151                         _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10152                 {
10153                         assert(type == ast_step);
10154                         _data.nodetest = contents;
10155                 }
10156
10157                 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10158                         _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10159                 {
10160                         assert(type == ast_filter || type == ast_predicate);
10161                 }
10162
10163                 void set_next(xpath_ast_node* value)
10164                 {
10165                         _next = value;
10166                 }
10167
10168                 void set_right(xpath_ast_node* value)
10169                 {
10170                         _right = value;
10171                 }
10172
10173                 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10174                 {
10175                         switch (_type)
10176                         {
10177                         case ast_op_or:
10178                                 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10179
10180                         case ast_op_and:
10181                                 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10182
10183                         case ast_op_equal:
10184                                 return compare_eq(_left, _right, c, stack, equal_to());
10185
10186                         case ast_op_not_equal:
10187                                 return compare_eq(_left, _right, c, stack, not_equal_to());
10188
10189                         case ast_op_less:
10190                                 return compare_rel(_left, _right, c, stack, less());
10191
10192                         case ast_op_greater:
10193                                 return compare_rel(_right, _left, c, stack, less());
10194
10195                         case ast_op_less_or_equal:
10196                                 return compare_rel(_left, _right, c, stack, less_equal());
10197
10198                         case ast_op_greater_or_equal:
10199                                 return compare_rel(_right, _left, c, stack, less_equal());
10200
10201                         case ast_func_starts_with:
10202                         {
10203                                 xpath_allocator_capture cr(stack.result);
10204
10205                                 xpath_string lr = _left->eval_string(c, stack);
10206                                 xpath_string rr = _right->eval_string(c, stack);
10207
10208                                 return starts_with(lr.c_str(), rr.c_str());
10209                         }
10210
10211                         case ast_func_contains:
10212                         {
10213                                 xpath_allocator_capture cr(stack.result);
10214
10215                                 xpath_string lr = _left->eval_string(c, stack);
10216                                 xpath_string rr = _right->eval_string(c, stack);
10217
10218                                 return find_substring(lr.c_str(), rr.c_str()) != 0;
10219                         }
10220
10221                         case ast_func_boolean:
10222                                 return _left->eval_boolean(c, stack);
10223
10224                         case ast_func_not:
10225                                 return !_left->eval_boolean(c, stack);
10226
10227                         case ast_func_true:
10228                                 return true;
10229
10230                         case ast_func_false:
10231                                 return false;
10232
10233                         case ast_func_lang:
10234                         {
10235                                 if (c.n.attribute()) return false;
10236
10237                                 xpath_allocator_capture cr(stack.result);
10238
10239                                 xpath_string lang = _left->eval_string(c, stack);
10240
10241                                 for (xml_node n = c.n.node(); n; n = n.parent())
10242                                 {
10243                                         xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10244
10245                                         if (a)
10246                                         {
10247                                                 const char_t* value = a.value();
10248
10249                                                 // strnicmp / strncasecmp is not portable
10250                                                 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10251                                                 {
10252                                                         if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10253                                                         ++value;
10254                                                 }
10255
10256                                                 return *value == 0 || *value == '-';
10257                                         }
10258                                 }
10259
10260                                 return false;
10261                         }
10262
10263                         case ast_opt_compare_attribute:
10264                         {
10265                                 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10266
10267                                 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10268
10269                                 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10270                         }
10271
10272                         case ast_variable:
10273                         {
10274                                 assert(_rettype == _data.variable->type());
10275
10276                                 if (_rettype == xpath_type_boolean)
10277                                         return _data.variable->get_boolean();
10278                         }
10279
10280                         // fallthrough
10281                         default:
10282                         {
10283                                 switch (_rettype)
10284                                 {
10285                                 case xpath_type_number:
10286                                         return convert_number_to_boolean(eval_number(c, stack));
10287
10288                                 case xpath_type_string:
10289                                 {
10290                                         xpath_allocator_capture cr(stack.result);
10291
10292                                         return !eval_string(c, stack).empty();
10293                                 }
10294
10295                                 case xpath_type_node_set:
10296                                 {
10297                                         xpath_allocator_capture cr(stack.result);
10298
10299                                         return !eval_node_set(c, stack, nodeset_eval_any).empty();
10300                                 }
10301
10302                                 default:
10303                                         assert(false && "Wrong expression for return type boolean"); // unreachable
10304                                         return false;
10305                                 }
10306                         }
10307                         }
10308                 }
10309
10310                 double eval_number(const xpath_context& c, const xpath_stack& stack)
10311                 {
10312                         switch (_type)
10313                         {
10314                         case ast_op_add:
10315                                 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10316
10317                         case ast_op_subtract:
10318                                 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10319
10320                         case ast_op_multiply:
10321                                 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10322
10323                         case ast_op_divide:
10324                                 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10325
10326                         case ast_op_mod:
10327                                 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10328
10329                         case ast_op_negate:
10330                                 return -_left->eval_number(c, stack);
10331
10332                         case ast_number_constant:
10333                                 return _data.number;
10334
10335                         case ast_func_last:
10336                                 return static_cast<double>(c.size);
10337
10338                         case ast_func_position:
10339                                 return static_cast<double>(c.position);
10340
10341                         case ast_func_count:
10342                         {
10343                                 xpath_allocator_capture cr(stack.result);
10344
10345                                 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10346                         }
10347
10348                         case ast_func_string_length_0:
10349                         {
10350                                 xpath_allocator_capture cr(stack.result);
10351
10352                                 return static_cast<double>(string_value(c.n, stack.result).length());
10353                         }
10354
10355                         case ast_func_string_length_1:
10356                         {
10357                                 xpath_allocator_capture cr(stack.result);
10358
10359                                 return static_cast<double>(_left->eval_string(c, stack).length());
10360                         }
10361
10362                         case ast_func_number_0:
10363                         {
10364                                 xpath_allocator_capture cr(stack.result);
10365
10366                                 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10367                         }
10368
10369                         case ast_func_number_1:
10370                                 return _left->eval_number(c, stack);
10371
10372                         case ast_func_sum:
10373                         {
10374                                 xpath_allocator_capture cr(stack.result);
10375
10376                                 double r = 0;
10377
10378                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10379
10380                                 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10381                                 {
10382                                         xpath_allocator_capture cri(stack.result);
10383
10384                                         r += convert_string_to_number(string_value(*it, stack.result).c_str());
10385                                 }
10386
10387                                 return r;
10388                         }
10389
10390                         case ast_func_floor:
10391                         {
10392                                 double r = _left->eval_number(c, stack);
10393
10394                                 return r == r ? floor(r) : r;
10395                         }
10396
10397                         case ast_func_ceiling:
10398                         {
10399                                 double r = _left->eval_number(c, stack);
10400
10401                                 return r == r ? ceil(r) : r;
10402                         }
10403
10404                         case ast_func_round:
10405                                 return round_nearest_nzero(_left->eval_number(c, stack));
10406
10407                         case ast_variable:
10408                         {
10409                                 assert(_rettype == _data.variable->type());
10410
10411                                 if (_rettype == xpath_type_number)
10412                                         return _data.variable->get_number();
10413                         }
10414
10415                         // fallthrough
10416                         default:
10417                         {
10418                                 switch (_rettype)
10419                                 {
10420                                 case xpath_type_boolean:
10421                                         return eval_boolean(c, stack) ? 1 : 0;
10422
10423                                 case xpath_type_string:
10424                                 {
10425                                         xpath_allocator_capture cr(stack.result);
10426
10427                                         return convert_string_to_number(eval_string(c, stack).c_str());
10428                                 }
10429
10430                                 case xpath_type_node_set:
10431                                 {
10432                                         xpath_allocator_capture cr(stack.result);
10433
10434                                         return convert_string_to_number(eval_string(c, stack).c_str());
10435                                 }
10436
10437                                 default:
10438                                         assert(false && "Wrong expression for return type number"); // unreachable
10439                                         return 0;
10440                                 }
10441
10442                         }
10443                         }
10444                 }
10445
10446                 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10447                 {
10448                         assert(_type == ast_func_concat);
10449
10450                         xpath_allocator_capture ct(stack.temp);
10451
10452                         // count the string number
10453                         size_t count = 1;
10454                         for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10455
10456                         // allocate a buffer for temporary string objects
10457                         xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10458                         if (!buffer) return xpath_string();
10459
10460                         // evaluate all strings to temporary stack
10461                         xpath_stack swapped_stack = {stack.temp, stack.result};
10462
10463                         buffer[0] = _left->eval_string(c, swapped_stack);
10464
10465                         size_t pos = 1;
10466                         for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10467                         assert(pos == count);
10468
10469                         // get total length
10470                         size_t length = 0;
10471                         for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10472
10473                         // create final string
10474                         char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10475                         if (!result) return xpath_string();
10476
10477                         char_t* ri = result;
10478
10479                         for (size_t j = 0; j < count; ++j)
10480                                 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10481                                         *ri++ = *bi;
10482
10483                         *ri = 0;
10484
10485                         return xpath_string::from_heap_preallocated(result, ri);
10486                 }
10487
10488                 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10489                 {
10490                         switch (_type)
10491                         {
10492                         case ast_string_constant:
10493                                 return xpath_string::from_const(_data.string);
10494
10495                         case ast_func_local_name_0:
10496                         {
10497                                 xpath_node na = c.n;
10498
10499                                 return xpath_string::from_const(local_name(na));
10500                         }
10501
10502                         case ast_func_local_name_1:
10503                         {
10504                                 xpath_allocator_capture cr(stack.result);
10505
10506                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10507                                 xpath_node na = ns.first();
10508
10509                                 return xpath_string::from_const(local_name(na));
10510                         }
10511
10512                         case ast_func_name_0:
10513                         {
10514                                 xpath_node na = c.n;
10515
10516                                 return xpath_string::from_const(qualified_name(na));
10517                         }
10518
10519                         case ast_func_name_1:
10520                         {
10521                                 xpath_allocator_capture cr(stack.result);
10522
10523                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10524                                 xpath_node na = ns.first();
10525
10526                                 return xpath_string::from_const(qualified_name(na));
10527                         }
10528
10529                         case ast_func_namespace_uri_0:
10530                         {
10531                                 xpath_node na = c.n;
10532
10533                                 return xpath_string::from_const(namespace_uri(na));
10534                         }
10535
10536                         case ast_func_namespace_uri_1:
10537                         {
10538                                 xpath_allocator_capture cr(stack.result);
10539
10540                                 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10541                                 xpath_node na = ns.first();
10542
10543                                 return xpath_string::from_const(namespace_uri(na));
10544                         }
10545
10546                         case ast_func_string_0:
10547                                 return string_value(c.n, stack.result);
10548
10549                         case ast_func_string_1:
10550                                 return _left->eval_string(c, stack);
10551
10552                         case ast_func_concat:
10553                                 return eval_string_concat(c, stack);
10554
10555                         case ast_func_substring_before:
10556                         {
10557                                 xpath_allocator_capture cr(stack.temp);
10558
10559                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10560
10561                                 xpath_string s = _left->eval_string(c, swapped_stack);
10562                                 xpath_string p = _right->eval_string(c, swapped_stack);
10563
10564                                 const char_t* pos = find_substring(s.c_str(), p.c_str());
10565
10566                                 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10567                         }
10568
10569                         case ast_func_substring_after:
10570                         {
10571                                 xpath_allocator_capture cr(stack.temp);
10572
10573                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10574
10575                                 xpath_string s = _left->eval_string(c, swapped_stack);
10576                                 xpath_string p = _right->eval_string(c, swapped_stack);
10577
10578                                 const char_t* pos = find_substring(s.c_str(), p.c_str());
10579                                 if (!pos) return xpath_string();
10580
10581                                 const char_t* rbegin = pos + p.length();
10582                                 const char_t* rend = s.c_str() + s.length();
10583
10584                                 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10585                         }
10586
10587                         case ast_func_substring_2:
10588                         {
10589                                 xpath_allocator_capture cr(stack.temp);
10590
10591                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10592
10593                                 xpath_string s = _left->eval_string(c, swapped_stack);
10594                                 size_t s_length = s.length();
10595
10596                                 double first = round_nearest(_right->eval_number(c, stack));
10597
10598                                 if (is_nan(first)) return xpath_string(); // NaN
10599                                 else if (first >= s_length + 1) return xpath_string();
10600
10601                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10602                                 assert(1 <= pos && pos <= s_length + 1);
10603
10604                                 const char_t* rbegin = s.c_str() + (pos - 1);
10605                                 const char_t* rend = s.c_str() + s.length();
10606
10607                                 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10608                         }
10609
10610                         case ast_func_substring_3:
10611                         {
10612                                 xpath_allocator_capture cr(stack.temp);
10613
10614                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10615
10616                                 xpath_string s = _left->eval_string(c, swapped_stack);
10617                                 size_t s_length = s.length();
10618
10619                                 double first = round_nearest(_right->eval_number(c, stack));
10620                                 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10621
10622                                 if (is_nan(first) || is_nan(last)) return xpath_string();
10623                                 else if (first >= s_length + 1) return xpath_string();
10624                                 else if (first >= last) return xpath_string();
10625                                 else if (last < 1) return xpath_string();
10626
10627                                 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10628                                 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10629
10630                                 assert(1 <= pos && pos <= end && end <= s_length + 1);
10631                                 const char_t* rbegin = s.c_str() + (pos - 1);
10632                                 const char_t* rend = s.c_str() + (end - 1);
10633
10634                                 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10635                         }
10636
10637                         case ast_func_normalize_space_0:
10638                         {
10639                                 xpath_string s = string_value(c.n, stack.result);
10640
10641                                 char_t* begin = s.data(stack.result);
10642                                 if (!begin) return xpath_string();
10643
10644                                 char_t* end = normalize_space(begin);
10645
10646                                 return xpath_string::from_heap_preallocated(begin, end);
10647                         }
10648
10649                         case ast_func_normalize_space_1:
10650                         {
10651                                 xpath_string s = _left->eval_string(c, stack);
10652
10653                                 char_t* begin = s.data(stack.result);
10654                                 if (!begin) return xpath_string();
10655
10656                                 char_t* end = normalize_space(begin);
10657
10658                                 return xpath_string::from_heap_preallocated(begin, end);
10659                         }
10660
10661                         case ast_func_translate:
10662                         {
10663                                 xpath_allocator_capture cr(stack.temp);
10664
10665                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10666
10667                                 xpath_string s = _left->eval_string(c, stack);
10668                                 xpath_string from = _right->eval_string(c, swapped_stack);
10669                                 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10670
10671                                 char_t* begin = s.data(stack.result);
10672                                 if (!begin) return xpath_string();
10673
10674                                 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10675
10676                                 return xpath_string::from_heap_preallocated(begin, end);
10677                         }
10678
10679                         case ast_opt_translate_table:
10680                         {
10681                                 xpath_string s = _left->eval_string(c, stack);
10682
10683                                 char_t* begin = s.data(stack.result);
10684                                 if (!begin) return xpath_string();
10685
10686                                 char_t* end = translate_table(begin, _data.table);
10687
10688                                 return xpath_string::from_heap_preallocated(begin, end);
10689                         }
10690
10691                         case ast_variable:
10692                         {
10693                                 assert(_rettype == _data.variable->type());
10694
10695                                 if (_rettype == xpath_type_string)
10696                                         return xpath_string::from_const(_data.variable->get_string());
10697                         }
10698
10699                         // fallthrough
10700                         default:
10701                         {
10702                                 switch (_rettype)
10703                                 {
10704                                 case xpath_type_boolean:
10705                                         return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10706
10707                                 case xpath_type_number:
10708                                         return convert_number_to_string(eval_number(c, stack), stack.result);
10709
10710                                 case xpath_type_node_set:
10711                                 {
10712                                         xpath_allocator_capture cr(stack.temp);
10713
10714                                         xpath_stack swapped_stack = {stack.temp, stack.result};
10715
10716                                         xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10717                                         return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10718                                 }
10719
10720                                 default:
10721                                         assert(false && "Wrong expression for return type string"); // unreachable
10722                                         return xpath_string();
10723                                 }
10724                         }
10725                         }
10726                 }
10727
10728                 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10729                 {
10730                         switch (_type)
10731                         {
10732                         case ast_op_union:
10733                         {
10734                                 xpath_allocator_capture cr(stack.temp);
10735
10736                                 xpath_stack swapped_stack = {stack.temp, stack.result};
10737
10738                                 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10739                                 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10740
10741                                 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10742                                 rs.set_type(xpath_node_set::type_unsorted);
10743
10744                                 rs.append(ls.begin(), ls.end(), stack.result);
10745                                 rs.remove_duplicates();
10746
10747                                 return rs;
10748                         }
10749
10750                         case ast_filter:
10751                         {
10752                                 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10753
10754                                 // either expression is a number or it contains position() call; sort by document order
10755                                 if (_test != predicate_posinv) set.sort_do();
10756
10757                                 bool once = eval_once(set.type(), eval);
10758
10759                                 apply_predicate(set, 0, stack, once);
10760
10761                                 return set;
10762                         }
10763
10764                         case ast_func_id:
10765                                 return xpath_node_set_raw();
10766
10767                         case ast_step:
10768                         {
10769                                 switch (_axis)
10770                                 {
10771                                 case axis_ancestor:
10772                                         return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10773
10774                                 case axis_ancestor_or_self:
10775                                         return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10776
10777                                 case axis_attribute:
10778                                         return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10779
10780                                 case axis_child:
10781                                         return step_do(c, stack, eval, axis_to_type<axis_child>());
10782
10783                                 case axis_descendant:
10784                                         return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10785
10786                                 case axis_descendant_or_self:
10787                                         return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10788
10789                                 case axis_following:
10790                                         return step_do(c, stack, eval, axis_to_type<axis_following>());
10791
10792                                 case axis_following_sibling:
10793                                         return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10794
10795                                 case axis_namespace:
10796                                         // namespaced axis is not supported
10797                                         return xpath_node_set_raw();
10798
10799                                 case axis_parent:
10800                                         return step_do(c, stack, eval, axis_to_type<axis_parent>());
10801
10802                                 case axis_preceding:
10803                                         return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10804
10805                                 case axis_preceding_sibling:
10806                                         return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10807
10808                                 case axis_self:
10809                                         return step_do(c, stack, eval, axis_to_type<axis_self>());
10810
10811                                 default:
10812                                         assert(false && "Unknown axis"); // unreachable
10813                                         return xpath_node_set_raw();
10814                                 }
10815                         }
10816
10817                         case ast_step_root:
10818                         {
10819                                 assert(!_right); // root step can't have any predicates
10820
10821                                 xpath_node_set_raw ns;
10822
10823                                 ns.set_type(xpath_node_set::type_sorted);
10824
10825                                 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10826                                 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
10827
10828                                 return ns;
10829                         }
10830
10831                         case ast_variable:
10832                         {
10833                                 assert(_rettype == _data.variable->type());
10834
10835                                 if (_rettype == xpath_type_node_set)
10836                                 {
10837                                         const xpath_node_set& s = _data.variable->get_node_set();
10838
10839                                         xpath_node_set_raw ns;
10840
10841                                         ns.set_type(s.type());
10842                                         ns.append(s.begin(), s.end(), stack.result);
10843
10844                                         return ns;
10845                                 }
10846                         }
10847
10848                         // fallthrough
10849                         default:
10850                                 assert(false && "Wrong expression for return type node set"); // unreachable
10851                                 return xpath_node_set_raw();
10852                         }
10853                 }
10854
10855                 void optimize(xpath_allocator* alloc)
10856                 {
10857                         if (_left)
10858                                 _left->optimize(alloc);
10859
10860                         if (_right)
10861                                 _right->optimize(alloc);
10862
10863                         if (_next)
10864                                 _next->optimize(alloc);
10865
10866                         optimize_self(alloc);
10867                 }
10868
10869                 void optimize_self(xpath_allocator* alloc)
10870                 {
10871                         // Rewrite [position()=expr] with [expr]
10872                         // Note that this step has to go before classification to recognize [position()=1]
10873                         if ((_type == ast_filter || _type == ast_predicate) &&
10874                                 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10875                         {
10876                                 _right = _right->_right;
10877                         }
10878
10879                         // Classify filter/predicate ops to perform various optimizations during evaluation
10880                         if (_type == ast_filter || _type == ast_predicate)
10881                         {
10882                                 assert(_test == predicate_default);
10883
10884                                 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10885                                         _test = predicate_constant_one;
10886                                 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10887                                         _test = predicate_constant;
10888                                 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10889                                         _test = predicate_posinv;
10890                         }
10891
10892                         // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10893                         // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10894                         // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10895                         // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10896                         if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10897                                 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10898                                 is_posinv_step())
10899                         {
10900                                 if (_axis == axis_child || _axis == axis_descendant)
10901                                         _axis = axis_descendant;
10902                                 else
10903                                         _axis = axis_descendant_or_self;
10904
10905                                 _left = _left->_left;
10906                         }
10907
10908                         // Use optimized lookup table implementation for translate() with constant arguments
10909                         if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10910                         {
10911                                 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10912
10913                                 if (table)
10914                                 {
10915                                         _type = ast_opt_translate_table;
10916                                         _data.table = table;
10917                                 }
10918                         }
10919
10920                         // Use optimized path for @attr = 'value' or @attr = $value
10921                         if (_type == ast_op_equal &&
10922                                 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10923                                 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10924                         {
10925                                 _type = ast_opt_compare_attribute;
10926                         }
10927                 }
10928
10929                 bool is_posinv_expr() const
10930                 {
10931                         switch (_type)
10932                         {
10933                         case ast_func_position:
10934                         case ast_func_last:
10935                                 return false;
10936
10937                         case ast_string_constant:
10938                         case ast_number_constant:
10939                         case ast_variable:
10940                                 return true;
10941
10942                         case ast_step:
10943                         case ast_step_root:
10944                                 return true;
10945
10946                         case ast_predicate:
10947                         case ast_filter:
10948                                 return true;
10949
10950                         default:
10951                                 if (_left && !_left->is_posinv_expr()) return false;
10952
10953                                 for (xpath_ast_node* n = _right; n; n = n->_next)
10954                                         if (!n->is_posinv_expr()) return false;
10955
10956                                 return true;
10957                         }
10958                 }
10959
10960                 bool is_posinv_step() const
10961                 {
10962                         assert(_type == ast_step);
10963
10964                         for (xpath_ast_node* n = _right; n; n = n->_next)
10965                         {
10966                                 assert(n->_type == ast_predicate);
10967
10968                                 if (n->_test != predicate_posinv)
10969                                         return false;
10970                         }
10971
10972                         return true;
10973                 }
10974
10975                 xpath_value_type rettype() const
10976                 {
10977                         return static_cast<xpath_value_type>(_rettype);
10978                 }
10979         };
10980
10981         struct xpath_parser
10982         {
10983                 xpath_allocator* _alloc;
10984                 xpath_lexer _lexer;
10985
10986                 const char_t* _query;
10987                 xpath_variable_set* _variables;
10988
10989                 xpath_parse_result* _result;
10990
10991                 char_t _scratch[32];
10992
10993                 xpath_ast_node* error(const char* message)
10994                 {
10995                         _result->error = message;
10996                         _result->offset = _lexer.current_pos() - _query;
10997
10998                         return 0;
10999                 }
11000
11001                 xpath_ast_node* error_oom()
11002                 {
11003                         assert(_alloc->_error);
11004                         *_alloc->_error = true;
11005
11006                         return 0;
11007                 }
11008
11009                 void* alloc_node()
11010                 {
11011                         return _alloc->allocate(sizeof(xpath_ast_node));
11012                 }
11013
11014                 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11015                 {
11016                         void* memory = alloc_node();
11017                         return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11018                 }
11019
11020                 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11021                 {
11022                         void* memory = alloc_node();
11023                         return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11024                 }
11025
11026                 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11027                 {
11028                         void* memory = alloc_node();
11029                         return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11030                 }
11031
11032                 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11033                 {
11034                         void* memory = alloc_node();
11035                         return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11036                 }
11037
11038                 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11039                 {
11040                         void* memory = alloc_node();
11041                         return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11042                 }
11043
11044                 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11045                 {
11046                         void* memory = alloc_node();
11047                         return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11048                 }
11049
11050                 const char_t* alloc_string(const xpath_lexer_string& value)
11051                 {
11052                         if (!value.begin)
11053                                 return PUGIXML_TEXT("");
11054
11055                         size_t length = static_cast<size_t>(value.end - value.begin);
11056
11057                         char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11058                         if (!c) return 0;
11059
11060                         memcpy(c, value.begin, length * sizeof(char_t));
11061                         c[length] = 0;
11062
11063                         return c;
11064                 }
11065
11066                 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11067                 {
11068                         switch (name.begin[0])
11069                         {
11070                         case 'b':
11071                                 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11072                                         return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11073
11074                                 break;
11075
11076                         case 'c':
11077                                 if (name == PUGIXML_TEXT("count") && argc == 1)
11078                                 {
11079                                         if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11080                                         return alloc_node(ast_func_count, xpath_type_number, args[0]);
11081                                 }
11082                                 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11083                                         return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11084                                 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11085                                         return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11086                                 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11087                                         return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11088
11089                                 break;
11090
11091                         case 'f':
11092                                 if (name == PUGIXML_TEXT("false") && argc == 0)
11093                                         return alloc_node(ast_func_false, xpath_type_boolean);
11094                                 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11095                                         return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11096
11097                                 break;
11098
11099                         case 'i':
11100                                 if (name == PUGIXML_TEXT("id") && argc == 1)
11101                                         return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11102
11103                                 break;
11104
11105                         case 'l':
11106                                 if (name == PUGIXML_TEXT("last") && argc == 0)
11107                                         return alloc_node(ast_func_last, xpath_type_number);
11108                                 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11109                                         return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11110                                 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11111                                 {
11112                                         if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11113                                         return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11114                                 }
11115
11116                                 break;
11117
11118                         case 'n':
11119                                 if (name == PUGIXML_TEXT("name") && argc <= 1)
11120                                 {
11121                                         if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11122                                         return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11123                                 }
11124                                 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11125                                 {
11126                                         if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11127                                         return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11128                                 }
11129                                 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11130                                         return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11131                                 else if (name == PUGIXML_TEXT("not") && argc == 1)
11132                                         return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11133                                 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11134                                         return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11135
11136                                 break;
11137
11138                         case 'p':
11139                                 if (name == PUGIXML_TEXT("position") && argc == 0)
11140                                         return alloc_node(ast_func_position, xpath_type_number);
11141
11142                                 break;
11143
11144                         case 'r':
11145                                 if (name == PUGIXML_TEXT("round") && argc == 1)
11146                                         return alloc_node(ast_func_round, xpath_type_number, args[0]);
11147
11148                                 break;
11149
11150                         case 's':
11151                                 if (name == PUGIXML_TEXT("string") && argc <= 1)
11152                                         return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11153                                 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11154                                         return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11155                                 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11156                                         return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11157                                 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11158                                         return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11159                                 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11160                                         return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11161                                 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11162                                         return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11163                                 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11164                                 {
11165                                         if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11166                                         return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11167                                 }
11168
11169                                 break;
11170
11171                         case 't':
11172                                 if (name == PUGIXML_TEXT("translate") && argc == 3)
11173                                         return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11174                                 else if (name == PUGIXML_TEXT("true") && argc == 0)
11175                                         return alloc_node(ast_func_true, xpath_type_boolean);
11176
11177                                 break;
11178
11179                         default:
11180                                 break;
11181                         }
11182
11183                         return error("Unrecognized function or wrong parameter count");
11184                 }
11185
11186                 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11187                 {
11188                         specified = true;
11189
11190                         switch (name.begin[0])
11191                         {
11192                         case 'a':
11193                                 if (name == PUGIXML_TEXT("ancestor"))
11194                                         return axis_ancestor;
11195                                 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11196                                         return axis_ancestor_or_self;
11197                                 else if (name == PUGIXML_TEXT("attribute"))
11198                                         return axis_attribute;
11199
11200                                 break;
11201
11202                         case 'c':
11203                                 if (name == PUGIXML_TEXT("child"))
11204                                         return axis_child;
11205
11206                                 break;
11207
11208                         case 'd':
11209                                 if (name == PUGIXML_TEXT("descendant"))
11210                                         return axis_descendant;
11211                                 else if (name == PUGIXML_TEXT("descendant-or-self"))
11212                                         return axis_descendant_or_self;
11213
11214                                 break;
11215
11216                         case 'f':
11217                                 if (name == PUGIXML_TEXT("following"))
11218                                         return axis_following;
11219                                 else if (name == PUGIXML_TEXT("following-sibling"))
11220                                         return axis_following_sibling;
11221
11222                                 break;
11223
11224                         case 'n':
11225                                 if (name == PUGIXML_TEXT("namespace"))
11226                                         return axis_namespace;
11227
11228                                 break;
11229
11230                         case 'p':
11231                                 if (name == PUGIXML_TEXT("parent"))
11232                                         return axis_parent;
11233                                 else if (name == PUGIXML_TEXT("preceding"))
11234                                         return axis_preceding;
11235                                 else if (name == PUGIXML_TEXT("preceding-sibling"))
11236                                         return axis_preceding_sibling;
11237
11238                                 break;
11239
11240                         case 's':
11241                                 if (name == PUGIXML_TEXT("self"))
11242                                         return axis_self;
11243
11244                                 break;
11245
11246                         default:
11247                                 break;
11248                         }
11249
11250                         specified = false;
11251                         return axis_child;
11252                 }
11253
11254                 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11255                 {
11256                         switch (name.begin[0])
11257                         {
11258                         case 'c':
11259                                 if (name == PUGIXML_TEXT("comment"))
11260                                         return nodetest_type_comment;
11261
11262                                 break;
11263
11264                         case 'n':
11265                                 if (name == PUGIXML_TEXT("node"))
11266                                         return nodetest_type_node;
11267
11268                                 break;
11269
11270                         case 'p':
11271                                 if (name == PUGIXML_TEXT("processing-instruction"))
11272                                         return nodetest_type_pi;
11273
11274                                 break;
11275
11276                         case 't':
11277                                 if (name == PUGIXML_TEXT("text"))
11278                                         return nodetest_type_text;
11279
11280                                 break;
11281
11282                         default:
11283                                 break;
11284                         }
11285
11286                         return nodetest_none;
11287                 }
11288
11289                 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11290                 xpath_ast_node* parse_primary_expression()
11291                 {
11292                         switch (_lexer.current())
11293                         {
11294                         case lex_var_ref:
11295                         {
11296                                 xpath_lexer_string name = _lexer.contents();
11297
11298                                 if (!_variables)
11299                                         return error("Unknown variable: variable set is not provided");
11300
11301                                 xpath_variable* var = 0;
11302                                 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11303                                         return error_oom();
11304
11305                                 if (!var)
11306                                         return error("Unknown variable: variable set does not contain the given name");
11307
11308                                 _lexer.next();
11309
11310                                 return alloc_node(ast_variable, var->type(), var);
11311                         }
11312
11313                         case lex_open_brace:
11314                         {
11315                                 _lexer.next();
11316
11317                                 xpath_ast_node* n = parse_expression();
11318                                 if (!n) return 0;
11319
11320                                 if (_lexer.current() != lex_close_brace)
11321                                         return error("Expected ')' to match an opening '('");
11322
11323                                 _lexer.next();
11324
11325                                 return n;
11326                         }
11327
11328                         case lex_quoted_string:
11329                         {
11330                                 const char_t* value = alloc_string(_lexer.contents());
11331                                 if (!value) return 0;
11332
11333                                 _lexer.next();
11334
11335                                 return alloc_node(ast_string_constant, xpath_type_string, value);
11336                         }
11337
11338                         case lex_number:
11339                         {
11340                                 double value = 0;
11341
11342                                 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11343                                         return error_oom();
11344
11345                                 _lexer.next();
11346
11347                                 return alloc_node(ast_number_constant, xpath_type_number, value);
11348                         }
11349
11350                         case lex_string:
11351                         {
11352                                 xpath_ast_node* args[2] = {0};
11353                                 size_t argc = 0;
11354
11355                                 xpath_lexer_string function = _lexer.contents();
11356                                 _lexer.next();
11357
11358                                 xpath_ast_node* last_arg = 0;
11359
11360                                 if (_lexer.current() != lex_open_brace)
11361                                         return error("Unrecognized function call");
11362                                 _lexer.next();
11363
11364                                 while (_lexer.current() != lex_close_brace)
11365                                 {
11366                                         if (argc > 0)
11367                                         {
11368                                                 if (_lexer.current() != lex_comma)
11369                                                         return error("No comma between function arguments");
11370                                                 _lexer.next();
11371                                         }
11372
11373                                         xpath_ast_node* n = parse_expression();
11374                                         if (!n) return 0;
11375
11376                                         if (argc < 2) args[argc] = n;
11377                                         else last_arg->set_next(n);
11378
11379                                         argc++;
11380                                         last_arg = n;
11381                                 }
11382
11383                                 _lexer.next();
11384
11385                                 return parse_function(function, argc, args);
11386                         }
11387
11388                         default:
11389                                 return error("Unrecognizable primary expression");
11390                         }
11391                 }
11392
11393                 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11394                 // Predicate ::= '[' PredicateExpr ']'
11395                 // PredicateExpr ::= Expr
11396                 xpath_ast_node* parse_filter_expression()
11397                 {
11398                         xpath_ast_node* n = parse_primary_expression();
11399                         if (!n) return 0;
11400
11401                         while (_lexer.current() == lex_open_square_brace)
11402                         {
11403                                 _lexer.next();
11404
11405                                 if (n->rettype() != xpath_type_node_set)
11406                                         return error("Predicate has to be applied to node set");
11407
11408                                 xpath_ast_node* expr = parse_expression();
11409                                 if (!expr) return 0;
11410
11411                                 n = alloc_node(ast_filter, n, expr, predicate_default);
11412                                 if (!n) return 0;
11413
11414                                 if (_lexer.current() != lex_close_square_brace)
11415                                         return error("Expected ']' to match an opening '['");
11416
11417                                 _lexer.next();
11418                         }
11419
11420                         return n;
11421                 }
11422
11423                 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11424                 // AxisSpecifier ::= AxisName '::' | '@'?
11425                 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11426                 // NameTest ::= '*' | NCName ':' '*' | QName
11427                 // AbbreviatedStep ::= '.' | '..'
11428                 xpath_ast_node* parse_step(xpath_ast_node* set)
11429                 {
11430                         if (set && set->rettype() != xpath_type_node_set)
11431                                 return error("Step has to be applied to node set");
11432
11433                         bool axis_specified = false;
11434                         axis_t axis = axis_child; // implied child axis
11435
11436                         if (_lexer.current() == lex_axis_attribute)
11437                         {
11438                                 axis = axis_attribute;
11439                                 axis_specified = true;
11440
11441                                 _lexer.next();
11442                         }
11443                         else if (_lexer.current() == lex_dot)
11444                         {
11445                                 _lexer.next();
11446
11447                                 if (_lexer.current() == lex_open_square_brace)
11448                                         return error("Predicates are not allowed after an abbreviated step");
11449
11450                                 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11451                         }
11452                         else if (_lexer.current() == lex_double_dot)
11453                         {
11454                                 _lexer.next();
11455
11456                                 if (_lexer.current() == lex_open_square_brace)
11457                                         return error("Predicates are not allowed after an abbreviated step");
11458
11459                                 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11460                         }
11461
11462                         nodetest_t nt_type = nodetest_none;
11463                         xpath_lexer_string nt_name;
11464
11465                         if (_lexer.current() == lex_string)
11466                         {
11467                                 // node name test
11468                                 nt_name = _lexer.contents();
11469                                 _lexer.next();
11470
11471                                 // was it an axis name?
11472                                 if (_lexer.current() == lex_double_colon)
11473                                 {
11474                                         // parse axis name
11475                                         if (axis_specified)
11476                                                 return error("Two axis specifiers in one step");
11477
11478                                         axis = parse_axis_name(nt_name, axis_specified);
11479
11480                                         if (!axis_specified)
11481                                                 return error("Unknown axis");
11482
11483                                         // read actual node test
11484                                         _lexer.next();
11485
11486                                         if (_lexer.current() == lex_multiply)
11487                                         {
11488                                                 nt_type = nodetest_all;
11489                                                 nt_name = xpath_lexer_string();
11490                                                 _lexer.next();
11491                                         }
11492                                         else if (_lexer.current() == lex_string)
11493                                         {
11494                                                 nt_name = _lexer.contents();
11495                                                 _lexer.next();
11496                                         }
11497                                         else
11498                                         {
11499                                                 return error("Unrecognized node test");
11500                                         }
11501                                 }
11502
11503                                 if (nt_type == nodetest_none)
11504                                 {
11505                                         // node type test or processing-instruction
11506                                         if (_lexer.current() == lex_open_brace)
11507                                         {
11508                                                 _lexer.next();
11509
11510                                                 if (_lexer.current() == lex_close_brace)
11511                                                 {
11512                                                         _lexer.next();
11513
11514                                                         nt_type = parse_node_test_type(nt_name);
11515
11516                                                         if (nt_type == nodetest_none)
11517                                                                 return error("Unrecognized node type");
11518
11519                                                         nt_name = xpath_lexer_string();
11520                                                 }
11521                                                 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11522                                                 {
11523                                                         if (_lexer.current() != lex_quoted_string)
11524                                                                 return error("Only literals are allowed as arguments to processing-instruction()");
11525
11526                                                         nt_type = nodetest_pi;
11527                                                         nt_name = _lexer.contents();
11528                                                         _lexer.next();
11529
11530                                                         if (_lexer.current() != lex_close_brace)
11531                                                                 return error("Unmatched brace near processing-instruction()");
11532                                                         _lexer.next();
11533                                                 }
11534                                                 else
11535                                                 {
11536                                                         return error("Unmatched brace near node type test");
11537                                                 }
11538                                         }
11539                                         // QName or NCName:*
11540                                         else
11541                                         {
11542                                                 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11543                                                 {
11544                                                         nt_name.end--; // erase *
11545
11546                                                         nt_type = nodetest_all_in_namespace;
11547                                                 }
11548                                                 else
11549                                                 {
11550                                                         nt_type = nodetest_name;
11551                                                 }
11552                                         }
11553                                 }
11554                         }
11555                         else if (_lexer.current() == lex_multiply)
11556                         {
11557                                 nt_type = nodetest_all;
11558                                 _lexer.next();
11559                         }
11560                         else
11561                         {
11562                                 return error("Unrecognized node test");
11563                         }
11564
11565                         const char_t* nt_name_copy = alloc_string(nt_name);
11566                         if (!nt_name_copy) return 0;
11567
11568                         xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11569                         if (!n) return 0;
11570
11571                         xpath_ast_node* last = 0;
11572
11573                         while (_lexer.current() == lex_open_square_brace)
11574                         {
11575                                 _lexer.next();
11576
11577                                 xpath_ast_node* expr = parse_expression();
11578                                 if (!expr) return 0;
11579
11580                                 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11581                                 if (!pred) return 0;
11582
11583                                 if (_lexer.current() != lex_close_square_brace)
11584                                         return error("Expected ']' to match an opening '['");
11585                                 _lexer.next();
11586
11587                                 if (last) last->set_next(pred);
11588                                 else n->set_right(pred);
11589
11590                                 last = pred;
11591                         }
11592
11593                         return n;
11594                 }
11595
11596                 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11597                 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11598                 {
11599                         xpath_ast_node* n = parse_step(set);
11600                         if (!n) return 0;
11601
11602                         while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11603                         {
11604                                 lexeme_t l = _lexer.current();
11605                                 _lexer.next();
11606
11607                                 if (l == lex_double_slash)
11608                                 {
11609                                         n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11610                                         if (!n) return 0;
11611                                 }
11612
11613                                 n = parse_step(n);
11614                                 if (!n) return 0;
11615                         }
11616
11617                         return n;
11618                 }
11619
11620                 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11621                 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11622                 xpath_ast_node* parse_location_path()
11623                 {
11624                         if (_lexer.current() == lex_slash)
11625                         {
11626                                 _lexer.next();
11627
11628                                 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11629                                 if (!n) return 0;
11630
11631                                 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11632                                 lexeme_t l = _lexer.current();
11633
11634                                 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11635                                         return parse_relative_location_path(n);
11636                                 else
11637                                         return n;
11638                         }
11639                         else if (_lexer.current() == lex_double_slash)
11640                         {
11641                                 _lexer.next();
11642
11643                                 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11644                                 if (!n) return 0;
11645
11646                                 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11647                                 if (!n) return 0;
11648
11649                                 return parse_relative_location_path(n);
11650                         }
11651
11652                         // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11653                         return parse_relative_location_path(0);
11654                 }
11655
11656                 // PathExpr ::= LocationPath
11657                 //                              | FilterExpr
11658                 //                              | FilterExpr '/' RelativeLocationPath
11659                 //                              | FilterExpr '//' RelativeLocationPath
11660                 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11661                 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11662                 xpath_ast_node* parse_path_or_unary_expression()
11663                 {
11664                         // Clarification.
11665                         // PathExpr begins with either LocationPath or FilterExpr.
11666                         // FilterExpr begins with PrimaryExpr
11667                         // PrimaryExpr begins with '$' in case of it being a variable reference,
11668                         // '(' in case of it being an expression, string literal, number constant or
11669                         // function call.
11670                         if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11671                                 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11672                                 _lexer.current() == lex_string)
11673                         {
11674                                 if (_lexer.current() == lex_string)
11675                                 {
11676                                         // This is either a function call, or not - if not, we shall proceed with location path
11677                                         const char_t* state = _lexer.state();
11678
11679                                         while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11680
11681                                         if (*state != '(')
11682                                                 return parse_location_path();
11683
11684                                         // This looks like a function call; however this still can be a node-test. Check it.
11685                                         if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11686                                                 return parse_location_path();
11687                                 }
11688
11689                                 xpath_ast_node* n = parse_filter_expression();
11690                                 if (!n) return 0;
11691
11692                                 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11693                                 {
11694                                         lexeme_t l = _lexer.current();
11695                                         _lexer.next();
11696
11697                                         if (l == lex_double_slash)
11698                                         {
11699                                                 if (n->rettype() != xpath_type_node_set)
11700                                                         return error("Step has to be applied to node set");
11701
11702                                                 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11703                                                 if (!n) return 0;
11704                                         }
11705
11706                                         // select from location path
11707                                         return parse_relative_location_path(n);
11708                                 }
11709
11710                                 return n;
11711                         }
11712                         else if (_lexer.current() == lex_minus)
11713                         {
11714                                 _lexer.next();
11715
11716                                 // precedence 7+ - only parses union expressions
11717                                 xpath_ast_node* n = parse_expression(7);
11718                                 if (!n) return 0;
11719
11720                                 return alloc_node(ast_op_negate, xpath_type_number, n);
11721                         }
11722                         else
11723                         {
11724                                 return parse_location_path();
11725                         }
11726                 }
11727
11728                 struct binary_op_t
11729                 {
11730                         ast_type_t asttype;
11731                         xpath_value_type rettype;
11732                         int precedence;
11733
11734                         binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11735                         {
11736                         }
11737
11738                         binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11739                         {
11740                         }
11741
11742                         static binary_op_t parse(xpath_lexer& lexer)
11743                         {
11744                                 switch (lexer.current())
11745                                 {
11746                                 case lex_string:
11747                                         if (lexer.contents() == PUGIXML_TEXT("or"))
11748                                                 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11749                                         else if (lexer.contents() == PUGIXML_TEXT("and"))
11750                                                 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11751                                         else if (lexer.contents() == PUGIXML_TEXT("div"))
11752                                                 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11753                                         else if (lexer.contents() == PUGIXML_TEXT("mod"))
11754                                                 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11755                                         else
11756                                                 return binary_op_t();
11757
11758                                 case lex_equal:
11759                                         return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11760
11761                                 case lex_not_equal:
11762                                         return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11763
11764                                 case lex_less:
11765                                         return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11766
11767                                 case lex_greater:
11768                                         return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11769
11770                                 case lex_less_or_equal:
11771                                         return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11772
11773                                 case lex_greater_or_equal:
11774                                         return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11775
11776                                 case lex_plus:
11777                                         return binary_op_t(ast_op_add, xpath_type_number, 5);
11778
11779                                 case lex_minus:
11780                                         return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11781
11782                                 case lex_multiply:
11783                                         return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11784
11785                                 case lex_union:
11786                                         return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11787
11788                                 default:
11789                                         return binary_op_t();
11790                                 }
11791                         }
11792                 };
11793
11794                 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11795                 {
11796                         binary_op_t op = binary_op_t::parse(_lexer);
11797
11798                         while (op.asttype != ast_unknown && op.precedence >= limit)
11799                         {
11800                                 _lexer.next();
11801
11802                                 xpath_ast_node* rhs = parse_path_or_unary_expression();
11803                                 if (!rhs) return 0;
11804
11805                                 binary_op_t nextop = binary_op_t::parse(_lexer);
11806
11807                                 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11808                                 {
11809                                         rhs = parse_expression_rec(rhs, nextop.precedence);
11810                                         if (!rhs) return 0;
11811
11812                                         nextop = binary_op_t::parse(_lexer);
11813                                 }
11814
11815                                 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11816                                         return error("Union operator has to be applied to node sets");
11817
11818                                 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
11819                                 if (!lhs) return 0;
11820
11821                                 op = binary_op_t::parse(_lexer);
11822                         }
11823
11824                         return lhs;
11825                 }
11826
11827                 // Expr ::= OrExpr
11828                 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11829                 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11830                 // EqualityExpr ::= RelationalExpr
11831                 //                                      | EqualityExpr '=' RelationalExpr
11832                 //                                      | EqualityExpr '!=' RelationalExpr
11833                 // RelationalExpr ::= AdditiveExpr
11834                 //                                        | RelationalExpr '<' AdditiveExpr
11835                 //                                        | RelationalExpr '>' AdditiveExpr
11836                 //                                        | RelationalExpr '<=' AdditiveExpr
11837                 //                                        | RelationalExpr '>=' AdditiveExpr
11838                 // AdditiveExpr ::= MultiplicativeExpr
11839                 //                                      | AdditiveExpr '+' MultiplicativeExpr
11840                 //                                      | AdditiveExpr '-' MultiplicativeExpr
11841                 // MultiplicativeExpr ::= UnaryExpr
11842                 //                                                | MultiplicativeExpr '*' UnaryExpr
11843                 //                                                | MultiplicativeExpr 'div' UnaryExpr
11844                 //                                                | MultiplicativeExpr 'mod' UnaryExpr
11845                 xpath_ast_node* parse_expression(int limit = 0)
11846                 {
11847                         xpath_ast_node* n = parse_path_or_unary_expression();
11848                         if (!n) return 0;
11849
11850                         return parse_expression_rec(n, limit);
11851                 }
11852
11853                 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11854                 {
11855                 }
11856
11857                 xpath_ast_node* parse()
11858                 {
11859                         xpath_ast_node* n = parse_expression();
11860                         if (!n) return 0;
11861
11862                         // check if there are unparsed tokens left
11863                         if (_lexer.current() != lex_eof)
11864                                 return error("Incorrect query");
11865
11866                         return n;
11867                 }
11868
11869                 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11870                 {
11871                         xpath_parser parser(query, variables, alloc, result);
11872
11873                         return parser.parse();
11874                 }
11875         };
11876
11877         struct xpath_query_impl
11878         {
11879                 static xpath_query_impl* create()
11880                 {
11881                         void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11882                         if (!memory) return 0;
11883
11884                         return new (memory) xpath_query_impl();
11885                 }
11886
11887                 static void destroy(xpath_query_impl* impl)
11888                 {
11889                         // free all allocated pages
11890                         impl->alloc.release();
11891
11892                         // free allocator memory (with the first page)
11893                         xml_memory::deallocate(impl);
11894                 }
11895
11896                 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
11897                 {
11898                         block.next = 0;
11899                         block.capacity = sizeof(block.data);
11900                 }
11901
11902                 xpath_ast_node* root;
11903                 xpath_allocator alloc;
11904                 xpath_memory_block block;
11905                 bool oom;
11906         };
11907
11908         PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11909         {
11910                 if (!impl) return 0;
11911
11912                 if (impl->root->rettype() != xpath_type_node_set)
11913                 {
11914                 #ifdef PUGIXML_NO_EXCEPTIONS
11915                         return 0;
11916                 #else
11917                         xpath_parse_result res;
11918                         res.error = "Expression does not evaluate to node set";
11919
11920                         throw xpath_exception(res);
11921                 #endif
11922                 }
11923
11924                 return impl->root;
11925         }
11926 PUGI__NS_END
11927
11928 namespace pugi
11929 {
11930 #ifndef PUGIXML_NO_EXCEPTIONS
11931         PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11932         {
11933                 assert(_result.error);
11934         }
11935
11936         PUGI__FN const char* xpath_exception::what() const throw()
11937         {
11938                 return _result.error;
11939         }
11940
11941         PUGI__FN const xpath_parse_result& xpath_exception::result() const
11942         {
11943                 return _result;
11944         }
11945 #endif
11946
11947         PUGI__FN xpath_node::xpath_node()
11948         {
11949         }
11950
11951         PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
11952         {
11953         }
11954
11955         PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
11956         {
11957         }
11958
11959         PUGI__FN xml_node xpath_node::node() const
11960         {
11961                 return _attribute ? xml_node() : _node;
11962         }
11963
11964         PUGI__FN xml_attribute xpath_node::attribute() const
11965         {
11966                 return _attribute;
11967         }
11968
11969         PUGI__FN xml_node xpath_node::parent() const
11970         {
11971                 return _attribute ? _node : _node.parent();
11972         }
11973
11974         PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
11975         {
11976         }
11977
11978         PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11979         {
11980                 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11981         }
11982
11983         PUGI__FN bool xpath_node::operator!() const
11984         {
11985                 return !(_node || _attribute);
11986         }
11987
11988         PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11989         {
11990                 return _node == n._node && _attribute == n._attribute;
11991         }
11992
11993         PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11994         {
11995                 return _node != n._node || _attribute != n._attribute;
11996         }
11997
#ifdef __BORLANDC__
// Borland-only overloads combining an xpath_node with a bool.
PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
{
	const bool lhs_set = (bool)lhs;
	return lhs_set && rhs;
}

PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
{
	const bool lhs_set = (bool)lhs;
	return lhs_set || rhs;
}
#endif
12009
12010         PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12011         {
12012                 assert(begin_ <= end_);
12013
12014                 size_t size_ = static_cast<size_t>(end_ - begin_);
12015
12016                 if (size_ <= 1)
12017                 {
12018                         // deallocate old buffer
12019                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12020
12021                         // use internal buffer
12022                         if (begin_ != end_) _storage = *begin_;
12023
12024                         _begin = &_storage;
12025                         _end = &_storage + size_;
12026                         _type = type_;
12027                 }
12028                 else
12029                 {
12030                         // make heap copy
12031                         xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12032
12033                         if (!storage)
12034                         {
12035                         #ifdef PUGIXML_NO_EXCEPTIONS
12036                                 return;
12037                         #else
12038                                 throw std::bad_alloc();
12039                         #endif
12040                         }
12041
12042                         memcpy(storage, begin_, size_ * sizeof(xpath_node));
12043
12044                         // deallocate old buffer
12045                         if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12046
12047                         // finalize
12048                         _begin = storage;
12049                         _end = storage + size_;
12050                         _type = type_;
12051                 }
12052         }
12053
#ifdef PUGIXML_HAS_MOVE
// Takes over rhs's contents and resets rhs to an empty set that uses its own
// inline slot. This set's previous buffer is not released here.
PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
{
	const bool rhs_inline = (rhs._begin == &rhs._storage);

	_type = rhs._type;
	_storage = rhs._storage;

	// an inline element was copied above, so point at our own slot in that case
	_begin = rhs_inline ? &_storage : rhs._begin;
	_end = _begin + (rhs._end - rhs._begin);

	rhs._type = type_unsorted;
	rhs._begin = &rhs._storage;
	rhs._end = rhs._begin;
}
#endif
12067
12068         PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12069         {
12070         }
12071
12072         PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12073         {
12074                 _assign(begin_, end_, type_);
12075         }
12076
12077         PUGI__FN xpath_node_set::~xpath_node_set()
12078         {
12079                 if (_begin != &_storage)
12080                         impl::xml_memory::deallocate(_begin);
12081         }
12082
12083         PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12084         {
12085                 _assign(ns._begin, ns._end, ns._type);
12086         }
12087
12088         PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12089         {
12090                 if (this == &ns) return *this;
12091
12092                 _assign(ns._begin, ns._end, ns._type);
12093
12094                 return *this;
12095         }
12096
#ifdef PUGIXML_HAS_MOVE
// Move constructor: starts out empty, then takes over rhs via _move.
PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
	: _type(type_unsorted)
	, _begin(&_storage)
	, _end(&_storage)
{
	_move(rhs);
}

// Move assignment: releases the current buffer (if any), then takes over rhs.
// Self-assignment is a no-op.
PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
{
	if (this != &rhs)
	{
		if (_begin != &_storage)
			impl::xml_memory::deallocate(_begin);

		_move(rhs);
	}

	return *this;
}
#endif
12115
12116         PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12117         {
12118                 return _type;
12119         }
12120
12121         PUGI__FN size_t xpath_node_set::size() const
12122         {
12123                 return _end - _begin;
12124         }
12125
12126         PUGI__FN bool xpath_node_set::empty() const
12127         {
12128                 return _begin == _end;
12129         }
12130
12131         PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12132         {
12133                 assert(index < size());
12134                 return _begin[index];
12135         }
12136
12137         PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12138         {
12139                 return _begin;
12140         }
12141
12142         PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12143         {
12144                 return _end;
12145         }
12146
12147         PUGI__FN void xpath_node_set::sort(bool reverse)
12148         {
12149                 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12150         }
12151
12152         PUGI__FN xpath_node xpath_node_set::first() const
12153         {
12154                 return impl::xpath_first(_begin, _end, _type);
12155         }
12156
12157         PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12158         {
12159         }
12160
12161         PUGI__FN xpath_parse_result::operator bool() const
12162         {
12163                 return error == 0;
12164         }
12165
12166         PUGI__FN const char* xpath_parse_result::description() const
12167         {
12168                 return error ? error : "No error";
12169         }
12170
12171         PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12172         {
12173         }
12174
12175         PUGI__FN const char_t* xpath_variable::name() const
12176         {
12177                 switch (_type)
12178                 {
12179                 case xpath_type_node_set:
12180                         return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12181
12182                 case xpath_type_number:
12183                         return static_cast<const impl::xpath_variable_number*>(this)->name;
12184
12185                 case xpath_type_string:
12186                         return static_cast<const impl::xpath_variable_string*>(this)->name;
12187
12188                 case xpath_type_boolean:
12189                         return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12190
12191                 default:
12192                         assert(false && "Invalid variable type"); // unreachable
12193                         return 0;
12194                 }
12195         }
12196
12197         PUGI__FN xpath_value_type xpath_variable::type() const
12198         {
12199                 return _type;
12200         }
12201
12202         PUGI__FN bool xpath_variable::get_boolean() const
12203         {
12204                 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12205         }
12206
12207         PUGI__FN double xpath_variable::get_number() const
12208         {
12209                 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12210         }
12211
12212         PUGI__FN const char_t* xpath_variable::get_string() const
12213         {
12214                 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12215                 return value ? value : PUGIXML_TEXT("");
12216         }
12217
12218         PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12219         {
12220                 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12221         }
12222
12223         PUGI__FN bool xpath_variable::set(bool value)
12224         {
12225                 if (_type != xpath_type_boolean) return false;
12226
12227                 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12228                 return true;
12229         }
12230
12231         PUGI__FN bool xpath_variable::set(double value)
12232         {
12233                 if (_type != xpath_type_number) return false;
12234
12235                 static_cast<impl::xpath_variable_number*>(this)->value = value;
12236                 return true;
12237         }
12238
12239         PUGI__FN bool xpath_variable::set(const char_t* value)
12240         {
12241                 if (_type != xpath_type_string) return false;
12242
12243                 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12244
12245                 // duplicate string
12246                 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12247
12248                 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12249                 if (!copy) return false;
12250
12251                 memcpy(copy, value, size);
12252
12253                 // replace old string
12254                 if (var->value) impl::xml_memory::deallocate(var->value);
12255                 var->value = copy;
12256
12257                 return true;
12258         }
12259
12260         PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12261         {
12262                 if (_type != xpath_type_node_set) return false;
12263
12264                 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12265                 return true;
12266         }
12267
12268         PUGI__FN xpath_variable_set::xpath_variable_set()
12269         {
12270                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12271                         _data[i] = 0;
12272         }
12273
12274         PUGI__FN xpath_variable_set::~xpath_variable_set()
12275         {
12276                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12277                         _destroy(_data[i]);
12278         }
12279
12280         PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12281         {
12282                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12283                         _data[i] = 0;
12284
12285                 _assign(rhs);
12286         }
12287
12288         PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12289         {
12290                 if (this == &rhs) return *this;
12291
12292                 _assign(rhs);
12293
12294                 return *this;
12295         }
12296
12297 #ifdef PUGIXML_HAS_MOVE
12298         PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12299         {
12300                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12301                 {
12302                         _data[i] = rhs._data[i];
12303                         rhs._data[i] = 0;
12304                 }
12305         }
12306
12307         PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12308         {
12309                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12310                 {
12311                         _destroy(_data[i]);
12312
12313                         _data[i] = rhs._data[i];
12314                         rhs._data[i] = 0;
12315                 }
12316
12317                 return *this;
12318         }
12319 #endif
12320
12321         PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12322         {
12323                 xpath_variable_set temp;
12324
12325                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12326                         if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12327                                 return;
12328
12329                 _swap(temp);
12330         }
12331
12332         PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12333         {
12334                 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12335                 {
12336                         xpath_variable* chain = _data[i];
12337
12338                         _data[i] = rhs._data[i];
12339                         rhs._data[i] = chain;
12340                 }
12341         }
12342
12343         PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12344         {
12345                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12346                 size_t hash = impl::hash_string(name) % hash_size;
12347
12348                 // look for existing variable
12349                 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12350                         if (impl::strequal(var->name(), name))
12351                                 return var;
12352
12353                 return 0;
12354         }
12355
12356         PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12357         {
12358                 xpath_variable* last = 0;
12359
12360                 while (var)
12361                 {
12362                         // allocate storage for new variable
12363                         xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12364                         if (!nvar) return false;
12365
12366                         // link the variable to the result immediately to handle failures gracefully
12367                         if (last)
12368                                 last->_next = nvar;
12369                         else
12370                                 *out_result = nvar;
12371
12372                         last = nvar;
12373
12374                         // copy the value; this can fail due to out-of-memory conditions
12375                         if (!impl::copy_xpath_variable(nvar, var)) return false;
12376
12377                         var = var->_next;
12378                 }
12379
12380                 return true;
12381         }
12382
12383         PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12384         {
12385                 while (var)
12386                 {
12387                         xpath_variable* next = var->_next;
12388
12389                         impl::delete_xpath_variable(var->_type, var);
12390
12391                         var = next;
12392                 }
12393         }
12394
12395         PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12396         {
12397                 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12398                 size_t hash = impl::hash_string(name) % hash_size;
12399
12400                 // look for existing variable
12401                 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12402                         if (impl::strequal(var->name(), name))
12403                                 return var->type() == type ? var : 0;
12404
12405                 // add new variable
12406                 xpath_variable* result = impl::new_xpath_variable(type, name);
12407
12408                 if (result)
12409                 {
12410                         result->_next = _data[hash];
12411
12412                         _data[hash] = result;
12413                 }
12414
12415                 return result;
12416         }
12417
12418         PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12419         {
12420                 xpath_variable* var = add(name, xpath_type_boolean);
12421                 return var ? var->set(value) : false;
12422         }
12423
12424         PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12425         {
12426                 xpath_variable* var = add(name, xpath_type_number);
12427                 return var ? var->set(value) : false;
12428         }
12429
12430         PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12431         {
12432                 xpath_variable* var = add(name, xpath_type_string);
12433                 return var ? var->set(value) : false;
12434         }
12435
12436         PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12437         {
12438                 xpath_variable* var = add(name, xpath_type_node_set);
12439                 return var ? var->set(value) : false;
12440         }
12441
12442         PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12443         {
12444                 return _find(name);
12445         }
12446
12447         PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12448         {
12449                 return _find(name);
12450         }
12451
12452         PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12453         {
12454                 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12455
12456                 if (!qimpl)
12457                 {
12458                 #ifdef PUGIXML_NO_EXCEPTIONS
12459                         _result.error = "Out of memory";
12460                 #else
12461                         throw std::bad_alloc();
12462                 #endif
12463                 }
12464                 else
12465                 {
12466                         using impl::auto_deleter; // MSVC7 workaround
12467                         auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12468
12469                         qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12470
12471                         if (qimpl->root)
12472                         {
12473                                 qimpl->root->optimize(&qimpl->alloc);
12474
12475                                 _impl = impl.release();
12476                                 _result.error = 0;
12477                         }
12478                         else
12479                         {
12480                         #ifdef PUGIXML_NO_EXCEPTIONS
12481                                 if (qimpl->oom) _result.error = "Out of memory";
12482                         #else
12483                                 if (qimpl->oom) throw std::bad_alloc();
12484                                 throw xpath_exception(_result);
12485                         #endif
12486                         }
12487                 }
12488         }
12489
12490         PUGI__FN xpath_query::xpath_query(): _impl(0)
12491         {
12492         }
12493
12494         PUGI__FN xpath_query::~xpath_query()
12495         {
12496                 if (_impl)
12497                         impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12498         }
12499
12500 #ifdef PUGIXML_HAS_MOVE
12501         PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12502         {
12503                 _impl = rhs._impl;
12504                 _result = rhs._result;
12505                 rhs._impl = 0;
12506                 rhs._result = xpath_parse_result();
12507         }
12508
12509         PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12510         {
12511                 if (this == &rhs) return *this;
12512
12513                 if (_impl)
12514                         impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12515
12516                 _impl = rhs._impl;
12517                 _result = rhs._result;
12518                 rhs._impl = 0;
12519                 rhs._result = xpath_parse_result();
12520
12521                 return *this;
12522         }
12523 #endif
12524
12525         PUGI__FN xpath_value_type xpath_query::return_type() const
12526         {
12527                 if (!_impl) return xpath_type_none;
12528
12529                 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12530         }
12531
12532         PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12533         {
12534                 if (!_impl) return false;
12535
12536                 impl::xpath_context c(n, 1, 1);
12537                 impl::xpath_stack_data sd;
12538
12539                 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12540
12541                 if (sd.oom)
12542                 {
12543                 #ifdef PUGIXML_NO_EXCEPTIONS
12544                         return false;
12545                 #else
12546                         throw std::bad_alloc();
12547                 #endif
12548                 }
12549
12550                 return r;
12551         }
12552
12553         PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12554         {
12555                 if (!_impl) return impl::gen_nan();
12556
12557                 impl::xpath_context c(n, 1, 1);
12558                 impl::xpath_stack_data sd;
12559
12560                 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12561
12562                 if (sd.oom)
12563                 {
12564                 #ifdef PUGIXML_NO_EXCEPTIONS
12565                         return impl::gen_nan();
12566                 #else
12567                         throw std::bad_alloc();
12568                 #endif
12569                 }
12570
12571                 return r;
12572         }
12573
12574 #ifndef PUGIXML_NO_STL
12575         PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12576         {
12577                 if (!_impl) return string_t();
12578
12579                 impl::xpath_context c(n, 1, 1);
12580                 impl::xpath_stack_data sd;
12581
12582                 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12583
12584                 if (sd.oom)
12585                 {
12586                 #ifdef PUGIXML_NO_EXCEPTIONS
12587                         return string_t();
12588                 #else
12589                         throw std::bad_alloc();
12590                 #endif
12591                 }
12592
12593                 return string_t(r.c_str(), r.length());
12594         }
12595 #endif
12596
12597         PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12598         {
12599                 impl::xpath_context c(n, 1, 1);
12600                 impl::xpath_stack_data sd;
12601
12602                 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12603
12604                 if (sd.oom)
12605                 {
12606                 #ifdef PUGIXML_NO_EXCEPTIONS
12607                         r = impl::xpath_string();
12608                 #else
12609                         throw std::bad_alloc();
12610                 #endif
12611                 }
12612
12613                 size_t full_size = r.length() + 1;
12614
12615                 if (capacity > 0)
12616                 {
12617                         size_t size = (full_size < capacity) ? full_size : capacity;
12618                         assert(size > 0);
12619
12620                         memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12621                         buffer[size - 1] = 0;
12622                 }
12623
12624                 return full_size;
12625         }
12626
12627         PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12628         {
12629                 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12630                 if (!root) return xpath_node_set();
12631
12632                 impl::xpath_context c(n, 1, 1);
12633                 impl::xpath_stack_data sd;
12634
12635                 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12636
12637                 if (sd.oom)
12638                 {
12639                 #ifdef PUGIXML_NO_EXCEPTIONS
12640                         return xpath_node_set();
12641                 #else
12642                         throw std::bad_alloc();
12643                 #endif
12644                 }
12645
12646                 return xpath_node_set(r.begin(), r.end(), r.type());
12647         }
12648
12649         PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12650         {
12651                 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12652                 if (!root) return xpath_node();
12653
12654                 impl::xpath_context c(n, 1, 1);
12655                 impl::xpath_stack_data sd;
12656
12657                 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12658
12659                 if (sd.oom)
12660                 {
12661                 #ifdef PUGIXML_NO_EXCEPTIONS
12662                         return xpath_node();
12663                 #else
12664                         throw std::bad_alloc();
12665                 #endif
12666                 }
12667
12668                 return r.first();
12669         }
12670
12671         PUGI__FN const xpath_parse_result& xpath_query::result() const
12672         {
12673                 return _result;
12674         }
12675
12676         PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12677         {
12678         }
12679
12680         PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12681         {
12682                 return _impl ? unspecified_bool_xpath_query : 0;
12683         }
12684
12685         PUGI__FN bool xpath_query::operator!() const
12686         {
12687                 return !_impl;
12688         }
12689
12690         PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12691         {
12692                 xpath_query q(query, variables);
12693                 return q.evaluate_node(*this);
12694         }
12695
12696         PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12697         {
12698                 return query.evaluate_node(*this);
12699         }
12700
12701         PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12702         {
12703                 xpath_query q(query, variables);
12704                 return q.evaluate_node_set(*this);
12705         }
12706
12707         PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12708         {
12709                 return query.evaluate_node_set(*this);
12710         }
12711
12712         PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12713         {
12714                 xpath_query q(query, variables);
12715                 return q.evaluate_node(*this);
12716         }
12717
12718         PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12719         {
12720                 return query.evaluate_node(*this);
12721         }
12722 }
12723
12724 #endif
12725
// Pop the Borland compiler option state (only when building with __BORLANDC__).
#if defined(__BORLANDC__)
#       pragma option pop
#endif

// Undo the diagnostic-state pushes performed at the top of this file for _MSC_VER builds.
#ifdef _MSC_VER
// Intel C++ does not properly keep warning state for function templates,
// so popping warning state at the end of translation unit leads to warnings in the middle;
// the warning pop is therefore skipped when __INTEL_COMPILER is defined.
#       ifndef __INTEL_COMPILER
#               pragma warning(pop)
#       endif

// Matches the clang diagnostic push made when __c2__ is defined alongside _MSC_VER.
#       ifdef __c2__
#               pragma clang diagnostic pop
#       endif
#endif

// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
#undef PUGI__NO_INLINE
#undef PUGI__UNLIKELY
#undef PUGI__STATIC_ASSERT
#undef PUGI__DMC_VOLATILE
#undef PUGI__UNSIGNED_OVERFLOW
#undef PUGI__MSVC_CRT_VERSION
#undef PUGI__SNPRINTF
#undef PUGI__NS_BEGIN
#undef PUGI__NS_END
#undef PUGI__FN
#undef PUGI__FN_NO_INLINE
#undef PUGI__GETHEADER_IMPL
#undef PUGI__GETPAGE_IMPL
#undef PUGI__GETPAGE
#undef PUGI__NODETYPE
#undef PUGI__IS_CHARTYPE_IMPL
#undef PUGI__IS_CHARTYPE
#undef PUGI__IS_CHARTYPEX
#undef PUGI__ENDSWITH
#undef PUGI__SKIPWS
#undef PUGI__OPTSET
#undef PUGI__PUSHNODE
#undef PUGI__POPNODE
#undef PUGI__SCANFOR
#undef PUGI__SCANWHILE
#undef PUGI__SCANWHILE_UNROLL
#undef PUGI__ENDSEG
#undef PUGI__THROW_ERROR
#undef PUGI__CHECK_ERROR
12770
12771 #endif
12772
12773 /**
12774  * Copyright (c) 2006-2018 Arseny Kapoulkine
12775  *
12776  * Permission is hereby granted, free of charge, to any person
12777  * obtaining a copy of this software and associated documentation
12778  * files (the "Software"), to deal in the Software without
12779  * restriction, including without limitation the rights to use,
12780  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12781  * copies of the Software, and to permit persons to whom the
12782  * Software is furnished to do so, subject to the following
12783  * conditions:
12784  *
12785  * The above copyright notice and this permission notice shall be
12786  * included in all copies or substantial portions of the Software.
12787  *
12788  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12789  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12790  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12791  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12792  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12793  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12794  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12795  * OTHER DEALINGS IN THE SOFTWARE.
12796  */