2 * pugixml parser - version 1.9
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at http://pugixml.org/
7 * This library is distributed under the MIT License. See notice at the end
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
17 #include "pugixml.hpp"
25 #ifdef PUGIXML_WCHAR_MODE
29 #ifndef PUGIXML_NO_XPATH
34 #ifndef PUGIXML_NO_STL
44 # pragma warning(push)
45 # pragma warning(disable: 4127) // conditional expression is constant
46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 # pragma warning(disable: 4702) // unreachable code
48 # pragma warning(disable: 4996) // this function or variable may be unsafe
51 #if defined(_MSC_VER) && defined(__c2__)
52 # pragma clang diagnostic push
53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
56 #ifdef __INTEL_COMPILER
57 # pragma warning(disable: 177) // function was declared but never referenced
58 # pragma warning(disable: 279) // controlling expression is constant
59 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
69 # pragma warn -8008 // condition is always false
70 # pragma warn -8066 // unreachable code
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 # pragma diag_suppress=178 // function was declared but never referenced
76 # pragma diag_suppress=237 // controlling expression is constant
79 #ifdef __TI_COMPILER_VERSION__
80 # pragma diag_suppress 179 // function was declared but never referenced
// Inlining control: PUGI__NO_INLINE expands to the compiler-specific "do not inline"
// annotation (__declspec(noinline) when _MSC_VER >= 1300, __attribute__((noinline))
// when __GNUC__ is defined); the last definition below makes it expand to nothing.
84 #if defined(_MSC_VER) && _MSC_VER >= 1300
85 # define PUGI__NO_INLINE __declspec(noinline)
86 #elif defined(__GNUC__)
87 # define PUGI__NO_INLINE __attribute__((noinline))
89 # define PUGI__NO_INLINE
92 // Branch weight controls
// PUGI__UNLIKELY(cond) marks a condition as expected to be false. With GCC-compatible
// compilers (excluding __c2__) it maps to __builtin_expect(cond, 0); the alternative
// definition just evaluates the condition unchanged.
93 #if defined(__GNUC__) && !defined(__c2__)
94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
96 # define PUGI__UNLIKELY(cond) (cond)
99 // Simple static assertion
// Declares a char array whose size is 1 when cond holds and -1 otherwise, so a false
// condition fails to compile (negative array size). The (void) read of element 0 keeps
// the array from being reported as unused. The expansion is a braced block, so the macro
// can only be used where a statement is allowed.
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
// PUGI__DMC_VOLATILE is defined either as the 'volatile' qualifier or as nothing.
// NOTE(review): the condition selecting between the two definitions is not shown next to
// them here; presumably it tests for the Digital Mars compiler - confirm.
104 # define PUGI__DMC_VOLATILE volatile
106 # define PUGI__DMC_VOLATILE
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
// PUGI__UNSIGNED_OVERFLOW is a function attribute: when clang reports support for
// no_sanitize it disables the "unsigned-integer-overflow" check for the annotated
// function; in every other configuration it expands to nothing.
110 #if defined(__clang__) && defined(__has_attribute)
111 # if __has_attribute(no_sanitize)
112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
114 # define PUGI__UNSIGNED_OVERFLOW
117 # define PUGI__UNSIGNED_OVERFLOW
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
// The fallbacks are only supplied when long long support is requested and none of the
// three limits is already defined. LLONG_MAX comes from the compiler-provided
// __LONG_LONG_MAX__; the other two are derived from it. LLONG_MIN may refer to LLONG_MAX
// before its #define because macro bodies are expanded at the point of use.
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
129 # define LLONG_MIN (-LLONG_MAX - 1LL)
130 # define LLONG_MAX __LONG_LONG_MAX__
131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
// PUGI__MSVC_CRT_VERSION mirrors _MSC_VER, but is left undefined when __S3E__ is defined,
// so code that needs MSVC CRT extensions should test this macro instead of _MSC_VER.
135 #if defined(_MSC_VER) && !defined(__S3E__)
136 # define PUGI__MSVC_CRT_VERSION _MSC_VER
139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
// The first argument must be a real array (not a pointer): the bounded variants compute
// the capacity with sizeof(buf) / _countof(buf).
// Selection order: C++11 snprintf, then _snprintf_s with _TRUNCATE for MSVC CRT >= 1400.
// The last definition maps the macro straight to sprintf, which performs no bounds check.
140 #if __cplusplus >= 201103
141 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
143 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
145 # define PUGI__SNPRINTF sprintf
148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
// PUGI__NS_BEGIN / PUGI__NS_END open and close the implementation namespace:
//  - header-only mode: pugi::impl, with PUGI__FN and PUGI__FN_NO_INLINE both 'inline';
//  - MSVC older than _MSC_VER 1300: pugi::impl without the anonymous namespace;
//  - otherwise: an anonymous namespace nested inside pugi::impl, with PUGI__FN_NO_INLINE
//    mapped to PUGI__NO_INLINE.
// The number of closing braces in PUGI__NS_END always matches the matching PUGI__NS_BEGIN.
149 #ifdef PUGIXML_HEADER_ONLY
150 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
151 # define PUGI__NS_END } }
152 # define PUGI__FN inline
153 # define PUGI__FN_NO_INLINE inline
155 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
156 # define PUGI__NS_BEGIN namespace pugi { namespace impl {
157 # define PUGI__NS_END } }
159 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
160 # define PUGI__NS_END } } }
163 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE
// Fallback definitions of the fixed-width integer types for old MSVC (_MSC_VER < 1600)
// and old Borland (__BORLANDC__ < 0x561) compilers, built from the __intN extensions.
// uintptr_t is aliased to size_t unless _UINTPTR_T_DEFINED says it already exists.
167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
170 # ifndef _UINTPTR_T_DEFINED
171 typedef size_t uintptr_t;
174 typedef unsigned __int8 uint8_t;
175 typedef unsigned __int16 uint16_t;
176 typedef unsigned __int32 uint32_t;
// Default allocation function: takes the requested size in bytes and returns a pointer
// to the block. It is the initial value of xml_memory_management_function_storage<T>::allocate.
184 PUGI__FN void* default_allocate(size_t size)
// Default deallocation function, the counterpart of default_allocate. It is the initial
// value of xml_memory_management_function_storage<T>::deallocate.
189 PUGI__FN void default_deallocate(void* ptr)
// Holder for the two global memory management function pointers (allocate/deallocate).
// The template parameter T is not used by the members; it exists only so that the static
// data members are template statics (see the comment before their definitions).
194 template <typename T>
195 struct xml_memory_management_function_storage
197 static allocation_function allocate;
198 static deallocation_function deallocate;
201 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
202 // Without a template<> we'll get multiple definitions of the same static
// Both pointers start out referring to the default_* functions.
203 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
204 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
// xml_memory is the single instantiation used by the rest of the implementation
// (xml_memory::allocate / xml_memory::deallocate).
206 typedef xml_memory_management_function_storage<int> xml_memory;
// Returns the length of the null-terminated string s, measured in char_t units.
// The implementation is selected by PUGIXML_WCHAR_MODE (wide vs. narrow char_t).
212 PUGI__FN size_t strlength(const char_t* s)
216 #ifdef PUGIXML_WCHAR_MODE
223 // Compare two strings
// Returns true when src and dst hold identical null-terminated strings.
// wcscmp is used under PUGIXML_WCHAR_MODE; strcmp is the narrow-character alternative.
224 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
228 #ifdef PUGIXML_WCHAR_MODE
229 return wcscmp(src, dst) == 0;
231 return strcmp(src, dst) == 0;
235 // Compare lhs with [rhs_begin, rhs_end)
// lhs is a null-terminated string; rhs points at a range of 'count' characters that
// need not be terminated. The loop compares the first 'count' characters pairwise
// (a mismatch presumably yields false), and the final check requires lhs to end exactly
// after 'count' characters, so a longer lhs with the same prefix does not match.
236 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
238 for (size_t i = 0; i < count; ++i)
239 if (lhs[i] != rhs[i])
242 return lhs[count] == 0;
245 // Get length of wide string, even if CRT lacks wide character support
// Returns the number of wchar_t characters in s.
// The manual path keeps a cursor 'end' that starts at s; the result is the distance
// between the cursor and s (presumably after advancing the cursor to the terminator).
246 PUGI__FN size_t strlength_wide(const wchar_t* s)
250 #ifdef PUGIXML_WCHAR_MODE
253 const wchar_t* end = s;
255 return static_cast<size_t>(end - s);
260 // auto_ptr-like object for exception recovery
// Pairs a raw pointer with the function that must be called to release it.
// D is the deleter signature: a plain function taking T*.
262 template <typename T> struct auto_deleter
264 typedef void (*D)(T*);
// Stores the pointer and its deleter; nothing is invoked at construction time.
269 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
// The deleter is only called for a non-null pointer.
275 if (data) deleter(data);
287 #ifdef PUGIXML_COMPACT
// Pointer-keyed hash table (const void* key -> void* value) using open addressing.
// The occupancy is kept below three quarters of the capacity: insert() asserts
// _count < _capacity - _capacity / 4 and reserve() rehashes before that limit is reached.
// A null key marks an empty slot (see get_item), so null cannot be stored as a key.
289 class compact_hash_table
// Starts empty: no item array is allocated until the first rehash.
292 compact_hash_table(): _items(0), _capacity(0), _count(0)
// The item array is released through the global deallocation function.
300 xml_memory::deallocate(_items);
// Looks up the value stored for key; an unallocated table yields 0 immediately.
307 void* find(const void* key)
309 if (_capacity == 0) return 0;
311 item_t* item = get_item(key);
// get_item returns either the slot holding this key or an untouched empty slot.
313 assert(item->key == key || (item->key == 0 && item->value == 0));
// Stores value for key. The caller must have made room beforehand (see reserve):
// the assertion rejects an unallocated table and a table at its load limit.
318 void insert(const void* key, void* value)
320 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
322 item_t* item = get_item(key);
// Ensures that 'extra' more items can be inserted without exceeding the load limit;
// triggers a rehash sized for _count + extra when they would not fit.
334 bool reserve(size_t extra = 16)
336 if (_count + extra >= _capacity - _capacity / 4)
337 return rehash(_count + extra);
// Rebuilds the table so that 'count' items fit below the load limit (defined out of line).
354 bool rehash(size_t count);
// Returns the slot for key: the one already holding it, or the first empty slot met
// while probing.
356 item_t* get_item(const void* key)
359 assert(_capacity > 0);
// NOTE(review): masking with _capacity - 1 assumes the capacity is a power of two;
// rehash starts from 32 - confirm that it only ever doubles.
361 size_t hashmod = _capacity - 1;
362 size_t bucket = hash(key) & hashmod;
// At most _capacity probes are made.
364 for (size_t probe = 0; probe <= hashmod; ++probe)
366 item_t& probe_item = _items[bucket];
368 if (probe_item.key == key || probe_item.key == 0)
371 // hash collision, quadratic probing
// The step grows by one on each probe (1, 2, 3, ...), wrapping via the mask.
372 bucket = (bucket + probe + 1) & hashmod;
375 assert(false && "Hash table is full"); // unreachable
// Hashes the pointer value itself (truncated to unsigned int); unsigned wrap-around in
// the mixing steps is intentional, hence PUGI__UNSIGNED_OVERFLOW.
379 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
381 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
383 // MurmurHash3 32-bit finalizer
// Rebuilds the table with a capacity large enough to hold 'count' items below the
// three-quarter load limit, re-inserting the existing entries into the new array.
// Returns bool; NOTE(review): presumably false when the new array cannot be allocated.
394 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
// The minimum capacity is 32; the loop grows it until 'count' fits under the load limit.
396 size_t capacity = 32;
397 while (count >= capacity - capacity / 4)
// Build the replacement in a temporary table so that insert() can be reused.
400 compact_hash_table rt;
401 rt._capacity = capacity;
402 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
// All-zero items are empty slots (null key, null value).
407 memset(rt._items, 0, sizeof(item_t) * capacity);
// Walk every slot of the old array and move its entry into the new table.
409 for (size_t i = 0; i < _capacity; ++i)
411 rt.insert(_items[i].key, _items[i].value);
// Release the old array and adopt the new capacity.
414 xml_memory::deallocate(_items);
416 _capacity = capacity;
// Every live entry must have been carried over.
419 assert(_count == rt._count);
// Alignment, in bytes, used for blocks carved out of memory pages:
// 4 in compact mode, the size of a pointer otherwise.
428 #ifdef PUGIXML_COMPACT
429 static const uintptr_t xml_memory_block_alignment = 4;
431 static const uintptr_t xml_memory_block_alignment = sizeof(void*);
434 // extra metadata bits
// Bit layout of the low byte of a node/attribute header:
//   bits 0-3 (15): node type (see PUGI__NODETYPE)
//   bit 4   (16): value string was allocated and must be freed (see destroy_node)
//   bit 5   (32): name string was allocated and must be freed (see destroy_node)
//   bit 6   (64): "contents shared" flag
435 static const uintptr_t xml_memory_page_contents_shared_mask = 64;
436 static const uintptr_t xml_memory_page_name_allocated_mask = 32;
437 static const uintptr_t xml_memory_page_value_allocated_mask = 16;
438 static const uintptr_t xml_memory_page_type_mask = 15;
440 // combined masks for string uniqueness
// Each combined mask is the "allocated" bit for the string OR-ed with the shared bit.
441 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
442 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
// Header encoding helpers.
// Compact mode: the header is an object with a get_page() member, and
// PUGI__GETHEADER_IMPL is unused.
// Non-compact mode: the header is an integer that stores the byte distance from the
// owning page to the object in the bits above the low 8, and the flag bits in the low 8.
// PUGI__GETPAGE_IMPL recovers the page by subtracting (header >> 8) from the address of
// the header field itself, which therefore has to sit at the very start of the object.
444 #ifdef PUGIXML_COMPACT
445 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
446 #define PUGI__GETPAGE_IMPL(header) (header).get_page()
448 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
449 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
450 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
// PUGI__GETPAGE(n): memory page that owns node/attribute n.
// PUGI__NODETYPE(n): node type stored in the low four header bits of n.
453 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
454 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
456 struct xml_allocator;
// Header placed at the start of every memory page handed out by xml_allocator.
// Pages are linked through prev/next, and the usable data area follows the header
// directly (allocations are addressed as page + sizeof(xml_memory_page) + offset).
458 struct xml_memory_page
// Initializes a page header in place at the start of 'memory' and returns it.
// No allocation happens here; the caller provides the storage.
460 static xml_memory_page* construct(void* memory)
462 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
464 result->allocator = 0;
// A fresh page has nothing allocated and nothing freed.
467 result->busy_size = 0;
468 result->freed_size = 0;
470 #ifdef PUGIXML_COMPACT
471 result->compact_string_base = 0;
472 result->compact_shared_parent = 0;
473 result->compact_page_marker = 0;
// Allocator that owns this page.
479 xml_allocator* allocator;
// Neighbouring pages in the allocator's page list.
481 xml_memory_page* prev;
482 xml_memory_page* next;
// Compact-mode per-page state: base pointer for compact string offsets, parent pointer
// shared by nodes of this page, and the most recently written page marker.
487 #ifdef PUGIXML_COMPACT
488 char_t* compact_string_base;
489 void* compact_shared_parent;
490 uint32_t* compact_page_marker;
// Number of usable data bytes in a regular page: the configured page size minus the page
// header. PUGIXML_MEMORY_PAGE_SIZE overrides the page size when it is defined.
494 static const size_t xml_memory_page_size =
495 #ifdef PUGIXML_MEMORY_PAGE_SIZE
496 (PUGIXML_MEMORY_PAGE_SIZE)
500 - sizeof(xml_memory_page);
// Bookkeeping header stored immediately before every string allocated from a page.
// Both fields are expressed in units of xml_memory_block_alignment bytes (see
// allocate_string / deallocate_string) so that they fit into 16 bits.
502 struct xml_memory_string_header
504 uint16_t page_offset; // offset from page->data
505 uint16_t full_size; // 0 if string occupies whole page
// Creates an allocator that starts allocating from 'root'. The root page's busy_size is
// cached in _busy_size; the cached value is written back to the page when needed
// (see deallocate_memory and allocate_memory_oob).
510 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
512 #ifdef PUGIXML_COMPACT
// Allocates a new page with room for data_size bytes of payload after the page header.
// Returns 0 when the underlying allocation function fails.
517 xml_memory_page* allocate_page(size_t data_size)
519 size_t size = sizeof(xml_memory_page) + data_size;
521 // allocate block with some alignment, leaving memory for worst-case padding
522 void* memory = xml_memory::allocate(size);
523 if (!memory) return 0;
525 // prepare page structure
526 xml_memory_page* page = xml_memory_page::construct(memory);
// The new page is owned by the same allocator as the current root page.
529 page->allocator = _root->allocator;
// Returns the page's memory to the global deallocation function. The page must already
// be unlinked from any page list; this function does not touch prev/next.
534 static void deallocate_page(xml_memory_page* page)
536 xml_memory::deallocate(page);
// Slow path used when the request does not fit into the current root page
// (defined out of line).
539 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
// Allocates 'size' bytes and reports the page that holds them through out_page.
// Fast path: the block is taken from the root page at the current _busy_size offset.
541 void* allocate_memory(size_t size, xml_memory_page*& out_page)
// Requests that would overflow the root page are forwarded to the slow path.
543 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
544 return allocate_memory_oob(size, out_page);
// The data area starts right after the page header.
546 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
555 #ifdef PUGIXML_COMPACT
// Compact-mode object allocation. Every object must be able to reach a "page marker"
// (a uint32_t holding the byte distance back to the page start) through a one-byte
// offset, so room for a possible marker is requested together with the object.
556 void* allocate_object(size_t size, xml_memory_page*& out_page)
558 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
559 if (!result) return 0;
// Distance from the page's latest marker to the new block.
562 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
// The latest marker is out of reach of the one-byte encoding (256 alignment units):
// write a new marker at the start of the block.
564 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
567 uint32_t* marker = static_cast<uint32_t*>(result);
// The marker stores how far it lies from the start of its page.
569 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
570 out_page->compact_page_marker = marker;
572 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
573 // this will make sure deallocate_memory correctly tracks the size
574 out_page->freed_size += sizeof(uint32_t);
580 // roll back uint32_t part
// No marker was needed, so the extra uint32_t that was reserved is given back.
581 _busy_size -= sizeof(uint32_t);
// Non-compact object allocation: objects need no extra bookkeeping, so this simply
// forwards to allocate_memory.
587 void* allocate_object(size_t size, xml_memory_page*& out_page)
589 return allocate_memory(size, out_page);
// Marks 'size' bytes at 'ptr' inside 'page' as freed. Memory is not reused piecemeal:
// the page only keeps a running total of freed bytes and is recycled or released once
// everything allocated from it has been freed.
593 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
// The root page's busy size lives in the allocator; sync it before using it below.
595 if (page == _root) page->busy_size = _busy_size;
// ptr must lie inside the used part of the page's data area.
597 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
600 page->freed_size += size;
601 assert(page->freed_size <= page->busy_size);
// Everything allocated from this page has now been freed.
603 if (page->freed_size == page->busy_size)
607 assert(_root == page);
609 // top page freed, just reset sizes
611 page->freed_size = 0;
613 #ifdef PUGIXML_COMPACT
614 // reset compact state to maximize efficiency
615 page->compact_string_base = 0;
616 page->compact_shared_parent = 0;
617 page->compact_page_marker = 0;
// Any other page is unlinked from the list and released.
624 assert(_root != page);
627 // remove from the list
628 page->prev->next = page->next;
629 page->next->prev = page->prev;
632 deallocate_page(page);
// Allocates storage for 'length' char_t characters (the caller accounts for the
// terminator in 'length'), preceded by an xml_memory_string_header that lets
// deallocate_string find the owning page and the block size again.
// Returns 0 when the allocation fails.
637 char_t* allocate_string(size_t length)
// Largest byte offset representable by a 16-bit field counted in alignment units.
639 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
// Any offset inside a regular page must be encodable.
641 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
643 // allocate memory for string and header block
644 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
646 // round size up to block alignment boundary
647 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
649 xml_memory_page* page;
650 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
652 if (!header) return 0;
// Byte offset of the header from the start of the page's data area.
655 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
657 assert(page_offset % xml_memory_block_alignment == 0);
658 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
// Stored in alignment units so that it fits into 16 bits.
659 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
661 // full_size == 0 for large strings that occupy the whole page
662 assert(full_size % xml_memory_block_alignment == 0);
// A string too large to encode must be the only allocation in its page.
663 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
664 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
666 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
667 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
// The characters start immediately after the header.
668 return static_cast<char_t*>(static_cast<void*>(header + 1));
// Frees a string previously returned by allocate_string. The owning page and the block
// size are recovered from the xml_memory_string_header stored right before the string.
671 void deallocate_string(char_t* string)
673 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
674 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
// Step back from the characters to the header.
677 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
// Undo the encoding done by allocate_string: units -> bytes, plus the page header size.
681 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
682 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
684 // if full_size == 0 then this string occupies the whole page
685 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
// The freed block starts at the header, not at the characters.
687 deallocate_memory(header, full_size, page);
692 #ifdef PUGIXML_COMPACT
693 return _hash->reserve();
699 xml_memory_page* _root;
702 #ifdef PUGIXML_COMPACT
703 compact_hash_table* _hash;
// Slow path of allocate_memory: the request does not fit into the current root page,
// so a new page is allocated for it. The returned block is the start of the new page's
// data area.
707 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
// Requests above a quarter of a page get a dedicated page of exactly their size.
709 const size_t large_allocation_threshold = xml_memory_page_size / 4;
711 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
// Small request: a regular page is linked in for it.
716 if (size <= large_allocation_threshold)
// Write the cached busy size back to the old root page.
718 _root->busy_size = _busy_size;
720 // insert page at the end of linked list
729 // insert page before the end of linked list, so that it is deleted as soon as possible
730 // the last page is not deleted even if it's empty (see deallocate_memory)
733 page->prev = _root->prev;
736 _root->prev->next = page;
// A dedicated page is fully occupied by this single allocation.
739 page->busy_size = size;
742 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
746 #ifdef PUGIXML_COMPACT
// Compact-mode objects are aligned to 4 bytes (1 << 2); offsets between them are stored
// in units of this alignment.
748 static const uintptr_t compact_alignment_log2 = 2;
749 static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
// Two-byte object header for compact mode: _page is the distance (in alignment units)
// from the page's marker to this header, and _flags holds the type/flag bits.
754 compact_header(xml_memory_page* page, unsigned int flags)
756 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
// The marker must precede this object by less than 256 alignment units; allocate_object
// writes a fresh marker when that would not hold.
758 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
759 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
761 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
762 _flags = static_cast<unsigned char>(flags);
// The bit operators act on the flag byte only, so the header can be used like the
// integer header of non-compact mode.
765 void operator&=(uintptr_t mod)
767 _flags &= static_cast<unsigned char>(mod);
770 void operator|=(uintptr_t mod)
772 _flags |= static_cast<unsigned char>(mod);
775 uintptr_t operator&(uintptr_t mod) const
// Recovers the owning page in two hops: header -> page marker (via _page), then
// marker -> page start (via the byte distance stored in the marker).
780 xml_memory_page* get_page() const
782 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
783 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
784 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
786 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
791 unsigned char _flags;
// Returns the page that owns the object containing 'object'. 'object' is a member
// located header_offset bytes after the compact_header of its enclosing object.
794 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
796 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
798 return header->get_page();
// Fallback lookup for values that do not fit the compact encoding: the value is fetched
// from the owning allocator's hash table, keyed by the address of the compact field.
801 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
803 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
// Fallback store for values that do not fit the compact encoding: the value is recorded
// in the owning allocator's hash table, keyed by the address of the compact field.
// The table must already have room for the entry (insert asserts on a full table).
806 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
808 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
// One-byte pointer. The target is encoded as a distance from this field, in units of
// compact_alignment, shifted by 'start' so that the encodable window begins 'start'
// units away from the field (the default of -126 allows targets on both sides; 0 allows
// only targets at or after it). Targets outside the window are stored in the hash table.
// header_offset is the distance from the enclosing object's compact_header to this field.
811 template <typename T, int header_offset, int start = -126> class compact_pointer
// A zero _data is the initial state.
814 compact_pointer(): _data(0)
818 void operator=(const compact_pointer& rhs)
823 void operator=(T* value)
827 // value is guaranteed to be compact-aligned; 'this' is not
828 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
829 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
830 // compensate for arithmetic shift rounding for negative values
831 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
832 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
// Offsets 0..253 are stored inline as offset + 1 (values 1..254); the unsigned
// comparison also rejects negative offsets.
834 if (static_cast<uintptr_t>(offset) <= 253)
835 _data = static_cast<unsigned char>(offset + 1);
// Out of range: the pointer is kept in the allocator's hash table instead.
838 compact_set_value<header_offset>(this, value);
// Decoding: mirror of the encoding above, relative to 'this' rounded down to the
// compact alignment.
853 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
855 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
// Values that were not stored inline are looked up in the hash table.
858 return compact_get_value<header_offset, T>(this);
864 T* operator->() const
// Two-byte pointer used for parent links. Only targets at or before this field are
// encoded inline (the offset is biased by 65533). Targets outside that window use either
// the page-wide shared parent slot or the allocator's hash table.
873 template <typename T, int header_offset> class compact_pointer_parent
// A zero _data is the initial state.
876 compact_pointer_parent(): _data(0)
880 void operator=(const compact_pointer_parent& rhs)
885 void operator=(T* value)
889 // value is guaranteed to be compact-aligned; 'this' is not
890 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
891 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
892 // compensate for arithmetic shift behavior for negative values
893 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
894 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
// Biased offsets 0..65533 are stored inline as offset + 1 (values 1..65534).
896 if (static_cast<uintptr_t>(offset) <= 65533)
898 _data = static_cast<unsigned short>(offset + 1);
// Out of range: try the per-page shared parent slot first.
902 xml_memory_page* page = compact_get_page(this, header_offset);
// The first far parent seen on a page claims the shared slot.
904 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
905 page->compact_shared_parent = value;
907 if (page->compact_shared_parent == value)
// A different far parent falls back to the hash table.
913 compact_set_value<header_offset>(this, value);
// Decoding: inline values are relative to 'this' rounded down to the compact alignment.
931 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
933 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
// NOTE(review): 65534 is also produced by the inline encoding above when the biased
// offset is 65533; the (unseen) condition guarding the inline decode must take
// precedence for that case - confirm.
935 else if (_data == 65534)
936 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
938 return compact_get_value<header_offset, T>(this);
944 T* operator->() const
// One-byte string pointer. A string is located as an offset (in char_t units) from the
// page's compact_string_base; the offset is split into a 16-bit "base" stored in the
// enclosing object (base_offset bytes before this field, in units of 128 characters) and
// a one-byte remainder stored here. Strings that cannot be encoded this way are kept in
// the allocator's hash table.
953 template <int header_offset, int base_offset> class compact_string
// A zero _data is the initial state.
956 compact_string(): _data(0)
960 void operator=(const compact_string& rhs)
965 void operator=(char_t* value)
969 xml_memory_page* page = compact_get_page(this, header_offset);
// The first string assigned on a page becomes the reference point for the page.
971 if (PUGI__UNLIKELY(page->compact_string_base == 0))
972 page->compact_string_base = value;
974 ptrdiff_t offset = value - page->compact_string_base;
// Only non-negative offsets whose 128-character base fits into 16 bits can be encoded.
976 if (static_cast<uintptr_t>(offset) < (65535 << 7))
978 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
979 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
// Base not set yet: store the high part in the shared base and the low 7 bits here
// (both biased by one so that zero keeps meaning "unset").
983 *base = static_cast<uint16_t>((offset >> 7) + 1);
984 _data = static_cast<unsigned char>((offset & 127) + 1);
// Base already set (it is shared with the sibling string field that uses the same
// slot): encode relative to the existing base if the remainder fits in a byte.
988 ptrdiff_t remainder = offset - ((*base - 1) << 7);
990 if (static_cast<uintptr_t>(remainder) <= 253)
992 _data = static_cast<unsigned char>(remainder + 1);
// Remainder too large for the existing base.
996 compact_set_value<header_offset>(this, value);
// Offset not encodable at all.
1004 compact_set_value<header_offset>(this, value);
// Decoding: rebuild the offset from the shared base and the local remainder.
1015 operator char_t*() const
1021 xml_memory_page* page = compact_get_page(this, header_offset);
1023 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1024 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1027 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1029 return page->compact_string_base + offset;
// Strings that were not stored inline are looked up in the hash table.
1033 return compact_get_value<header_offset, char_t>(this);
1041 unsigned char _data;
1046 #ifdef PUGIXML_COMPACT
// Compact-mode attribute: 8 bytes in total (checked by the static assertion).
// The first template argument of each compact field is its byte distance from 'header';
// for the strings the second argument is the distance back to namevalue_base.
1049 struct xml_attribute_struct
1051 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1053 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1056 impl::compact_header header;
// Shared 128-character base used by both name and value (see compact_string).
1058 uint16_t namevalue_base;
1060 impl::compact_string<4, 2> name;
1061 impl::compact_string<5, 3> value;
// prev_attribute_c may point in either direction (default window); next_attribute only
// encodes targets at or after the field (start = 0).
1063 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1064 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
// Compact-mode node: 12 bytes in total (checked by the static assertion).
// The node type is stored in the header flags. The first template argument of each
// compact field is its byte distance from 'header'.
1067 struct xml_node_struct
1069 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1071 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1074 impl::compact_header header;
// Shared 128-character base used by both name and value (see compact_string).
1076 uint16_t namevalue_base;
1078 impl::compact_string<4, 2> name;
1079 impl::compact_string<5, 3> value;
// Two-byte parent link.
1081 impl::compact_pointer_parent<xml_node_struct, 6> parent;
// One-byte links; those declared with start = 0 only encode targets at or after the field.
1083 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1085 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1086 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1088 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
// Non-compact attribute: plain pointers plus an integer header that encodes the distance
// to the owning page and the flag bits (see PUGI__GETHEADER_IMPL).
1094 struct xml_attribute_struct
// All links start out null; the header is computed from this object's own address.
1096 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1098 header = PUGI__GETHEADER_IMPL(this, page, 0);
// prev_attribute_c is cyclic: the first attribute's prev points at the last attribute
// (see append_attribute). next_attribute is null for the last attribute.
1106 xml_attribute_struct* prev_attribute_c;
1107 xml_attribute_struct* next_attribute;
// Non-compact node: plain pointers plus an integer header that encodes the distance to
// the owning page, the node type and the flag bits (see PUGI__GETHEADER_IMPL).
1110 struct xml_node_struct
// All links start out null; the node type goes into the low header bits.
1112 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1114 header = PUGI__GETHEADER_IMPL(this, page, type);
1122 xml_node_struct* parent;
1124 xml_node_struct* first_child;
// prev_sibling_c is cyclic: the first child's prev points at the last child
// (see append_node). next_sibling is null for the last child.
1126 xml_node_struct* prev_sibling_c;
1127 xml_node_struct* next_sibling;
1129 xml_attribute_struct* first_attribute;
// Entry of a singly linked list of additional buffers attached to a document
// (see xml_document_struct::extra_buffers).
1135 struct xml_extra_buffer
1138 xml_extra_buffer* next;
// Document root: it is both the root node (type node_document) and the allocator for
// the whole tree, which is what lets get_document() downcast a page's allocator pointer
// to the document.
1141 struct xml_document_struct: public xml_node_struct, public xml_allocator
// 'page' is used both as the page owning the node part and as the allocator's root page.
1143 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1147 const char_t* buffer;
// Head of the list of additional buffers.
1149 xml_extra_buffer* extra_buffers;
// Compact mode only: hash table backing the out-of-range compact pointers/strings.
1151 #ifdef PUGIXML_COMPACT
1152 compact_hash_table hash;
// Returns the allocator that owns 'object' (a node or attribute), found through the
// memory page the object lives in.
1156 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1160 return *PUGI__GETPAGE(object)->allocator;
// Returns the document that owns 'object'. This relies on every allocator being the
// xml_allocator base of an xml_document_struct, which makes the downcast valid.
1163 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1167 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1171 // Low-level DOM operations
// Allocates and constructs a new, unlinked attribute from 'alloc'.
// Returns 0 when memory cannot be obtained.
1173 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1175 xml_memory_page* page;
1176 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1177 if (!memory) return 0;
// Placement-new: the attribute is constructed inside the page memory.
1179 return new (memory) xml_attribute_struct(page);
// Allocates and constructs a new, unlinked node of the given type from 'alloc'.
// Returns 0 when memory cannot be obtained.
1182 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1184 xml_memory_page* page;
1185 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1186 if (!memory) return 0;
// Placement-new: the node is constructed inside the page memory.
1188 return new (memory) xml_node_struct(page, type);
// Releases an attribute: its name/value strings are freed only when the corresponding
// "allocated" header bit is set, then the attribute's own storage is returned to its page.
// The attribute is not unlinked from any list here.
1191 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1193 if (a->header & impl::xml_memory_page_name_allocated_mask)
1194 alloc.deallocate_string(a->name);
1196 if (a->header & impl::xml_memory_page_value_allocated_mask)
1197 alloc.deallocate_string(a->value);
1199 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
// Releases a node together with its attributes and its whole subtree (recursively).
// Name/value strings are freed only when the corresponding "allocated" header bit is set.
// The node is not unlinked from its parent here.
1202 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1204 if (n->header & impl::xml_memory_page_name_allocated_mask)
1205 alloc.deallocate_string(n->name);
1207 if (n->header & impl::xml_memory_page_value_allocated_mask)
1208 alloc.deallocate_string(n->value);
// The successor is read before each element is destroyed, because destruction
// invalidates the element's links.
1210 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1212 xml_attribute_struct* next = attr->next_attribute;
1214 destroy_attribute(attr, alloc);
1219 for (xml_node_struct* child = n->first_child; child; )
1221 xml_node_struct* next = child->next_sibling;
1223 destroy_node(child, alloc);
// Finally release the node's own storage.
1228 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
// Links 'child' as the last child of 'node'.
// Sibling lists are null-terminated through next_sibling and cyclic through
// prev_sibling_c: the first child's prev_sibling_c always points at the last child.
1231 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1233 child->parent = node;
1235 xml_node_struct* head = node->first_child;
// Non-empty list: the current tail is reachable through the head's cyclic prev link.
1239 xml_node_struct* tail = head->prev_sibling_c;
1241 tail->next_sibling = child;
1242 child->prev_sibling_c = tail;
1243 head->prev_sibling_c = child;
// Empty list: the child becomes the only element and is its own cyclic predecessor.
1247 node->first_child = child;
1248 child->prev_sibling_c = child;
// Links 'child' as the first child of 'node', keeping the cyclic prev_sibling_c
// invariant (the first child's prev points at the last child).
1252 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1254 child->parent = node;
1256 xml_node_struct* head = node->first_child;
// Non-empty list: the new head inherits the link to the tail, and the old head's
// predecessor becomes the child.
1260 child->prev_sibling_c = head->prev_sibling_c;
1261 head->prev_sibling_c = child;
// Empty list: the child is its own cyclic predecessor.
1264 child->prev_sibling_c = child;
1266 child->next_sibling = head;
1267 node->first_child = child;
// Links 'child' into the sibling list immediately after 'node' (same parent).
1270 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1272 xml_node_struct* parent = node->parent;
1274 child->parent = parent;
// Fix the back link that has to point at the child: the following sibling's if there is
// one, otherwise the head's cyclic prev link, since the child becomes the new tail.
1276 if (node->next_sibling)
1277 node->next_sibling->prev_sibling_c = child;
1279 parent->first_child->prev_sibling_c = child;
1281 child->next_sibling = node->next_sibling;
1282 child->prev_sibling_c = node;
1284 node->next_sibling = child;
// Links 'child' into the sibling list immediately before 'node' (same parent).
1287 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1289 xml_node_struct* parent = node->parent;
1291 child->parent = parent;
// For the first child, prev_sibling_c is the tail, whose next_sibling is null; so a
// non-null next_sibling here means 'node' has a real predecessor to relink. Otherwise
// 'node' was the head and the child becomes the new first child.
1293 if (node->prev_sibling_c->next_sibling)
1294 node->prev_sibling_c->next_sibling = child;
1296 parent->first_child = child;
1298 child->prev_sibling_c = node->prev_sibling_c;
1299 child->next_sibling = node;
1301 node->prev_sibling_c = child;
// Unlinks 'node' from its parent's child list. The node itself is not destroyed.
1304 inline void remove_node(xml_node_struct* node)
1306 xml_node_struct* parent = node->parent;
// Back link: the following sibling (or, when removing the tail, the head's cyclic prev
// link) now refers to the node's predecessor.
1308 if (node->next_sibling)
1309 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1311 parent->first_child->prev_sibling_c = node->prev_sibling_c;
// Forward link: a real predecessor skips over the node; when the node was the head
// (its prev is the tail, whose next_sibling is null) the parent's first_child moves on.
1313 if (node->prev_sibling_c->next_sibling)
1314 node->prev_sibling_c->next_sibling = node->next_sibling;
1316 parent->first_child = node->next_sibling;
// Clear the removed node's sibling links.
1319 node->prev_sibling_c = 0;
1320 node->next_sibling = 0;
// Links 'attr' as the last attribute of 'node'.
// Attribute lists are null-terminated through next_attribute and cyclic through
// prev_attribute_c: the first attribute's prev_attribute_c points at the last one.
1323 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1325 xml_attribute_struct* head = node->first_attribute;
// Non-empty list: the current tail is reachable through the head's cyclic prev link.
1329 xml_attribute_struct* tail = head->prev_attribute_c;
1331 tail->next_attribute = attr;
1332 attr->prev_attribute_c = tail;
1333 head->prev_attribute_c = attr;
// Empty list: the attribute becomes the only element and its own cyclic predecessor.
1337 node->first_attribute = attr;
1338 attr->prev_attribute_c = attr;
// Links 'attr' as the first attribute of 'node', keeping the cyclic prev_attribute_c
// invariant (the first attribute's prev points at the last attribute).
1342 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1344 xml_attribute_struct* head = node->first_attribute;
// Non-empty list: the new head inherits the link to the tail.
1348 attr->prev_attribute_c = head->prev_attribute_c;
1349 head->prev_attribute_c = attr;
// Empty list: the attribute is its own cyclic predecessor.
1352 attr->prev_attribute_c = attr;
1354 attr->next_attribute = head;
1355 node->first_attribute = attr;
// Links 'attr' into node's attribute list immediately after 'place'.
1358 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
// Fix the back link that has to point at attr: the following attribute's if there is
// one, otherwise the head's cyclic prev link, since attr becomes the new tail.
1360 if (place->next_attribute)
1361 place->next_attribute->prev_attribute_c = attr;
1363 node->first_attribute->prev_attribute_c = attr;
1365 attr->next_attribute = place->next_attribute;
1366 attr->prev_attribute_c = place;
1367 place->next_attribute = attr;
// Links 'attr' into node's attribute list immediately before 'place'.
1370 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
// For the first attribute, prev_attribute_c is the tail, whose next_attribute is null;
// a non-null value means 'place' has a real predecessor to relink. Otherwise 'place' was
// the head and attr becomes the new first attribute.
1372 if (place->prev_attribute_c->next_attribute)
1373 place->prev_attribute_c->next_attribute = attr;
1375 node->first_attribute = attr;
1377 attr->prev_attribute_c = place->prev_attribute_c;
1378 attr->next_attribute = place;
1379 place->prev_attribute_c = attr;
// Unlinks 'attr' from node's attribute list. The attribute itself is not destroyed.
1382 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
// Back link: the following attribute (or, when removing the tail, the head's cyclic prev
// link) now refers to attr's predecessor.
1384 if (attr->next_attribute)
1385 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1387 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
// Forward link: a real predecessor skips over attr; when attr was the head (its prev is
// the tail, whose next_attribute is null) the node's first_attribute moves on.
1389 if (attr->prev_attribute_c->next_attribute)
1390 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1392 node->first_attribute = attr->next_attribute;
// Clear the removed attribute's links.
1394 attr->prev_attribute_c = 0;
1395 attr->next_attribute = 0;
// Allocates a node of the given type (node_element by default) and appends it as the
// last child of 'node'. Returns 0 when either the allocator's reservation or the node
// allocation fails; in that case the tree is left unchanged.
1398 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
// Reserve first so that linking the node afterwards cannot fail part-way.
1400 if (!alloc.reserve()) return 0;
1402 xml_node_struct* child = allocate_node(alloc, type);
1403 if (!child) return 0;
1405 append_node(child, node);
// Allocates an attribute from alloc and appends it to node's attribute list.
// Returns 0 when alloc.reserve() fails or when allocate_attribute yields no attribute.
// NOTE(review): the success return statement is not visible in this excerpt.
1410 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1412 if (!alloc.reserve()) return 0;
1414 xml_attribute_struct* attr = allocate_attribute(alloc);
1415 if (!attr) return 0;
1417 append_attribute(attr, node);
1423 // Helper classes for code generation
1436 // Unicode utilities
// Returns value with its two bytes exchanged.
1438 inline uint16_t endian_swap(uint16_t value)
// low byte moves up by 8 bits, high byte moves down by 8 bits; the cast narrows the promoted result back to 16 bits
1440 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
// Returns value with its four bytes in reverse order.
1443 inline uint32_t endian_swap(uint32_t value)
// byte 0 -> byte 3, byte 1 -> byte 2, byte 2 -> byte 1, byte 3 -> byte 0
1445 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
// Counting traits: the accumulator is a size_t to which the encoded length of each code point is added.
// NOTE(review): the enclosing struct header is not visible in this excerpt; the 1/2/3 unit sizes below and the later uses of utf8_counter() suggest this is the UTF-8 counter - confirm.
1450 typedef size_t value_type;
1452 static value_type low(value_type result, uint32_t ch)
// one unit below 0x80, two below 0x800, three otherwise
1455 if (ch < 0x80) return result + 1;
1457 else if (ch < 0x800) return result + 2;
1459 else return result + 3;
// NOTE(review): the body of high() is not visible in this excerpt
1462 static value_type high(value_type result, uint32_t)
1464 // U+10000..U+10FFFF
// Writing traits: the accumulator is a uint8_t* output cursor into which each code point is encoded.
// NOTE(review): the enclosing struct header, some branch conditions and the return statements are not visible in this excerpt; the lead-byte patterns below and the later uses of utf8_writer() suggest this is the UTF-8 writer - confirm.
1471 typedef uint8_t* value_type;
1473 static value_type low(value_type result, uint32_t ch)
// single-byte form: the code point is stored as is
1478 *result = static_cast<uint8_t>(ch);
// two-byte form: 0xC0 lead carrying the bits above bit 5, then a 0x80 continuation with the low 6 bits
1482 else if (ch < 0x800)
1484 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1485 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// three-byte form: 0xE0 lead followed by two 0x80 continuations of 6 bits each
1491 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1492 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1493 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1498 static value_type high(value_type result, uint32_t ch)
// four-byte form: 0xF0 lead followed by three 0x80 continuations of 6 bits each
1500 // U+10000..U+10FFFF
1501 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1502 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1503 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1504 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
// dispatches to low() for code points below 0x10000 and to high() otherwise
1508 static value_type any(value_type result, uint32_t ch)
1510 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting traits with a size_t accumulator; exposes the same low()/high() entry points as the other traits in this file.
// NOTE(review): the bodies of low() and high() are not visible in this excerpt.
1514 struct utf16_counter
1516 typedef size_t value_type;
1518 static value_type low(value_type result, uint32_t)
1523 static value_type high(value_type result, uint32_t)
// Writing traits: the accumulator is a uint16_t* output cursor.
// NOTE(review): the enclosing struct header and the return statements are not visible in this excerpt; the surrogate constants below and the utf16_writer typedef used by wchar_selector<2> suggest this is the UTF-16 writer - confirm.
1531 typedef uint16_t* value_type;
1533 static value_type low(value_type result, uint32_t ch)
// stored as a single 16-bit unit
1535 *result = static_cast<uint16_t>(ch);
1540 static value_type high(value_type result, uint32_t ch)
// split (ch - 0x10000) into its bits above bit 9 and its low 10 bits
1542 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1543 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
// emit the pair: 0xD800-based unit first, 0xDC00-based unit second
1545 result[0] = static_cast<uint16_t>(0xD800 + msh);
1546 result[1] = static_cast<uint16_t>(0xDC00 + lsh);
// dispatches to low() for code points below 0x10000 and to high() otherwise
1551 static value_type any(value_type result, uint32_t ch)
1553 return (ch < 0x10000) ? low(result, ch) : high(result, ch);
// Counting traits with a size_t accumulator; exposes the same low()/high() entry points as the other traits in this file.
// NOTE(review): the bodies of low() and high() are not visible in this excerpt.
1557 struct utf32_counter
1559 typedef size_t value_type;
1561 static value_type low(value_type result, uint32_t)
1566 static value_type high(value_type result, uint32_t)
// Writing traits: the accumulator is a uint32_t* output cursor, with low()/high()/any() entry points.
// NOTE(review): the enclosing struct header and all three bodies are not visible in this excerpt; the utf32_writer typedef used by wchar_selector<4> suggests this is the UTF-32 writer - confirm.
1574 typedef uint32_t* value_type;
1576 static value_type low(value_type result, uint32_t ch)
1583 static value_type high(value_type result, uint32_t ch)
1590 static value_type any(value_type result, uint32_t ch)
// Writing traits producing one byte per code point through a uint8_t* output cursor.
// NOTE(review): the return statements and the body of high() are not visible in this excerpt.
1598 struct latin1_writer
1600 typedef uint8_t* value_type;
1602 static value_type low(value_type result, uint32_t ch)
// code points above 255 do not fit in one byte and are replaced with '?'
1604 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1609 static value_type high(value_type result, uint32_t ch)
// Decoder over uint8_t input: process() reads lead bytes from data and forwards each decoded code point to Traits::low or Traits::high.
// NOTE(review): the enclosing struct header, the loop, the cursor/size updates and the invalid-input branch are not visible in this excerpt; the byte patterns below and the later uses of utf8_decoder() suggest this is the UTF-8 decoder - confirm.
1621 typedef uint8_t type;
1623 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
// mask selecting the 6 payload bits of a continuation byte
1625 const uint8_t utf8_byte_mask = 0x3f;
1629 uint8_t lead = *data;
1631 // 0xxxxxxx -> U+0000..U+007F
1634 result = Traits::low(result, lead);
// fast path: when data is 4-byte aligned, test four bytes at once for the high bit and emit them individually
1638 // process aligned single-byte (ascii) blocks
1639 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1641 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1642 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1644 result = Traits::low(result, data[0]);
1645 result = Traits::low(result, data[1]);
1646 result = Traits::low(result, data[2]);
1647 result = Traits::low(result, data[3]);
// each multi-byte branch checks the lead byte range, that enough input remains, and that every trailing byte has the 10xxxxxx form
1653 // 110xxxxx -> U+0080..U+07FF
1654 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1656 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1660 // 1110xxxx -> U+0800-U+FFFF
1661 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1663 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1667 // 11110xxx -> U+10000..U+10FFFF
1668 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1670 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1674 // 10xxxxxx or 11111xxx -> invalid
// Decoder over uint16_t input; opt_swap::value selects whether each unit is byte-swapped with endian_swap before use.
// NOTE(review): the loop, the first range check, the cursor/size updates and the fallback branches are not visible in this excerpt.
1686 template <typename opt_swap> struct utf16_decoder
1688 typedef uint16_t type;
1690 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1694 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1699 result = Traits::low(result, lead);
// units in [0xE000, 0xFFFF] are also forwarded unchanged to Traits::low
1704 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1706 result = Traits::low(result, lead);
// a unit in [0xD800, 0xDBFF] needs a second unit, so at least two units must remain
1710 // surrogate pair lead
1711 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1713 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
// the second unit must lie in [0xDC00, 0xDFFF]; the code point combines 10 bits from each unit on top of 0x10000
1715 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1717 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
// Decoder over uint32_t input; opt_swap::value selects whether each unit is byte-swapped with endian_swap before use.
// Each unit is forwarded to Traits::low or Traits::high.
// NOTE(review): the loop and the condition choosing between low and high are not visible in this excerpt.
1738 template <typename opt_swap> struct utf32_decoder
1740 typedef uint32_t type;
1742 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1746 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1751 result = Traits::low(result, lead);
1755 // U+10000..U+10FFFF
1758 result = Traits::high(result, lead);
// Decoder over uint8_t input that forwards each input byte to Traits::low without any transformation.
// NOTE(review): the loop and the return statement are not visible in this excerpt.
1768 struct latin1_decoder
1770 typedef uint8_t type;
1772 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1776 result = Traits::low(result, *data);
// Maps a wchar_t size in bytes to the matching unit type and counter/writer/decoder traits.
// Only sizes 2 and 4 are specialized; the primary template is declared but not defined here.
1785 template <size_t size> struct wchar_selector;
// 2-byte wchar_t: UTF-16 traits, decoder without byte swapping
1787 template <> struct wchar_selector<2>
1789 typedef uint16_t type;
1790 typedef utf16_counter counter;
1791 typedef utf16_writer writer;
1792 typedef utf16_decoder<opt_false> decoder;
// 4-byte wchar_t: UTF-32 traits, decoder without byte swapping
1795 template <> struct wchar_selector<4>
1797 typedef uint32_t type;
1798 typedef utf32_counter counter;
1799 typedef utf32_writer writer;
1800 typedef utf32_decoder<opt_false> decoder;
// counter and writer selected for this platform's sizeof(wchar_t)
1803 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1804 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
// Decoder over wchar_t input that delegates to the decoder chosen by wchar_selector for sizeof(wchar_t).
1806 struct wchar_decoder
1808 typedef wchar_t type;
1810 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1812 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
// the wchar_t pointer is reinterpreted as the selected decoder's unit type
1814 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1818 #ifdef PUGIXML_WCHAR_MODE
// Writes to result the byte-swapped form of each of the first length elements of data.
// Each element is read before the same index is written, which is why a caller in this file passes the same buffer for both.
1819 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1821 for (size_t i = 0; i < length; ++i)
// cast to the fixed-width type of the same size so the matching endian_swap overload is chosen
1822 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
// Character class flags; each is a distinct bit so that several classes can be combined in one table entry.
// NOTE(review): the enclosing enum header is not visible in this excerpt.
1830 ct_parse_pcdata = 1, // \0, &, \r, <
1831 ct_parse_attr = 2, // \0, &, \r, ', "
1832 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
1833 ct_space = 8, // \r, \n, space, tab
1834 ct_parse_cdata = 16, // \0, ], >, \r
1835 ct_parse_comment = 32, // \0, -, >, \r
1836 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1837 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
// Lookup table indexed by character value; each entry is the bitwise OR of the ct_* flags that apply to that character.
// Example: entry 0 is 55 = ct_parse_pcdata | ct_parse_attr | ct_parse_attr_ws | ct_parse_cdata | ct_parse_comment.
1840 static const unsigned char chartype_table[256] =
1842 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
1843 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
1844 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
1845 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
1846 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1847 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
1848 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1849 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
// every value from 128 up is 192 = ct_symbol | ct_start_symbol
1851 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
1852 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1853 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1854 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1855 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1856 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1857 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1858 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
// Second set of character class flags; each is a distinct bit so that several classes can be combined in one table entry.
// NOTE(review): the enclosing enum header is not visible in this excerpt.
1863 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1864 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1865 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
1866 ctx_digit = 8, // 0-9
1867 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
// Lookup table indexed by character value; each entry is the bitwise OR of the ctx_* flags that apply to that character.
// Examples: digits are 24 = ctx_digit | ctx_symbol, letters are 20 = ctx_start_symbol | ctx_symbol.
1870 static const unsigned char chartypex_table[256] =
1872 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1873 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1874 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1875 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1877 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1878 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1879 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1880 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
// every value from 128 up is 20 = ctx_start_symbol | ctx_symbol
1882 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1883 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1884 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1885 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1886 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1887 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1888 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1889 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
// Character class test: non-zero when the table entry for c has any of the bits in ct set.
// NOTE(review): the #else/#endif lines separating the two variants are not visible in this excerpt.
1892 #ifdef PUGIXML_WCHAR_MODE
// wide-character variant: values of 128 and above all share the single entry table[128]
1893 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
// other variant: c is converted to unsigned char and used directly as the index
1895 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
// convenience wrappers bound to the two lookup tables
1898 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1899 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
// Returns true when the first byte in memory of an unsigned int holding 1 is 1, i.e. the least significant byte is stored first.
1901 PUGI__FN bool is_little_endian()
1903 unsigned int ui = 1;
// inspect the lowest-addressed byte of ui
1905 return *reinterpret_cast<unsigned char*>(&ui) == 1;
// Returns the concrete encoding that corresponds to wchar_t on this platform:
// UTF-16 for a 2-byte wchar_t, UTF-32 otherwise, with the byte order reported by is_little_endian().
1908 PUGI__FN xml_encoding get_wchar_encoding()
// other wchar_t sizes are rejected at compile time
1910 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1912 if (sizeof(wchar_t) == 2)
1913 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1915 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
// Scans an XML declaration at the start of data for the value of its encoding field.
// On a match, out_encoding points into data at the first character of the value and out_length is the value's length.
// NOTE(review): several return statements and the declaration of offset are not visible in this excerpt.
1918 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
// PUGI__SCANCHAR: require ch at the current offset (returning false otherwise), then advance
// PUGI__SCANCHARTYPE: advance past any run of characters of class ct, never beyond size
1920 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1921 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
// the input must begin with "<?xml" followed by a whitespace character; size < 6 short-circuits before any byte is read
1923 // check if we have a non-empty XML declaration
1924 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
// i + 1 < size keeps the data[i + 1] lookahead below within bounds
1927 // scan XML declaration until the encoding field
1928 for (size_t i = 6; i + 1 < size; ++i)
1930 // declaration can not contain ? in quoted values
1934 if (data[i] == 'e' && data[i + 1] == 'n')
1938 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1939 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1940 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
// optional whitespace, '=', optional whitespace
1943 PUGI__SCANCHARTYPE(ct_space);
1944 PUGI__SCANCHAR('=');
1945 PUGI__SCANCHARTYPE(ct_space);
// a double quote is used if present; otherwise a single quote is required by the next scan
1947 // the only two valid delimiters are ' and "
1948 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1950 PUGI__SCANCHAR(delimiter);
// the value is the run of ct_symbol characters between the delimiters
1952 size_t start = offset;
1954 out_encoding = data + offset;
1956 PUGI__SCANCHARTYPE(ct_symbol);
1958 out_length = offset - start;
// the closing delimiter must match the opening one
1960 PUGI__SCANCHAR(delimiter);
1968 #undef PUGI__SCANCHAR
1969 #undef PUGI__SCANCHARTYPE
// Guesses the encoding of a buffer from its first four bytes and, failing that, from the encoding named in its XML declaration.
// Falls back to encoding_utf8 when nothing matches.
1972 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1974 // skip encoding autodetection if input buffer is too small
1975 if (size < 4) return encoding_utf8;
1977 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
// the 4-byte UTF-32 patterns are tested before the 2-byte UTF-16 ones because FF FE is a prefix of FF FE 00 00
1979 // look for BOM in first few bytes
1980 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1981 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1982 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1983 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1984 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
// 0x3c is '<' and 0x3f is '?'; the position of the zero bytes reveals unit width and byte order
1986 // look for <, <? or <?xm in various encodings
1987 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1988 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1989 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1990 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1992 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1993 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1994 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1996 // no known BOM detected; parse declaration
1997 const uint8_t* enc = 0;
1998 size_t enc_length = 0;
// the buffer starts with the single-byte characters "<?xm": look at the declared encoding name
2000 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
// (c | ' ') sets bit 0x20, which maps ASCII uppercase letters to lowercase before the comparison
2002 // iso-8859-1 (case-insensitive)
2003 if (enc_length == 10
2004 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2005 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2006 && enc[8] == '-' && enc[9] == '1')
2007 return encoding_latin1;
// NOTE(review): the first and last lines of the following condition are not visible in this excerpt
2009 // latin1 (case-insensitive)
2011 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2012 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2014 return encoding_latin1;
2017 return encoding_utf8;
// Resolves a requested encoding to a concrete one: generic values (wchar, utf16, utf32) are mapped to
// platform-specific variants, explicit encodings are returned unchanged, and encoding_auto triggers
// detection from the buffer contents.
2020 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2022 // replace wchar encoding with utf implementation
2023 if (encoding == encoding_wchar) return get_wchar_encoding();
2025 // replace utf16 encoding with utf16 with specific endianness
2026 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2028 // replace utf32 encoding with utf32 with specific endianness
2029 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2031 // only do autodetection if no explicit encoding is requested
2032 if (encoding != encoding_auto) return encoding;
2034 // try to guess encoding (based on XML specification, Appendix F.1)
2035 const uint8_t* data = static_cast<const uint8_t*>(contents);
2037 return guess_buffer_encoding(data, size);
// Produces a writable char_t buffer for contents: either the caller's buffer itself or a freshly allocated copy.
// Returns false when the allocation fails.
// NOTE(review): the branch lines (presumably keyed on is_mutable and on contents being non-null), the terminator store and the success return are not visible in this excerpt.
2040 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
// size is in bytes; length is in char_t units
2042 size_t length = size / sizeof(char_t);
// reuse path: the input buffer is handed back as is, with constness cast away
2046 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2047 out_length = length;
// copy path: one extra char_t is allocated beyond the copied data
2051 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2052 if (!buffer) return false;
2055 memcpy(buffer, contents, length * sizeof(char_t));
2057 assert(length == 0);
// the reported length includes the extra element
2061 out_buffer = buffer;
2062 out_length = length + 1;
2068 #ifdef PUGIXML_WCHAR_MODE
// Returns true when le and re are the same UTF width (16 or 32) but opposite byte orders,
// i.e. converting between them only requires swapping bytes.
2069 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2071 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2072 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
// Produces a byte-swapped char_t buffer from contents, either in place or in a freshly allocated buffer.
// Returns false when the allocation fails.
// NOTE(review): the branch lines (presumably keyed on is_mutable), the terminator store and the success return are not visible in this excerpt.
2075 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2077 const char_t* data = static_cast<const char_t*>(contents);
// size is in bytes; length is in char_t units
2078 size_t length = size / sizeof(char_t);
// in-place path: source and destination are the same buffer
2082 char_t* buffer = const_cast<char_t*>(data);
2084 convert_wchar_endian_swap(buffer, data, length);
2086 out_buffer = buffer;
2087 out_length = length;
// copy path: one extra char_t is allocated and included in the reported length
2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092 if (!buffer) return false;
2094 convert_wchar_endian_swap(buffer, data, length);
2097 out_buffer = buffer;
2098 out_length = length + 1;
// Converts contents to a newly allocated wchar_t-based char_t buffer using decoder D, in two passes:
// first count the output units, then write them. Returns false when the allocation fails.
// NOTE(review): the terminator store and the success return are not visible in this excerpt.
2104 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2106 const typename D::type* data = static_cast<const typename D::type*>(contents);
// size is in bytes; data_length is in units of the decoder's input type
2107 size_t data_length = size / sizeof(typename D::type);
2109 // first pass: get length in wchar_t units
2110 size_t length = D::process(data, data_length, 0, wchar_counter());
// one extra char_t is allocated beyond the converted data
2112 // allocate buffer of suitable length
2113 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2114 if (!buffer) return false;
2116 // second pass: convert the input decoded by D to wchar_t
2117 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2118 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
// both passes must agree on the output length
2120 assert(oend == obegin + length);
2123 out_buffer = buffer;
2124 out_length = length + 1;
// Converts contents from the given encoding into a char_t buffer in the platform's wchar_t encoding,
// choosing the cheapest available path for each source encoding.
// NOTE(review): the statement following the final assert is not visible in this excerpt.
2129 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2131 // get native encoding
2132 xml_encoding wchar_encoding = get_wchar_encoding();
2134 // fast path: no conversion required
2135 if (encoding == wchar_encoding)
2136 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2138 // only endian-swapping is required
2139 if (need_endian_swap_utf(encoding, wchar_encoding))
2140 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2142 // source encoding is utf8
2143 if (encoding == encoding_utf8)
2144 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
// for utf16/utf32 the decoder is instantiated with opt_true (byte swapping) only when the source byte order differs from the platform's
2146 // source encoding is utf16
2147 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2149 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2151 return (native_encoding == encoding) ?
2152 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2153 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2156 // source encoding is utf32
2157 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2159 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2161 return (native_encoding == encoding) ?
2162 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2163 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2166 // source encoding is latin1
2167 if (encoding == encoding_latin1)
2168 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2170 assert(false && "Invalid encoding"); // unreachable
// Converts contents to a newly allocated UTF-8 char_t buffer using decoder D, in two passes:
// first count the output units, then write them. Returns false when the allocation fails.
// NOTE(review): the terminator store and the success return are not visible in this excerpt.
2174 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2176 const typename D::type* data = static_cast<const typename D::type*>(contents);
// size is in bytes; data_length is in units of the decoder's input type
2177 size_t data_length = size / sizeof(typename D::type);
2179 // first pass: get length in utf8 units
2180 size_t length = D::process(data, data_length, 0, utf8_counter());
// one extra char_t is allocated beyond the converted data
2182 // allocate buffer of suitable length
2183 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2184 if (!buffer) return false;
2186 // second pass: convert the input decoded by D to utf8
2187 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2188 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
// both passes must agree on the output length
2190 assert(oend == obegin + length);
2193 out_buffer = buffer;
2194 out_length = length + 1;
// Iterates over the size bytes of data and returns a prefix length.
// NOTE(review): the loop body and return statements are not visible in this excerpt; judging by the name and by its caller, the result is presumably the count of leading bytes that need no UTF-8 conversion - confirm.
2199 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2201 for (size_t i = 0; i < size; ++i)
// Converts latin1 contents to a UTF-8 char_t buffer. A leading run that needs no conversion is copied verbatim;
// only the remainder goes through the decoder. Returns false when the allocation fails.
// NOTE(review): the terminator store and the success return are not visible in this excerpt.
2208 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2210 const uint8_t* data = static_cast<const uint8_t*>(contents);
2211 size_t data_length = size;
2213 // get size of prefix that does not need utf8 conversion
2214 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2215 assert(prefix_length <= data_length);
// postfix is the part of the input that starts at the first byte needing conversion
2217 const uint8_t* postfix = data + prefix_length;
2218 size_t postfix_length = data_length - prefix_length;
2220 // if no conversion is needed, just return the original buffer
2221 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// the prefix contributes one output unit per input byte
2223 // first pass: get length in utf8 units
2224 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
// one extra char_t is allocated beyond the converted data
2226 // allocate buffer of suitable length
2227 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2228 if (!buffer) return false;
2230 // second pass: convert latin1 input to utf8
2231 memcpy(buffer, data, prefix_length);
// converted output is written right after the copied prefix
2233 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2234 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
// both passes must agree on the output length
2236 assert(oend == obegin + length);
2239 out_buffer = buffer;
2240 out_length = length + 1;
// Converts contents from the given encoding into a UTF-8 char_t buffer, choosing the cheapest available path for each source encoding.
// NOTE(review): the statement following the final assert is not visible in this excerpt.
2245 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2247 // fast path: no conversion required
2248 if (encoding == encoding_utf8)
2249 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
// for utf16/utf32 the decoder is instantiated with opt_true (byte swapping) only when the source byte order differs from the platform's
2251 // source encoding is utf16
2252 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2254 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2256 return (native_encoding == encoding) ?
2257 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2258 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2261 // source encoding is utf32
2262 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2264 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2266 return (native_encoding == encoding) ?
2267 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2268 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2271 // source encoding is latin1
2272 if (encoding == encoding_latin1)
2273 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2275 assert(false && "Invalid encoding"); // unreachable
// First half of a two-step wide-to-UTF-8 conversion: returns the number of UTF-8 units needed for the first length elements of str.
2280 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2282 // get length in utf8 characters
2283 return wchar_decoder::process(str, length, 0, utf8_counter());
// Second half of a two-step wide-to-UTF-8 conversion: writes the UTF-8 form of str into buffer.
// size is the expected output length; the assert checks that exactly size units were written.
2286 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2289 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2290 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2292 assert(begin + size == end);
2297 #ifndef PUGIXML_NO_STL
// Converts the first length elements of str to UTF-8 in a std::string, sizing the string first and then filling it.
// NOTE(review): the declaration of result and the return statement are not visible in this excerpt.
2298 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2300 // first pass: get length in utf8 characters
2301 size_t size = as_utf8_begin(str, length);
2303 // allocate resulting string
2305 result.resize(size);
// &result[0] is only taken when the string is non-empty
2307 // second pass: convert to utf8
2308 if (size > 0) as_utf8_end(&result[0], size, str, length);
// Converts size bytes of UTF-8 text at str to a wide string, sizing the string first and then filling it.
// NOTE(review): the guard around the second pass and the return statement are not visible in this excerpt.
2313 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2315 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2317 // first pass: get length in wchar_t units
2318 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2320 // allocate resulting string
2321 std::basic_string<wchar_t> result;
2322 result.resize(length);
2324 // second pass: convert to wchar_t
2327 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2328 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
// both passes must agree on the output length
2330 assert(begin + length == end);
// Decides whether a string of the given length may be written over the existing string at target
// instead of allocating new storage. The new string must always fit within the current one.
2338 template <typename Header>
2339 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2341 // never reuse shared memory
2342 if (header & xml_memory_page_contents_shared_mask) return false;
2344 size_t target_length = strlength(target);
// header_mask bit clear: fitting is the only requirement
2346 // always reuse document buffer memory if possible
2347 if ((header & header_mask) == 0) return target_length >= length;
// header_mask bit set: besides fitting, the old string must be shorter than reuse_threshold
// or the unused remainder must be less than half of the old length
2349 // reuse heap memory if waste is not too great
2350 const size_t reuse_threshold = 32;
2352 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
// Stores source_length characters of source into dest, reusing dest's current storage when permitted and
// allocating a new string otherwise. header_mask is the bit in header recording that dest was allocated by the allocator.
// Returns false when reserving or allocating memory fails.
// NOTE(review): the assignments to dest and the success returns are not visible in this excerpt.
2355 template <typename String, typename Header>
2356 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2358 if (source_length == 0)
2360 // empty string and null pointer are equivalent, so just deallocate old memory
2361 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
// only strings flagged as allocated are released
2363 if (header & header_mask) alloc->deallocate_string(dest);
2365 // mark the string as not allocated
2367 header &= ~header_mask;
2371 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2373 // we can reuse old buffer, so just copy the new data (including zero terminator)
2374 memcpy(dest, source, source_length * sizeof(char_t));
2375 dest[source_length] = 0;
// remaining case: a new buffer is needed
2381 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2383 if (!alloc->reserve()) return false;
2385 // allocate new buffer
2386 char_t* buf = alloc->allocate_string(source_length + 1);
2387 if (!buf) return false;
2389 // copy the string (including zero terminator)
2390 memcpy(buf, source, source_length * sizeof(char_t));
2391 buf[source_length] = 0;
2393 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2394 if (header & header_mask) alloc->deallocate_string(dest);
2396 // the string is now allocated, so set the flag
2398 header |= header_mask;
// Helper that tracks a removed region ("gap") inside a buffer being edited in place, so that the text after it
// can later be moved down over it. end marks the end of the current gap and size its accumulated extent.
// NOTE(review): the struct header, the member declarations and the updates of end/size are not visible in this excerpt.
2409 gap(): end(0), size(0)
2413 // Push new gap, move s count bytes further (skipping the gap).
2414 // Collapse previous gap.
2415 void push(char_t*& s, size_t count)
2417 if (end) // there was a gap already; collapse it
// the byte count is computed through char* so it is independent of sizeof(char_t)
2419 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2421 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2424 s += count; // end of current gap
2431 // Collapse all gaps, return past-the-end pointer
2432 char_t* flush(char_t* s)
2436 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
// Processes an escape sequence starting at s: numeric character references (hex or decimal) and the named
// references matched below. The decoded text is written at s and the leftover bytes of the reference are
// registered with g as a gap.
// NOTE(review): the switch/case lines, the loops, the stores of the replacement characters and the return statements are not visible in this excerpt.
2446 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
// stre scans ahead of s, starting right after the first character
2448 char_t* stre = s + 1;
// accumulated code point of a numeric reference
2454 unsigned int ucsc = 0;
2456 if (stre[1] == 'x') // &#x... (hex code)
// a ';' at this point returns stre without converting anything
2462 if (ch == ';') return stre;
// digits contribute 0..9; (ch | ' ') folds case so that a-f and A-F contribute 10..15
2466 if (static_cast<unsigned int>(ch - '0') <= 9)
2467 ucsc = 16 * ucsc + (ch - '0');
2468 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2469 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2480 else // &#... (dec code)
2482 char_t ch = *++stre;
2484 if (ch == ';') return stre;
2488 if (static_cast<unsigned int>(ch - '0') <= 9)
2489 ucsc = 10 * ucsc + (ch - '0');
// encode the code point at s in the buffer's native encoding; s advances past the written units
2501 #ifdef PUGIXML_WCHAR_MODE
2502 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2504 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
// the rest of the reference, from s up to stre, becomes a gap
2507 g.push(s, stre - s);
// named references: each test consumes the remaining characters of the name and the closing ';'
2515 if (*stre == 'm') // &am
2517 if (*++stre == 'p' && *++stre == ';') // &
2522 g.push(s, stre - s);
2526 else if (*stre == 'p') // &ap
2528 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // '
2533 g.push(s, stre - s);
2542 if (*++stre == 't' && *++stre == ';') // >
2547 g.push(s, stre - s);
2555 if (*++stre == 't' && *++stre == ';') // <
2560 g.push(s, stre - s);
2568 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // "
2573 g.push(s, stre - s);
// Parser helper macros. They rely on identifiers from the expansion site: s (cursor), endch, optmsk, cursor, alloc, ch, error_offset and error_status.
// true when c equals e, or when c is the terminator and the saved character endch equals e
2587 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// advance s past whitespace
2588 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// non-zero when any bit of OPT is set in optmsk
2589 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) )
// append a new child of TYPE under cursor and descend into it; reports status_out_of_memory on failure
2590 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// move cursor back to its parent
2591 #define PUGI__POPNODE() { cursor = cursor->parent; }
// advance s until X holds or the terminator is reached
2592 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }
// advance s while X holds
2593 #define PUGI__SCANWHILE(X) { while (X) ++s; }
// same scan with four characters tested per iteration; X must be written in terms of the local ss
2594 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// save the current character in ch, replace it with a terminator and step past it
2595 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }
// record the error position and status, then return a null char_t* from the enclosing function
2596 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)
// report err at m when s has reached the terminator
2597 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
// Scans comment text in place starting at s, normalizing line endings, until the closing "-->" is found.
// NOTE(review): the declaration of g, the loop, the terminator store and the failure path are not visible in this excerpt.
2599 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
// skip quickly over characters that need no special handling inside a comment
2605 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2607 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2609 *s++ = '\n'; // replace first one with 0x0a
// a following \n is dropped by turning it into a one-character gap
2611 if (*s == '\n') g.push(s, 1);
2613 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
// skip three characters when '>' is physically present, two otherwise
2617 return s + (s[2] == '>' ? 3 : 2);
// Scans CDATA text in place starting at s, normalizing line endings, until the closing "]]>" is found.
// NOTE(review): the declaration of g, the loop, the terminator store and the return statements are not visible in this excerpt.
2627 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
// skip quickly over characters that need no special handling inside CDATA
2633 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2635 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2637 *s++ = '\n'; // replace first one with 0x0a
// a following \n is dropped by turning it into a one-character gap
2639 if (*s == '\n') g.push(s, 1);
2641 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
// Signature shared by all PCDATA conversion routines.
2655 typedef char_t* (*strconv_pcdata_t)(char_t*);
// PCDATA converter parameterized at compile time by three options: trimming trailing whitespace,
// line ending normalization and escape expansion.
// NOTE(review): the declarations of g and begin, the loop, the terminator stores and the return statements are not visible in this excerpt.
2657 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2659 static char_t* parse(char_t* s)
// skip quickly over characters that need no special handling inside PCDATA
2667 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2669 if (*s == '<') // PCDATA ends here
// collapse pending gaps; end is the past-the-end pointer of the converted text
2671 char_t* end = g.flush(s);
// with trimming enabled, whitespace before end is examined, never going before begin
2673 if (opt_trim::value)
2674 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2681 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2683 *s++ = '\n'; // replace first one with 0x0a
// a following \n is dropped by turning it into a one-character gap
2685 if (*s == '\n') g.push(s, 1);
2687 else if (opt_escape::value && *s == '&')
2689 s = strconv_escape(s, g);
// a second finishing sequence with the same flush-and-trim steps
// NOTE(review): the condition selecting it is not visible in this excerpt
2693 char_t* end = g.flush(s);
2695 if (opt_trim::value)
2696 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
// Picks the strconv_pcdata_impl instantiation that matches the parse options set in optmask.
2708 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
// the index arithmetic in the switch depends on these exact flag values
2710 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
// (optmask >> 4) & 3 maps escapes (0x10) to bit 0 and eol (0x20) to bit 1; (optmask >> 9) & 4 maps trim (0x0800) to bit 2
2712 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
// template arguments are ordered <trim, eol, escape>, matching bits 2, 1, 0 of the index
2714 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2715 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2716 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2717 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2718 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2719 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2720 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2721 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
// the index is masked to three bits, so values 0..7 are all covered above
2722 default: assert(false); return 0; // unreachable
// Signature shared by every attribute value converter: start of the value plus the quote character that ends it.
2726 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
// Attribute value converters; opt_escape::value decides whether '&' is passed to strconv_escape.
// The four variants differ in how whitespace inside the value is treated.
// parse_tree treats a null result from any of them as status_bad_attribute.
// NOTE(review): in every variant `g`, the enclosing loop and the return statements are on lines not visible here.
2728 template <typename opt_escape> struct strconv_attribute_impl
// Variant that stops on ct_space as well as ct_parse_attr_ws characters and trims whitespace at both ends of the value.
2730 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2734 // trim leading whitespaces
2735 if (PUGI__IS_CHARTYPE(*s, ct_space))
// NOTE(review): tail of a loop over `str` whose body is not visible here
2740 while (PUGI__IS_CHARTYPE(*str, ct_space));
2747 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
// closing quote reached: the value ends here
2749 if (*s == end_quote)
2751 char_t* str = g.flush(s);
// NOTE(review): tail of a loop over `str` whose body is not visible here (presumably trailing whitespace removal - confirm)
2754 while (PUGI__IS_CHARTYPE(*str, ct_space));
2758 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a second whitespace character follows: find the end of the run
2762 if (PUGI__IS_CHARTYPE(*s, ct_space))
2764 char_t* str = s + 1;
2765 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2770 else if (opt_escape::value && *s == '&')
2772 s = strconv_escape(s, g);
// Variant that stops on ct_parse_attr_ws characters and handles each ct_space character individually.
2782 static char_t* parse_wconv(char_t* s, char_t end_quote)
2788 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2790 if (*s == end_quote)
2796 else if (PUGI__IS_CHARTYPE(*s, ct_space))
// a 0x0a at this point is handed to g as a one-character span (the condition leading here is not visible)
2802 if (*s == '\n') g.push(s, 1);
2806 else if (opt_escape::value && *s == '&')
2808 s = strconv_escape(s, g);
// Variant that stops on ct_parse_attr characters and has a dedicated branch for 0x0d.
2818 static char_t* parse_eol(char_t* s, char_t end_quote)
2824 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2826 if (*s == end_quote)
2832 else if (*s == '\r')
2836 if (*s == '\n') g.push(s, 1);
2838 else if (opt_escape::value && *s == '&')
2840 s = strconv_escape(s, g);
// Variant with no whitespace handling in the visible code: only the closing quote and, optionally, '&' are acted on.
2850 static char_t* parse_simple(char_t* s, char_t end_quote)
2856 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2858 if (*s == end_quote)
2864 else if (opt_escape::value && *s == '&')
2866 s = strconv_escape(s, g);
// Picks the strconv_attribute_impl routine that matches the parse options set in optmask.
2877 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
// the index arithmetic in the switch depends on these exact flag values
2879 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
// after the shift: bit 0 = escapes, bit 1 = eol, bit 2 = wconv, bit 3 = wnorm
2881 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
// bit 0 always selects the opt_true / opt_false instantiation
2883 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2884 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2885 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2886 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
// with wconv set, the eol bit no longer changes the choice (cases 4/6 and 5/7 are identical)
2887 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2888 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2889 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2890 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
// with wnorm set, neither the wconv nor the eol bit changes the choice
2891 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2892 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2893 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2894 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2895 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2896 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2897 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2898 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
// the index is masked to four bits, so values 0..15 are all covered above
2899 default: assert(false); return 0; // unreachable
// Builds an xml_parse_result with the given status and offset; offset defaults to 0.
// Only these two fields are assigned in the visible code.
2903 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2905 xml_parse_result result;
2906 result.status = status;
2907 result.offset = offset;
// Parser state.
// alloc is dereferenced by PUGI__PUSHNODE and passed to append_new_attribute when nodes and attributes are created.
2914 xml_allocator* alloc;
// error_offset / error_status are written by PUGI__THROW_ERROR and read back by parse() once parse_tree returns
2915 char_t* error_offset;
2916 xml_parse_status error_status;
// starts with no error recorded: null offset and status_ok
2918 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2922 // DOCTYPE consists of nested sections of the following possible types:
2923 // <!-- ... -->, <? ... ?>, "...", '...'
2926 // First group can not contain nested groups
2927 // Second group can contain nested groups of the same type
2928 // Third group can contain all other groups
// Skips one primitive section starting at s: a quoted string, <? ... ?> or <!-- ... -->.
// Any other input, and any section whose end is not found before the terminator, is reported as status_bad_doctype.
2929 char_t* parse_doctype_primitive(char_t* s)
2931 if (*s == '"' || *s == '\'')
// scan to the next occurrence of ch (set on a line not visible here; presumably the opening quote - confirm)
2935 PUGI__SCANFOR(*s == ch);
2936 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2940 else if (s[0] == '<' && s[1] == '?')
2944 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2945 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2949 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2952 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
// none of the three primitive openers matched
2957 else PUGI__THROW_ERROR(status_bad_doctype, s);
// Handles a section that opens with "<![" and closes with "]]>"; sections of the same kind may be nested inside it.
// NOTE(review): the loop and the nesting bookkeeping are on lines that are not visible here.
2962 char_t* parse_doctype_ignore(char_t* s)
// the caller must position s on the opening "<!["
2966 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2971 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2973 // nested ignore section
2977 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2979 // ignore section end
// unconditional error at the end of the function (presumably reached when the input ends before the section is closed - confirm)
2990 PUGI__THROW_ERROR(status_bad_doctype, s);
// Walks a DOCTYPE group that starts with "<!", delegating nested pieces to parse_doctype_ignore and parse_doctype_primitive.
// NOTE(review): the loop, the `depth` counter updates and the success return are on lines that are not visible here.
2993 char_t* parse_doctype_group(char_t* s, char_t endch)
// s[0] may be 0 instead of '<' (presumably because an earlier step already overwrote it with a terminator - confirm)
2997 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
// "<!" not followed by '-', i.e. not the start of a comment
3002 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3007 s = parse_doctype_ignore(s);
3012 // some control group
3017 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3019 // unknown tag (forbidden), or some primitive group
3020 s = parse_doctype_primitive(s);
// this point is an error unless depth is 0 and the saved last character of the buffer is '>'
3034 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
// Parses a construct that starts with "<!": a comment, a CDATA section or a DOCTYPE.
// cursor is passed by value, so nodes pushed here do not move the caller's cursor.
// optmsk holds the parse options tested through PUGI__OPTSET; endch is the original last character of the buffer.
// Failures go through PUGI__THROW_ERROR, i.e. the function returns null with error_offset / error_status set.
3039 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3041 // parse node contents, starting with exclamation mark
3044 if (*s == '-') // '<!-...'
3048 if (*s == '-') // '<!--...'
3052 if (PUGI__OPTSET(parse_comments))
3054 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3055 cursor->value = s; // Save the offset.
// end-of-line conversion of the comment text is only needed when the comment is actually kept
3058 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3060 s = strconv_comment(s, endch);
// a null result is reported at the start of the comment text
3062 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3066 // Scan for terminating '-->'.
3067 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3068 PUGI__CHECK_ERROR(status_bad_comment, s);
3070 if (PUGI__OPTSET(parse_comments))
3071 *s = 0; // Zero-terminate this segment at the first terminating '-'.
3073 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
// "<!-" that is not followed by a second '-'
3076 else PUGI__THROW_ERROR(status_bad_comment, s);
// each *++s advances before comparing, so s ends on the second '[' when the whole "CDATA[" sequence matches
3081 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3085 if (PUGI__OPTSET(parse_cdata))
3087 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3088 cursor->value = s; // Save the offset.
3090 if (PUGI__OPTSET(parse_eol))
3092 s = strconv_cdata(s, endch);
// a null result is reported at the start of the CDATA text
3094 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3098 // Scan for terminating ']]>'.
3099 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3100 PUGI__CHECK_ERROR(status_bad_cdata, s);
3102 *s++ = 0; // Zero-terminate this segment.
3105 else // Flagged for discard, but we still have to scan for the terminator.
3107 // Scan for terminating ']]>'.
3108 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3109 PUGI__CHECK_ERROR(status_bad_cdata, s);
3114 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3116 else PUGI__THROW_ERROR(status_bad_cdata, s);
// the final 'E' goes through PUGI__ENDSWITH (presumably so that a buffer ending exactly at "<!DOCTYPE" still matches - confirm)
3118 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
// a DOCTYPE is only accepted while cursor has no parent
3122 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
// NOTE(review): s is adjusted on a line not visible here; mark is presumably the first character after "<!DOCTYPE" - confirm
3124 char_t* mark = s + 9;
3126 s = parse_doctype_group(s, endch);
3129 assert((*s == 0 && endch == '>') || *s == '>');
3132 if (PUGI__OPTSET(parse_doctype))
// leading whitespace is not part of the stored doctype value
3134 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3136 PUGI__PUSHNODE(node_doctype);
3138 cursor->value = mark;
// input ended right after "<!": choose the status from the character that was cut off the end of the buffer
3141 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3142 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3143 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
// Parses a construct that starts with "<?": either a declaration (target "xml", any letter case) or a processing instruction.
// ref_cursor is taken by reference and written back at the end, so a node pushed here can become the caller's cursor.
// Failures go through PUGI__THROW_ERROR, i.e. the function returns null with error_offset / error_status set.
3148 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3150 // load into registers
3151 xml_node_struct* cursor = ref_cursor;
3154 // parse node contents, starting with question mark
// the target name must begin with a ct_start_symbol character
3160 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3162 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3163 PUGI__CHECK_ERROR(status_bad_pi, s);
3165 // determine node type; stricmp / strcasecmp is not portable
// OR-ing with ' ' (0x20) folds ASCII upper case to lower case; target + 3 == s requires the name to be exactly three characters long
3166 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
// build a node only if the option matching this node kind is enabled
3168 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3172 // disallow non top-level declarations
3173 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3175 PUGI__PUSHNODE(node_declaration);
3179 PUGI__PUSHNODE(node_pi);
3182 cursor->name = target;
3186 // parse value/attributes
3190 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3195 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3202 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3203 PUGI__CHECK_ERROR(status_bad_pi, s);
3207 // replace ending ? with / so that 'element' terminates properly
3210 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3215 // store value and step over >
3216 cursor->value = value;
3225 else PUGI__THROW_ERROR(status_bad_pi, s);
// node not requested: still locate the closing "?>" so that parsing can continue after it
3230 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3231 PUGI__CHECK_ERROR(status_bad_pi, s);
// only one character is skipped when s[1] is not a literal '>'
3233 s += (s[1] == '>' ? 2 : 1);
3236 // store from registers
3237 ref_cursor = cursor;
// Main parsing routine: walks the zero-terminated buffer at s and builds nodes and attributes under root.
// optmsk holds the parse options; endch is the original last character of the buffer, which parse() replaced with a terminator.
// Failures go through PUGI__THROW_ERROR, i.e. the function returns null with error_offset / error_status set.
// NOTE(review): the main loop, several branch heads and the LOC_ATTRIBUTES label are on lines that are not visible here.
3242 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
// pick the attribute and PCDATA converters once, up front, from the option mask
3244 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3245 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3248 xml_node_struct* cursor = root;
3258 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3260 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3264 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3265 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
// whitespace after the element name: attributes may follow
3271 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3276 PUGI__SKIPWS(); // Eat any whitespace.
3278 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3280 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3281 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3283 a->name = s; // Save the offset.
3285 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3286 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3288 if (PUGI__IS_CHARTYPE(ch, ct_space))
3290 PUGI__SKIPWS(); // Eat any whitespace.
3296 if (ch == '=') // '<... #=...'
3298 PUGI__SKIPWS(); // Eat any whitespace.
3300 if (*s == '"' || *s == '\'') // '<... #="...'
3302 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3303 ++s; // Step over the quote.
3304 a->value = s; // Save the offset.
// the converter selected above receives the opening quote character as its terminator
3306 s = strconv_attribute(s, ch);
// a null result is reported at the start of the attribute value
3308 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3310 // After this line the loop continues from the start;
3311 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3312 // everything else will be detected
3313 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
// '=' was not followed by a quote character
3315 else PUGI__THROW_ERROR(status_bad_attribute, s);
// the attribute name was not followed by '='
3317 else PUGI__THROW_ERROR(status_bad_attribute, s);
3329 else if (*s == 0 && endch == '>')
3334 else PUGI__THROW_ERROR(status_bad_start_element, s);
3342 else if (*s == 0 && endch == '>')
3346 else PUGI__THROW_ERROR(status_bad_start_element, s);
3351 else if (ch == '/') // '<#.../'
3353 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3355 PUGI__POPNODE(); // Pop.
3361 // we stepped over null terminator, backtrack & handle closing tag
3364 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3366 else PUGI__THROW_ERROR(status_bad_start_element, s);
// closing tag: the name found in the input is compared with the name of the element at cursor
3374 char_t* name = cursor->name;
3375 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3377 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3379 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
// input ended where exactly one character of the name was left and that character equals endch:
// reported as a bad end element rather than as a name mismatch
3384 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3385 else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3388 PUGI__POPNODE(); // Pop.
3394 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3398 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3402 else if (*s == '?') // '<?...'
3404 s = parse_question(s, cursor, optmsk, endch);
// parse_question leaves cursor on a declaration node when its attributes still have to be parsed
3408 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3410 else if (*s == '!') // '<!...'
3412 s = parse_exclamation(s, cursor, optmsk, endch);
3415 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3416 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3420 mark = s; // Save this offset while searching for a terminator.
3422 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3424 if (*s == '<' || !*s)
3426 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
// taken when neither whitespace-PCDATA option is set, or when trimming is set
3429 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3433 else if (PUGI__OPTSET(parse_ws_pcdata_single))
// keep going only when a closing tag follows directly and the current element has no children yet
3435 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3439 if (!PUGI__OPTSET(parse_trim_pcdata))
// text at the top level (cursor has no parent) is only stored in fragment mode
3442 if (cursor->parent || PUGI__OPTSET(parse_fragment))
// embed mode: the first text of an element that has no children and no value yet is stored on the element itself
3444 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3446 cursor->value = s; // Save the offset.
3450 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3452 cursor->value = s; // Save the offset.
3454 PUGI__POPNODE(); // Pop since this is a standalone.
3457 s = strconv_pcdata(s);
3463 PUGI__SCANFOR(*s == '<'); // '...<'
3474 // check that last tag is closed
3475 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
// Returns s advanced past a leading byte order mark, or s unchanged when there is none.
3480 #ifdef PUGIXML_WCHAR_MODE
// wide-character build: the mark is the single code unit 0xfeff
3481 static char_t* parse_skip_bom(char_t* s)
3483 unsigned int bom = 0xfeff;
3484 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
// second definition for the other build configuration (the #else line is not visible here):
// the mark is the three-byte sequence 0xef 0xbb 0xbf
3487 static char_t* parse_skip_bom(char_t* s)
3489 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
// Returns true as soon as a node of type node_element is found while following next_sibling links from node.
// NOTE(review): the loop header and the final return are on lines that are not visible here.
3493 static bool has_element_node_siblings(xml_node_struct* node)
3497 if (PUGI__NODETYPE(node) == node_element) return true;
3499 node = node->next_sibling;
// Entry point: parses `length` characters at `buffer` into the tree under `root` and returns status plus error offset.
// The buffer is modified: its last character is saved in endch and overwritten with a terminator before parsing.
// xmldoc is cast to xml_allocator* and used as the allocator for the parser.
// NOTE(review): several guarding conditions and the final return are on lines that are not visible here.
3505 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3507 // early-out for empty documents
// empty input is acceptable in fragment mode only
3509 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3511 // get last child of the root before parsing
3512 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3514 // create parser on stack
3515 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3517 // save last character and make buffer zero-terminated (speeds up parsing)
3518 char_t endch = buffer[length - 1];
3519 buffer[length - 1] = 0;
3521 // skip BOM to make sure it does not end up as part of parse output
3522 char_t* buffer_data = parse_skip_bom(buffer);
3524 // perform actual parsing
// the return value is not used; the outcome is read from parser.error_status / parser.error_offset below
3525 parser.parse_tree(buffer_data, root, optmsk, endch);
// the offset is measured from `buffer`, not from buffer_data, so it includes a skipped BOM
3527 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3528 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3532 // since we removed last character, we have to handle the only possible false positive (stray <)
3534 return make_parse_result(status_unrecognized_tag, length - 1);
3536 // check if there are any element nodes parsed
// first node added by this call: the one after the previous last child, or the root's first child if there was none
3537 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3539 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3540 return make_parse_result(status_no_document_element, length - 1);
3544 // roll back offset if it occurs on a null terminator in the source buffer
3545 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3553 // Output facilities
// Returns the encoding that char_t data already has in this build: the wchar encoding when
// PUGIXML_WCHAR_MODE is defined, encoding_utf8 otherwise (the #else / #endif lines are not visible here).
3554 PUGI__FN xml_encoding get_write_native_encoding()
3556 #ifdef PUGIXML_WCHAR_MODE
3557 return get_wchar_encoding();
3559 return encoding_utf8;
// Resolves the requested output encoding to a concrete one:
// wchar and the endian-neutral utf16 / utf32 values are replaced by specific encodings,
// any other explicit choice is returned unchanged, and encoding_auto becomes encoding_utf8.
3563 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3565 // replace wchar encoding with utf implementation
3566 if (encoding == encoding_wchar) return get_wchar_encoding();
3568 // replace utf16 encoding with utf16 with specific endianness
3569 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3571 // replace utf32 encoding with utf32 with specific endianness
3572 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3574 // only do autodetection if no explicit encoding is requested
3575 if (encoding != encoding_auto) return encoding;
3577 // assume utf8 encoding
3578 return encoding_utf8;
// Converts `length` char_t units at `data` into `dest` using decoder D and writer T.
// Returns the size of the output in bytes. The D and T arguments are unnamed and serve only to select the template types.
3581 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
// the reinterpret_cast below is only valid when char_t and the decoder's unit type have the same size
3583 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3585 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
// element count times element size gives bytes
3587 return static_cast<size_t>(end - dest) * sizeof(*dest);
// Overload with an extra opt_swap flag: converts like the five-argument version and contains a pass that
// byte-swaps every output element in place. Returns the size of the output in bytes.
3590 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
// the reinterpret_cast below is only valid when char_t and the decoder's unit type have the same size
3592 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3594 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
// NOTE(review): the condition guarding this loop is on a line not visible here (presumably opt_swap - confirm)
3598 for (typename T::value_type i = dest; i != end; ++i)
3599 *i = endian_swap(*i);
3602 return static_cast<size_t>(end - dest) * sizeof(*dest);
3605 #ifdef PUGIXML_WCHAR_MODE
// Wide-character build: returns how many leading units of data[0..length) can be converted
// without splitting a UTF-16 surrogate pair.
3606 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3608 if (length < 1) return 0;
3610 // discard last character if it's the lead of a surrogate pair
// the unsigned subtraction maps 0xD800..0xDBFF to 0..0x3FF; the check only applies when wchar_t is 2 bytes wide
3611 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
// Wide-character build: converts `length` units at `data` to `encoding`, writing to whichever r_* pointer
// matches the target unit size. Returns the number of bytes produced.
// NOTE(review): the four r_* pointers presumably alias one scratch area supplied by the caller - confirm.
3614 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3616 // only endian-swapping is required
3617 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3619 convert_wchar_endian_swap(r_char, data, length);
// same number of units in and out, so the byte count is simply length times the unit size
3621 return length * sizeof(char_t);
3625 if (encoding == encoding_utf8)
3626 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3629 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3631 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// the last argument requests a byte swap when the target endianness differs from the machine's
3633 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3637 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3639 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3641 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3644 // convert to latin1
3645 if (encoding == encoding_latin1)
3646 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3648 assert(false && "Invalid encoding"); // unreachable
// Byte-oriented build: returns a prefix length of data[0..length) that does not end in the middle of a
// multi-byte sequence. Inputs shorter than 5 units yield 0.
3652 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3654 if (length < 5) return 0;
// look at the last four units, starting from the end
3656 for (size_t i = 1; i <= 4; ++i)
3658 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3660 // either a standalone character or a leading one
// bytes of the form 10xxxxxx are continuation bytes; the prefix is cut just before the first other byte found,
// so that byte itself is left out of the prefix
3661 if ((ch & 0xc0) != 0x80) return length - i;
3664 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
// Byte-oriented build: converts `length` units at `data` from UTF-8 to utf16, utf32 or latin1, writing to
// the r_* pointer that matches the target unit size. Returns the number of bytes produced.
// The first parameter is unnamed and unused in this build.
3668 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3670 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3672 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
// the last argument requests a byte swap when the target endianness differs from the machine's
3674 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3677 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3679 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3681 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3684 if (encoding == encoding_latin1)
3685 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
// any other encoding value is a caller error
3687 assert(false && "Invalid encoding"); // unreachable
// Collects output in a fixed-size char_t buffer and passes it on to an xml_writer, converting to the
// requested encoding when that differs from the native one.
// NOTE(review): access specifiers, the no-argument flush() header and some member declarations are on
// lines that are not visible here.
3692 class xml_buffered_writer
// copy construction and assignment are declared without a definition in the visible code
3694 xml_buffered_writer(const xml_buffered_writer&);
3695 xml_buffered_writer& operator=(const xml_buffered_writer&);
// user_encoding is resolved to a concrete encoding once, at construction
3698 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3700 PUGI__STATIC_ASSERT(bufcapacity >= 8);
// writes out whatever is currently buffered
3705 flush(buffer, bufsize);
// Sends `size` char_t units at `data` to the writer, converting through the scratch area if necessary.
3710 void flush(const char_t* data, size_t size)
3712 if (size == 0) return;
3714 // fast path, just write data
3715 if (encoding == get_write_native_encoding())
3716 writer.write(data, size * sizeof(char_t));
// slow path: convert into scratch; result is the number of bytes produced
3720 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3721 assert(result <= sizeof(scratch));
3724 writer.write(scratch.data_u8, result);
// Writes data that does not fit in the remaining buffer space.
3728 void write_direct(const char_t* data, size_t length)
3730 // flush the remaining buffer contents
3733 // handle large chunks
3734 if (length > bufcapacity)
3736 if (encoding == get_write_native_encoding())
3738 // fast path, can just write data chunk
3739 writer.write(data, length * sizeof(char_t));
3743 // need to convert in suitable chunks
3744 while (length > bufcapacity)
3746 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3747 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3748 size_t chunk_size = get_valid_length(data, bufcapacity);
3751 // convert chunk and write
3752 flush(data, chunk_size);
3756 length -= chunk_size;
3759 // small tail is copied below
3763 memcpy(buffer + bufsize, data, length * sizeof(char_t));
// Appends `length` units to the buffer when they fit, otherwise hands them to write_direct.
3767 void write_buffer(const char_t* data, size_t length)
3769 size_t offset = bufsize;
3771 if (offset + length <= bufcapacity)
3773 memcpy(buffer + offset, data, length * sizeof(char_t));
3774 bufsize = offset + length;
3778 write_direct(data, length);
// Appends a zero-terminated string.
3782 void write_string(const char_t* data)
3784 // write the part of the string that fits in the buffer
3785 size_t offset = bufsize;
3787 while (*data && offset < bufcapacity)
3788 buffer[offset++] = *data++;
3791 if (offset < bufcapacity)
3797 // backtrack a bit if we have split the codepoint
// length = units copied so far; extra = units at the end of that copy that get_valid_length did not accept
3798 size_t length = offset - bufsize;
3799 size_t extra = length - get_valid_length(data - length, length);
3801 bufsize = offset - extra;
// the rest of the string, including the units given back above, goes through write_direct
3803 write_direct(data - extra, strlength(data) + extra);
// write(d0 .. dN): append 1 to 6 characters. Each overload calls the no-argument flush() first when fewer
// than N slots are free and continues at the offset that call returns.
3807 void write(char_t d0)
3809 size_t offset = bufsize;
3810 if (offset > bufcapacity - 1) offset = flush();
3812 buffer[offset + 0] = d0;
3813 bufsize = offset + 1;
3816 void write(char_t d0, char_t d1)
3818 size_t offset = bufsize;
3819 if (offset > bufcapacity - 2) offset = flush();
3821 buffer[offset + 0] = d0;
3822 buffer[offset + 1] = d1;
3823 bufsize = offset + 2;
3826 void write(char_t d0, char_t d1, char_t d2)
3828 size_t offset = bufsize;
3829 if (offset > bufcapacity - 3) offset = flush();
3831 buffer[offset + 0] = d0;
3832 buffer[offset + 1] = d1;
3833 buffer[offset + 2] = d2;
3834 bufsize = offset + 3;
3837 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3839 size_t offset = bufsize;
3840 if (offset > bufcapacity - 4) offset = flush();
3842 buffer[offset + 0] = d0;
3843 buffer[offset + 1] = d1;
3844 buffer[offset + 2] = d2;
3845 buffer[offset + 3] = d3;
3846 bufsize = offset + 4;
3849 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3851 size_t offset = bufsize;
3852 if (offset > bufcapacity - 5) offset = flush();
3854 buffer[offset + 0] = d0;
3855 buffer[offset + 1] = d1;
3856 buffer[offset + 2] = d2;
3857 buffer[offset + 3] = d3;
3858 buffer[offset + 4] = d4;
3859 bufsize = offset + 5;
3862 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3864 size_t offset = bufsize;
3865 if (offset > bufcapacity - 6) offset = flush();
3867 buffer[offset + 0] = d0;
3868 buffer[offset + 1] = d1;
3869 buffer[offset + 2] = d2;
3870 buffer[offset + 3] = d3;
3871 buffer[offset + 4] = d4;
3872 buffer[offset + 5] = d5;
3873 bufsize = offset + 6;
3876 // utf8 maximum expansion: x4 (-> utf32)
3877 // utf16 maximum expansion: x2 (-> utf32)
3878 // utf32 maximum expansion: x1
// PUGIXML_MEMORY_OUTPUT_STACK, when defined, supplies a value here (presumably the byte budget named bufcapacitybytes - confirm)
3882 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3883 PUGIXML_MEMORY_OUTPUT_STACK
// each buffered unit is budgeted sizeof(char_t) bytes for `buffer` plus 4 bytes of scratch
3888 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3891 char_t buffer[bufcapacity];
// members of `scratch` (see the convert_buffer_output call in flush); the three fixed-width arrays each occupy 4 * bufcapacity bytes
3895 uint8_t data_u8[4 * bufcapacity];
3896 uint16_t data_u16[2 * bufcapacity];
3897 uint32_t data_u32[bufcapacity];
3898 char_t data_char[bufcapacity];
// concrete output encoding chosen in the constructor
3903 xml_encoding encoding;
// Writes s to writer, replacing characters flagged by `type` (tested with PUGI__IS_CHARTYPEX) with XML entities
// or numeric character references.
// NOTE(review): the loop, the switch header and the case labels are on lines that are not visible here.
3906 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3910 const char_t* prev = s;
3912 // While *s is a usual symbol
3913 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
// the run [prev, s) needs no escaping and is copied as is
3915 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3921 writer.write('&', 'a', 'm', 'p', ';');
3925 writer.write('&', 'l', 't', ';');
3929 writer.write('&', 'g', 't', ';');
3933 writer.write('&', 'q', 'u', 'o', 't', ';');
3936 default: // s is not a usual symbol
3938 unsigned int ch = static_cast<unsigned int>(*s++);
// two-digit decimal reference "&#NN;": only correct for values below 100
// (a range check, if there is one, is on a line not visible here)
3941 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
// Writes s either verbatim (format_no_escapes set in flags) or through text_output_escaped with the given
// character class. The `else` between the two calls is on a line not visible here.
3947 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3949 if (flags & format_no_escapes)
3950 writer.write_string(s);
3952 text_output_escaped(writer, s, type);
// Writes s wrapped in "<![CDATA[" ... "]]>". Because "]]>" cannot appear inside a CDATA section, the scan
// stops at each occurrence and, per the existing comments, the '>' is carried over to a following section.
// NOTE(review): the outer loop and the step that skips "]]" are on lines that are not visible here.
3955 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
// the nine-character opener is emitted with two calls (5 + 4 characters)
3959 writer.write('<', '!', '[', 'C', 'D');
3960 writer.write('A', 'T', 'A', '[');
3962 const char_t* prev = s;
3964 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3965 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3967 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3970 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3972 writer.write(']', ']', '>');
// Writes the indent string `depth` times. Indent strings of 1 to 4 characters use the fixed-arity
// writer.write overloads; the last loop uses write_buffer and works for any length.
// NOTE(review): the case labels are on lines that are not visible here.
3977 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3979 switch (indent_length)
3983 for (unsigned int i = 0; i < depth; ++i)
3984 writer.write(indent[0]);
3990 for (unsigned int i = 0; i < depth; ++i)
3991 writer.write(indent[0], indent[1]);
3997 for (unsigned int i = 0; i < depth; ++i)
3998 writer.write(indent[0], indent[1], indent[2]);
4004 for (unsigned int i = 0; i < depth; ++i)
4005 writer.write(indent[0], indent[1], indent[2], indent[3]);
// general case
4011 for (unsigned int i = 0; i < depth; ++i)
4012 writer.write_buffer(indent, indent_length);
// Writes s as an XML comment: "<!--", the text, "-->". The scan stops at any '-' that is followed by another
// '-' or by the end of the string, and "- " is written at such a position so the output never contains "--"
// inside the comment body.
// NOTE(review): the outer loop and the condition around the "- " write are on lines that are not visible here.
4017 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4019 writer.write('<', '!', '-', '-');
4023 const char_t* prev = s;
4025 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4026 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4028 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4034 writer.write('-', ' ');
4039 writer.write('-', '-', '>');
// Writes the value of a processing instruction. An embedded "?>" would end the instruction early, so each
// occurrence is written as "? >" instead.
// NOTE(review): the outer loop and the condition around the replacement are on lines that are not visible here.
4042 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4046 const char_t* prev = s;
4048 // look for ?> sequence - we can't output it since ?> terminates PI
4049 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4051 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4055 assert(s[0] == '?' && s[1] == '>');
4057 writer.write('?', ' ', '>');
// Writes every attribute of node as name="value", following the next_attribute links from first_attribute.
// Attributes with a null name are written as ":anonymous".
4063 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4065 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4067 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
// true only when format_indent_attributes is set and format_raw is not
4069 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
// in that mode the attribute is indented one level deeper than its element
4073 text_output_indent(writer, indent, indent_length, depth + 1);
4080 writer.write_string(a->name ? a->name + 0 : default_name);
4081 writer.write('=', '"');
// NOTE(review): a->value is passed on as is here; a guard for a null value, if any, is on a line not visible
4084 text_output(writer, a->value, ctx_special_attr, flags);
// Writes the opening part of an element: name, attributes, and either the empty-element form or the embedded
// text value. node_output uses a true result as the condition for descending into node->first_child.
// NOTE(review): the '<' write, several branch heads and the return statements are on lines not visible here.
4090 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
// elements with a null name are written as ":anonymous"
4092 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4093 const char_t* name = node->name ? node->name + 0 : default_name;
4096 writer.write_string(name);
4098 if (node->first_attribute)
4099 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4101 // element nodes can have value if parse_embed_pcdata was used
4104 if (!node->first_child)
// format_no_empty_element_tags: write "<name></name>" rather than "<name/>"
4106 if (flags & format_no_empty_element_tags)
4108 writer.write('>', '<', '/');
4109 writer.write_string(name);
4116 if ((flags & format_raw) == 0)
4119 writer.write('/', '>');
// embedded text value of the element, escaped as PCDATA
4135 text_output(writer, node->value, ctx_special_pcdata, flags);
// no children after the value: the element is closed right away
4137 if (!node->first_child)
4139 writer.write('<', '/');
4140 writer.write_string(name);
// Writes the closing tag of node: "</" followed by its name, or ":anonymous" when the name is null.
// The write of the final '>' is on a line not visible here.
4152 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4154 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4155 const char_t* name = node->name ? node->name + 0 : default_name;
4157 writer.write('<', '/');
4158 writer.write_string(name);
// Writes a node that has no element children to walk: text, CDATA, comment, processing instruction,
// declaration or doctype, chosen by the node type.
// NOTE(review): most case labels and break statements are on lines that are not visible here.
4162 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4164 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4166 switch (PUGI__NODETYPE(node))
// in the next three calls a null value is written as an empty string
4169 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4173 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4177 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
// "<?" name [value] "?>"
4181 writer.write('<', '?');
4182 writer.write_string(node->name ? node->name + 0 : default_name);
// NOTE(review): node->value is passed on as is; the guard for a null value is presumably on a line not visible - confirm
4187 node_output_pi_value(writer, node->value);
4190 writer.write('?', '>');
4193 case node_declaration:
4194 writer.write('<', '?');
4195 writer.write_string(node->name ? node->name + 0 : default_name);
// attributes of a declaration are written with format_raw added to flags, an empty indent string and depth 0
4196 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4197 writer.write('?', '>');
// "<!DOCTYPE" is emitted with two calls (5 + 4 characters)
4201 writer.write('<', '!', 'D', 'O', 'C');
4202 writer.write('T', 'Y', 'P', 'E');
4207 writer.write_string(node->value);
4214 assert(false && "Invalid node type"); // unreachable
// Serializes the subtree rooted at `root` without recursion: the loop walks down through
// first_child, across through next_sibling and back up through parent until it returns to root.
// indent_flags carries, from one node to the next, whether a newline and/or indentation has to be
// written before the next piece of output.
// NOTE(review): the depth increments/decrements and some branch bodies are on lines missing from
// this listing.
4224 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
// indentation is only measured when an indent flag is set and raw formatting is off; 0 disables it below
4226 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4227 unsigned int indent_flags = indent_indent;
4229 xml_node_struct* node = root;
4235 // begin writing current node
// pcdata/cdata nodes go straight to node_output_simple
4236 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4238 node_output_simple(writer, node, flags);
4244 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4247 if ((indent_flags & indent_indent) && indent_length)
4248 text_output_indent(writer, indent, indent_length, depth);
4250 if (PUGI__NODETYPE(node) == node_element)
4252 indent_flags = indent_newline | indent_indent;
// the result of node_output_start gates the descent into the element's children
4254 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4256 // element nodes can have value if parse_embed_pcdata was used
4260 node = node->first_child;
4265 else if (PUGI__NODETYPE(node) == node_document)
4267 indent_flags = indent_indent;
4269 if (node->first_child)
4271 node = node->first_child;
// every other node type is written in one go
4277 node_output_simple(writer, node, flags);
4279 indent_flags = indent_newline | indent_indent;
4283 // continue to the next node
4284 while (node != root)
4286 if (node->next_sibling)
4288 node = node->next_sibling;
// no sibling left: climb to the parent and close it if it is an element
4292 node = node->parent;
4294 // write closing node
4295 if (PUGI__NODETYPE(node) == node_element)
4299 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4302 if ((indent_flags & indent_indent) && indent_length)
4303 text_output_indent(writer, indent, indent_length, depth);
4305 node_output_end(writer, node);
4307 indent_flags = indent_newline | indent_indent;
4311 while (node != root);
// trailing newline handling after the whole subtree has been written
4313 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
// Scans the direct children of `node` in order and returns true as soon as a declaration node
// is found. The scan gives up with false at the first element child, so a declaration that
// follows an element is not reported.
// NOTE(review): the return used when the loop runs out of children is not visible in this listing.
4317 PUGI__FN bool has_declaration(xml_node_struct* node)
4319 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4321 xml_node_type type = PUGI__NODETYPE(child);
4323 if (type == node_declaration) return true;
4324 if (type == node_element) return false;
// Walks the attribute list of `node` starting at first_attribute.
// NOTE(review): the loop body and return statements are not visible in this listing; presumably
// the result is true when `attr` is found in the list — confirm against the full source.
4330 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4332 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
// Attributes may only be attached to element and declaration nodes.
4339 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4341 return parent == node_element || parent == node_declaration;
// Decides whether a node of type `child` may be inserted under a node of type `parent`.
// Rejected combinations visible here:
//  - the parent is neither a document nor an element;
//  - the child is a document or a null node;
//  - the child is a declaration or doctype and the parent is not the document.
// NOTE(review): the final return for the accepted case is not visible in this listing.
4344 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4346 if (parent != node_document && parent != node_element) return false;
4347 if (child == node_document || child == node_null) return false;
4348 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
// Validates moving `child` under `parent`: the type combination must pass allow_insert_child,
// both nodes must share the same root, and the new parent must not be inside the child's subtree.
// NOTE(review): the return statements and the ancestor walk that starts from `cur` are on lines
// not included in this listing.
4353 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4355 // check that child can be a child of parent
4356 if (!allow_insert_child(parent.type(), child.type()))
4359 // check that node is not moved between documents
4360 if (parent.root() != child.root())
4363 // check that new parent is not in the child subtree
4364 xml_node cur = parent;
// Copies one string field (name or value, selected by header_mask) from a source node/attribute
// into a destination that must still be empty (asserted below).
// When an allocator is supplied and the source string does not have its header_mask bit set,
// both headers are flagged with xml_memory_page_contents_shared_mask; otherwise the string is
// duplicated with strcpy_insitu.
// NOTE(review): the null check on `source`, the statement that makes dest refer to the shared
// string and the else keyword are on lines not included in this listing.
4377 template <typename String, typename Header>
4378 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4380 assert(!dest && (header & header_mask) == 0);
4384 if (alloc && (source_header & header_mask) == 0)
4388 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4389 header |= xml_memory_page_contents_shared_mask;
4390 source_header |= xml_memory_page_contents_shared_mask;
4393 strcpy_insitu(dest, header, header_mask, source, strlength(source));
// Copies the name, the value and every attribute of `sn` into `dn`; children are not touched.
// Attributes are appended to dn in source order through append_new_attribute and then filled
// with node_copy_string. shared_alloc is passed through to node_copy_string unchanged.
// NOTE(review): the handling of a failed append_new_attribute is not visible in this listing.
4397 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4399 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4400 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4402 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4404 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4408 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4409 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
// Deep-copies the subtree of `sn` into the already created node `dn` without recursion.
// dit tracks the destination node that receives new children while sit walks the source subtree.
// shared_alloc is only non-null when source and destination use the same allocator; it is what
// lets node_copy_string share strings instead of duplicating them.
// NOTE(review): the subtree-skip test, the null check on `copy` and the upward walk are on lines
// not included in this listing.
4414 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4416 xml_allocator& alloc = get_allocator(dn);
4417 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4419 node_copy_contents(dn, sn, shared_alloc);
4421 xml_node_struct* dit = dn;
4422 xml_node_struct* sit = sn->first_child;
4424 while (sit && sit != sn)
4426 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4429 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4433 node_copy_contents(copy, sit, shared_alloc);
4435 if (sit->first_child)
4438 sit = sit->first_child;
4444 // continue to the next node
4447 if (sit->next_sibling)
4449 sit = sit->next_sibling;
// Copies the name and value of attribute `sa` into attribute `da`.
// String sharing is only offered to node_copy_string (non-null shared_alloc) when both attributes
// use the same allocator.
4460 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4462 xml_allocator& alloc = get_allocator(da);
4463 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4465 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4466 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
// True for the two node types that carry character data: node_pcdata and node_cdata.
4469 inline bool is_text_node(xml_node_struct* node)
4471 xml_node_type type = PUGI__NODETYPE(node);
4473 return type == node_pcdata || type == node_cdata;
4476 // get value with conversion functions
// Parses an integer from `value` into the unsigned type U.
// Leading whitespace and one optional sign are skipped. A "0x"/"0X" prefix selects hexadecimal,
// anything else is read as decimal.
// Overflow is detected from the number of significant digits (which is why leading zeros are
// skipped first) plus, for decimal, the leading digit and the top bit of the accumulated result.
// Negative input is clamped to minv and non-negative input to maxv; both are given in U's
// representation (see get_value_int, which passes INT_MIN cast to unsigned).
// NOTE(review): the declaration of `result`, the loop headers, the pointer increments and the
// negative/positive branching are on lines not included in this listing.
4477 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4480 const char_t* s = value;
4482 while (PUGI__IS_CHARTYPE(*s, ct_space))
4485 bool negative = (*s == '-');
// the comparison yields 0 or 1, so this steps over at most one sign character
4487 s += (*s == '+' || *s == '-');
4489 bool overflow = false;
// (s[1] | ' ') folds 'X' to 'x'
4491 if (s[0] == '0' && (s[1] | ' ') == 'x')
4495 // since overflow detection relies on length of the sequence skip leading zeros
4499 const char_t* start = s;
// the unsigned cast turns "is a digit" into a single range check
4503 if (static_cast<unsigned>(*s - '0') < 10)
4504 result = result * 16 + (*s - '0');
4505 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4506 result = result * 16 + ((*s | ' ') - 'a' + 10);
4513 size_t digits = static_cast<size_t>(s - start);
// each byte of U holds two hex digits
4515 overflow = digits > sizeof(U) * 2;
4519 // since overflow detection relies on length of the sequence skip leading zeros
4523 const char_t* start = s;
4527 if (static_cast<unsigned>(*s - '0') < 10)
4528 result = result * 10 + (*s - '0');
4535 size_t digits = static_cast<size_t>(s - start);
4537 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
// decimal digit count and leading digit of the largest value of U for each supported size
4539 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4540 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4541 const size_t high_bit = sizeof(U) * 8 - 1;
4543 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4548 // Workaround for crayc++ CC-3059: Expected no overflow in routine.
// both forms negate in unsigned arithmetic: ~x + 1 and 0 - x
4550 return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4552 return (overflow || result > 0 - minv) ? minv : 0 - result;
4556 return (overflow || result > maxv) ? maxv : result;
// Parses `value` as int through the unsigned parser; the clamp bounds are INT_MIN (passed in its
// unsigned representation) and INT_MAX.
4559 PUGI__FN int get_value_int(const char_t* value)
4561 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
// Parses `value` as unsigned int; the clamp bounds are 0 and UINT_MAX.
4564 PUGI__FN unsigned int get_value_uint(const char_t* value)
4566 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
// Parses `value` as double with wcstod in PUGIXML_WCHAR_MODE and strtod otherwise.
// The end pointer argument is 0, so trailing characters are not inspected.
4569 PUGI__FN double get_value_double(const char_t* value)
4571 #ifdef PUGIXML_WCHAR_MODE
4572 return wcstod(value, 0);
4574 return strtod(value, 0);
// Same as get_value_double with the result narrowed to float.
4578 PUGI__FN float get_value_float(const char_t* value)
4580 #ifdef PUGIXML_WCHAR_MODE
4581 return static_cast<float>(wcstod(value, 0));
4583 return static_cast<float>(strtod(value, 0));
// Interprets `value` as a boolean by looking at its first character only:
// '1', 't', 'T', 'y' and 'Y' mean true, everything else (including an empty string) means false.
4587 PUGI__FN bool get_value_bool(const char_t* value)
4589 // only look at first char
4590 char_t first = *value;
4592 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4593 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4596 #ifdef PUGIXML_HAS_LONG_LONG
// Parses `value` as long long through the unsigned parser; the clamp bounds are LLONG_MIN (passed
// in its unsigned representation) and LLONG_MAX.
4597 PUGI__FN long long get_value_llong(const char_t* value)
4599 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
// Parses `value` as unsigned long long; the clamp bounds are 0 and ULLONG_MAX.
4602 PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4604 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
// Formats `value` as decimal digits into the tail of [begin, end), writing backwards from end - 1,
// and returns a pointer to the first character of the formatted text.
// `value` is the unsigned representation of the number; when `negative` is set its magnitude is
// recovered with the unsigned negation 0 - value.
// NOTE(review): the digit loop header and the statement that stores the sign are on lines not
// included in this listing; `result + !negative` suggests that the slot result points at holds
// the sign for negative numbers — confirm.
4608 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4610 char_t* result = end - 1;
4611 U rest = negative ? 0 - value : value;
4615 *result-- = static_cast<char_t>('0' + (rest % 10));
4620 assert(result >= begin);
4625 return result + !negative;
4628 // set value with conversion functions
// Stores the zero-terminated narrow string `buf` into dest via strcpy_insitu.
// In PUGIXML_WCHAR_MODE every char is first widened into the local wbuf (the assert checks that
// the text fits); otherwise buf is passed through directly with its strlen.
// Returns the result of strcpy_insitu.
// NOTE(review): the declarations of wbuf and offset are on lines not included in this listing.
4629 template <typename String, typename Header>
4630 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4632 #ifdef PUGIXML_WCHAR_MODE
4634 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4637 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4639 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4641 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
// Formats an integer into a local buffer with integer_to_string and stores the text in dest.
// `value` is passed in the unsigned type U together with a separate `negative` flag (callers pass
// rhs < 0 for signed inputs). The formatted text is the range [begin, end).
// Returns the result of strcpy_insitu.
// NOTE(review): the declaration of buf is on a line not included in this listing.
4645 template <typename U, typename String, typename Header>
4646 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4649 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4650 char_t* begin = integer_to_string(buf, end, value, negative);
4652 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
// Stores a float as text: formatted with "%.9g" and handed to set_value_ascii.
// NOTE(review): the declaration of buf is on a line not included in this listing.
4655 template <typename String, typename Header>
4656 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4659 PUGI__SNPRINTF(buf, "%.9g", value);
4661 return set_value_ascii(dest, header, header_mask, buf);
// Stores a double as text: formatted with "%.17g" and handed to set_value_ascii.
4664 template <typename String, typename Header>
4665 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4668 PUGI__SNPRINTF(buf, "%.17g", value);
4670 return set_value_ascii(dest, header, header_mask, buf);
// Stores the literal "true" or "false" in dest; the explicit lengths 4 and 5 match the literals.
// Returns the result of strcpy_insitu.
4673 template <typename String, typename Header>
4674 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4676 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4679 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4681 // check input buffer
4682 if (!contents && size) return make_parse_result(status_io_error);
4684 // get actual encoding
4685 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4687 // get private buffer
4691 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4693 // delete original buffer if we performed a conversion
4694 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4696 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4697 if (own || buffer != contents) *out_buffer = buffer;
4699 // store buffer for offset_debug
4700 doc->buffer = buffer;
4703 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4705 // remember encoding
4706 res.encoding = buffer_encoding;
// Determines the size of `file` by seeking to the end, reading the position and seeking back to
// the start; the result is written to out_result.
// A negative position is reported as status_io_error. A length that does not survive the round
// trip through size_t is reported as status_out_of_memory.
// Three variants are selected at compile time: MSVC CRT (_fseeki64/_ftelli64), MinGW
// (fseeko64/ftello64) and the plain fseek/ftell fallback.
// NOTE(review): the return values of the seek calls are not checked on the visible lines, and the
// final success return is on a line not included in this listing.
4711 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4712 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4714 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4715 // there are 64-bit versions of fseek/ftell, let's use them
4716 typedef __int64 length_type;
4718 _fseeki64(file, 0, SEEK_END);
4719 length_type length = _ftelli64(file);
4720 _fseeki64(file, 0, SEEK_SET);
4721 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4722 // there are 64-bit versions of fseek/ftell, let's use them
4723 typedef off64_t length_type;
4725 fseeko64(file, 0, SEEK_END);
4726 length_type length = ftello64(file);
4727 fseeko64(file, 0, SEEK_SET);
4729 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4730 typedef long length_type;
4732 fseek(file, 0, SEEK_END);
4733 length_type length = ftell(file);
4734 fseek(file, 0, SEEK_SET);
4737 // check for I/O errors
4738 if (length < 0) return status_io_error;
4740 // check for overflow
4741 size_t result = static_cast<size_t>(length);
// a mismatch after the round trip means the length does not fit into size_t
4743 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4746 out_result = result;
// Appends a terminator to `buffer` for the encodings where the later conversion step will not
// add one, and returns the resulting size in bytes.
// In PUGIXML_WCHAR_MODE this applies to the native wchar encoding and its endian-swapped variant:
// one char_t zero is stored at index size / sizeof(char_t). Otherwise it applies to UTF-8, where
// a single zero byte is stored at index size.
// NOTE(review): the #else/#endif lines and the remaining return statements are not included in
// this listing.
4751 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4752 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4754 // We only need to zero-terminate if encoding conversion does not do it for us
4755 #ifdef PUGIXML_WCHAR_MODE
4756 xml_encoding wchar_encoding = get_wchar_encoding();
4758 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
// integer division: any trailing partial character is dropped from the count
4760 size_t length = size / sizeof(char_t);
4762 static_cast<char_t*>(buffer)[length] = 0;
4763 return (length + 1) * sizeof(char_t);
4766 if (encoding == encoding_utf8)
4768 static_cast<char*>(buffer)[size] = 0;
// Reads the whole of `file` into a freshly allocated buffer and parses it into `doc`.
// Failure results visible here: status_file_not_found for a null FILE, the status from
// get_file_size, status_out_of_memory when the allocation fails and status_io_error when fewer
// bytes than expected are read (the buffer is released in that case).
// The buffer is allocated with sizeof(char_t) spare bytes so that zero_terminate_buffer can
// append a terminator. It is handed to load_buffer_impl with is_mutable and own both true.
// NOTE(review): the declaration of `size` is on a line not included in this listing.
4776 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4778 if (!file) return make_parse_result(status_file_not_found);
4780 // get file size (can result in I/O errors)
4782 xml_parse_status size_status = get_file_size(file, size);
4783 if (size_status != status_ok) return make_parse_result(size_status);
4785 size_t max_suffix_size = sizeof(char_t);
4787 // allocate buffer for the whole file
4788 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4789 if (!contents) return make_parse_result(status_out_of_memory);
4791 // read file in memory
4792 size_t read_size = fread(contents, 1, size, file);
4794 if (read_size != size)
4796 xml_memory::deallocate(contents);
4797 return make_parse_result(status_io_error);
// the encoding is resolved on the raw bytes before the terminator is appended
4800 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4802 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
// Helper with a void(FILE*) signature.
// NOTE(review): the body is not included in this listing; presumably it closes `file` — confirm
// against the full source.
4805 PUGI__FN void close_file(FILE* file)
4810 #ifndef PUGIXML_NO_STL
// One link of a singly linked list of fixed-size read buffers, used by load_stream_data_noseek.
// Chunks are created in memory obtained from xml_memory::allocate (placement new) and therefore
// have to be released through destroy() rather than delete.
// load_stream_data_noseek stores the number of valid bytes of `data` in `size`.
// NOTE(review): the loop in destroy() and the declaration of `size` are on lines not included in
// this listing.
4811 template <typename T> struct xml_stream_chunk
// returns a default-constructed chunk, or 0 when the allocation fails
4813 static xml_stream_chunk* create()
4815 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4816 if (!memory) return 0;
4818 return new (memory) xml_stream_chunk();
4821 static void destroy(xml_stream_chunk* chunk)
// the successor is read before the chunk's memory is handed back
4826 xml_stream_chunk* next_ = chunk->next;
4828 xml_memory::deallocate(chunk);
4834 xml_stream_chunk(): next(0), size(0)
4838 xml_stream_chunk* next;
// payload capacity is one memory page worth of T elements
4841 T data[xml_memory_page_size / sizeof(T)];
// Reads a stream to its end without seeking: data is collected chunk by chunk in a linked list
// and then copied into one contiguous buffer allocated with xml_memory::allocate.
// The buffer is sized with sizeof(char_t) spare bytes beyond the data.
// The chunk list is owned by an auto_deleter set up with xml_stream_chunk<T>::destroy.
// Returns status_out_of_memory when a chunk or the final buffer cannot be allocated or the total
// size overflows size_t, and status_io_error on stream errors other than reaching the end.
// On success the buffer is stored in *out_buffer.
// NOTE(review): the declaration of `total`, the store to *out_size and the success return are on
// lines not included in this listing.
4844 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4846 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4848 // read file to a chunk list
4850 xml_stream_chunk<T>* last = 0;
4852 while (!stream.eof())
4854 // allocate new chunk
4855 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4856 if (!chunk) return status_out_of_memory;
4858 // append chunk to list
4859 if (last) last = last->next = chunk;
4860 else chunks.data = last = chunk;
4862 // read data to chunk
4863 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
// gcount() is in elements of T; sizes are tracked in bytes
4864 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4866 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4867 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4869 // guard against huge files (chunk size is small enough to make this overflow check work)
4870 if (total + chunk->size < total) return status_out_of_memory;
4871 total += chunk->size;
4874 size_t max_suffix_size = sizeof(char_t);
4876 // copy chunk list to a contiguous buffer
4877 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4878 if (!buffer) return status_out_of_memory;
4880 char* write = buffer;
4882 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4884 assert(write + chunk->size <= buffer + total);
4885 memcpy(write, chunk->data, chunk->size);
4886 write += chunk->size;
4889 assert(write == buffer + total);
4892 *out_buffer = buffer;
// Reads the remainder of a seekable stream in one go: the remaining length is computed from the
// current position and the end position, a buffer of that size plus sizeof(char_t) spare bytes is
// allocated, and a single read fills it.
// Returns status_io_error when positioning or reading fails, and status_out_of_memory when the
// length is negative, does not fit into size_t or the allocation fails.
// On success *out_buffer takes over the buffer and *out_size is the number of bytes actually
// read, which can be smaller than the computed length.
// NOTE(review): some lines of this function (including the success return) are not included in
// this listing.
4898 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4900 // get length of remaining data in stream
4901 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4902 stream.seekg(0, std::ios::end);
4903 std::streamoff length = stream.tellg() - pos;
4906 if (stream.fail() || pos < 0) return status_io_error;
4908 // guard against huge files
4909 size_t read_length = static_cast<size_t>(length);
4911 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4913 size_t max_suffix_size = sizeof(char_t);
4915 // read stream data into memory (guard against stream exceptions with buffer holder)
4916 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4917 if (!buffer.data) return status_out_of_memory;
4919 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4921 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4922 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4925 size_t actual_length = static_cast<size_t>(stream.gcount());
4926 assert(actual_length <= read_length);
// release() hands the buffer over to the caller
4928 *out_buffer = buffer.release();
4929 *out_size = actual_length * sizeof(T);
// Loads a whole stream into memory and parses it into `doc`.
// A stream that is already in a failed state is rejected with status_io_error. tellg() is used
// as the probe for seekability: a negative result selects the chunked no-seek reader (after the
// error flags are cleared), otherwise the seek-based reader is used.
// The loaded buffer is terminated with zero_terminate_buffer and handed to load_buffer_impl with
// is_mutable and own both true.
// NOTE(review): the declarations of `buffer` and `size` are on lines not included in this listing.
4934 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4938 xml_parse_status status = status_ok;
4940 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4941 if (stream.fail()) return make_parse_result(status_io_error);
4943 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4944 if (stream.tellg() < 0)
4946 stream.clear(); // clear error flags that could be set by a failing tellg
4947 status = load_stream_data_noseek(stream, &buffer, &size);
4950 status = load_stream_data_seek(stream, &buffer, &size);
4952 if (status != status_ok) return make_parse_result(status);
4954 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4956 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4960 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
// Variant for toolchains that provide _wfopen (selected by the preprocessor condition above):
// opens a file by wide-character path by forwarding both arguments to _wfopen.
4961 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4963 return _wfopen(path, mode);
// Converts a wide-character path to UTF-8 in a buffer obtained from xml_memory::allocate.
// The conversion runs in two passes: as_utf8_begin computes the encoded size, as_utf8_end writes
// the bytes. One extra byte is allocated beyond the encoded size.
// Returns 0 when the allocation fails; the caller releases the result with xml_memory::deallocate
// (see the utf8 variant of open_file_wide).
// NOTE(review): the statement that stores the terminator and the final return are on lines not
// included in this listing.
4966 PUGI__FN char* convert_path_heap(const wchar_t* str)
4970 // first pass: get length in utf8 characters
4971 size_t length = strlength_wide(str);
4972 size_t size = as_utf8_begin(str, length);
4974 // allocate resulting string
4975 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4976 if (!result) return 0;
4978 // second pass: convert to utf8
4979 as_utf8_end(result, size, str, length);
// Fallback variant for platforms without _wfopen: the wide path is converted to UTF-8 and opened
// with fopen. The mode string is narrowed character by character.
// Returns 0 when the path conversion fails. The temporary UTF-8 path is released after fopen.
// NOTE(review): mode_ascii holds at most 3 characters plus the terminator and the copy loop has
// no bound, so callers must pass short mode strings (e.g. L"rb"). The final return is on a line
// not included in this listing.
4987 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4989 // there is no standard function to open wide paths, so our best bet is to try utf8 path
4990 char* path_utf8 = convert_path_heap(path);
4991 if (!path_utf8) return 0;
4993 // convert mode to ASCII (we mirror _wfopen interface)
4994 char mode_ascii[4] = {0};
4995 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4997 // try to open the utf8 path
4998 FILE* result = fopen(path_utf8, mode_ascii);
5000 // free dummy buffer
5001 xml_memory::deallocate(path_utf8);
5007 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5009 if (!file) return false;
5011 xml_writer_file writer(file);
5012 doc.save(writer, indent, flags, encoding);
5014 return ferror(file) == 0;
// Small helper object tied to one node: the constructor remembers the node and a copy of its
// name pointer.
// NOTE(review): the rest of the struct (the `name` member declaration, the constructor body and
// any destructor) is not included in this listing; the type name suggests that the node's name is
// cleared for the lifetime of the object and put back afterwards — confirm against the full source.
5017 struct name_null_sentry
5019 xml_node_struct* node;
5022 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
// Wraps a FILE handle that is passed in as void*; the pointer is only stored.
5036 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
// Writes `size` bytes from `data` to the wrapped FILE with fwrite.
// The number of bytes written is deliberately discarded: this interface returns void, so a short
// write cannot be reported from here.
5040 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5042 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5043 (void)!result; // unfortunately we can't do proper error handling here
5046 #ifndef PUGIXML_NO_STL
// Writer bound to a narrow (char) output stream; the wide stream pointer stays null.
5047 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
// Writer bound to a wide (wchar_t) output stream; the narrow stream pointer stays null.
5051 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
// Forwards a block of output to whichever stream the writer was constructed with.
// `size` is in bytes: the narrow stream receives it as is, the wide stream receives
// size / sizeof(wchar_t) characters (the size is asserted to be a whole number of wchar_t).
// NOTE(review): the branch condition that selects between the two paths is on a line not
// included in this listing.
5055 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5059 assert(!wide_stream);
5060 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5064 assert(wide_stream);
5065 assert(size % sizeof(wchar_t) == 0);
5067 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
// xml_tree_walker member definitions. The constructor starts with _depth at 0.
// NOTE(review): the bodies of the destructor, depth(), begin() and end() are not included in this
// listing. begin() and end() leave their xml_node parameter unnamed, i.e. these base versions do
// not use it.
5072 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5076 PUGI__FN xml_tree_walker::~xml_tree_walker()
5080 PUGI__FN int xml_tree_walker::depth() const
5085 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5090 PUGI__FN bool xml_tree_walker::end(xml_node&)
// Default constructor: an empty handle (_attr is null).
5095 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
// Wraps an existing attribute structure; no ownership is implied by the visible code.
5099 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
// Function whose address serves as the non-null value of the unspecified-bool conversion below.
5103 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
// Boolean-context conversion: non-null (the helper's address) when the handle is non-empty, 0 otherwise.
5107 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5109 return _attr ? unspecified_bool_xml_attribute : 0;
// NOTE(review): body not included in this listing; presumably the negation of the conversion above.
5112 PUGI__FN bool xml_attribute::operator!() const
// Comparison operators for attribute handles. All of them compare the wrapped
// xml_attribute_struct pointers, so two handles are equal only when they refer to the same
// attribute object, and the ordering is pointer ordering, not name or value ordering.
5117 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5119 return (_attr == r._attr);
5122 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5124 return (_attr != r._attr);
5127 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5129 return (_attr < r._attr);
5132 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5134 return (_attr > r._attr);
5137 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5139 return (_attr <= r._attr);
5142 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5144 return (_attr >= r._attr);
// Returns a handle to the following attribute, or an empty handle when this handle is empty.
// A null next_attribute link also yields an empty handle, because it is wrapped as is.
5147 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5149 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
// Returns a handle to the preceding attribute. The prev_attribute_c link is only followed when
// the attribute it points to has a non-null next_attribute; otherwise (and for an empty handle)
// the result is an empty handle.
// NOTE(review): this reads as if prev_attribute_c of the first attribute points at the last one —
// confirm against the struct definition.
5152 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5154 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
// Typed value accessors. Each returns `def` when the handle is empty or the attribute has no
// value; otherwise the stored text is returned (as_string) or converted with the matching
// impl::get_value_* helper.
5157 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5159 return (_attr && _attr->value) ? _attr->value + 0 : def;
5162 PUGI__FN int xml_attribute::as_int(int def) const
5164 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5167 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5169 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5172 PUGI__FN double xml_attribute::as_double(double def) const
5174 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5177 PUGI__FN float xml_attribute::as_float(float def) const
5179 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5182 PUGI__FN bool xml_attribute::as_bool(bool def) const
5184 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5187 #ifdef PUGIXML_HAS_LONG_LONG
// 64-bit accessors, only compiled when PUGIXML_HAS_LONG_LONG is defined. Each returns `def` when
// the handle is empty or the attribute has no value.
5188 PUGI__FN long long xml_attribute::as_llong(long long def) const
5190 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5193 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5195 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
// NOTE(review): body not included in this listing; presumably true when _attr is null.
5199 PUGI__FN bool xml_attribute::empty() const
// Attribute name; an empty string (never a null pointer) when the handle is empty or the name is unset.
5204 PUGI__FN const char_t* xml_attribute::name() const
5206 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
// Attribute value; an empty string (never a null pointer) when the handle is empty or the value is unset.
5209 PUGI__FN const char_t* xml_attribute::value() const
5211 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
// Hash derived from the address of the wrapped structure, divided by the structure size.
5214 PUGI__FN size_t xml_attribute::hash_value() const
5216 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
// NOTE(review): body not included in this listing; presumably returns _attr.
5219 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
// Assignment operators, one overload per supported value type.
// NOTE(review): only the signatures are included in this listing; presumably each overload
// forwards to the matching set_value overload and returns *this — confirm against the full source.
5224 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5230 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5236 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5242 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5248 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5254 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5260 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5266 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5272 #ifdef PUGIXML_HAS_LONG_LONG
// 64-bit assignment overloads, only compiled when PUGIXML_HAS_LONG_LONG is defined.
// NOTE(review): bodies are not included in this listing; presumably they forward to set_value and
// return *this — confirm against the full source.
5273 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5279 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
// Replaces the attribute name with a copy of `rhs`.
// Returns false for an empty handle, otherwise the result of impl::strcpy_insitu.
// `rhs` is passed to impl::strlength, so it must be a valid string.
5286 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5288 if (!_attr) return false;
5290 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
// Replaces the attribute value with a copy of `rhs`; same contract as set_name.
5293 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5295 if (!_attr) return false;
5297 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
// Integer set_value overloads. Each returns false for an empty handle and otherwise stores the
// decimal text produced by impl::set_value_integer.
// Signed inputs are converted to the unsigned type of the same width and passed together with an
// explicit sign flag (rhs < 0); unsigned inputs always pass false for the flag.
5300 PUGI__FN bool xml_attribute::set_value(int rhs)
5302 if (!_attr) return false;
5304 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5307 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5309 if (!_attr) return false;
5311 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5314 PUGI__FN bool xml_attribute::set_value(long rhs)
5316 if (!_attr) return false;
5318 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5321 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5323 if (!_attr) return false;
5325 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
// Floating-point and boolean set_value overloads. Each returns false for an empty handle.
// double and float go through impl::set_value_convert (overload selected by the argument type);
// bool goes through impl::set_value_bool, which stores "true" or "false".
5328 PUGI__FN bool xml_attribute::set_value(double rhs)
5330 if (!_attr) return false;
5332 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5335 PUGI__FN bool xml_attribute::set_value(float rhs)
5337 if (!_attr) return false;
5339 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5342 PUGI__FN bool xml_attribute::set_value(bool rhs)
5344 if (!_attr) return false;
5346 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5349 #ifdef PUGIXML_HAS_LONG_LONG
// 64-bit set_value overloads, only compiled when PUGIXML_HAS_LONG_LONG is defined.
// Same scheme as the other integer overloads: the value is passed as unsigned long long plus a
// sign flag, and an empty handle yields false.
5350 PUGI__FN bool xml_attribute::set_value(long long rhs)
5352 if (!_attr) return false;
5354 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5357 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5359 if (!_attr) return false;
5361 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
// Free operators combining an attribute handle with a bool: the handle is first converted to
// bool explicitly (non-empty handle => true) and then combined with rhs.
// NOTE(review): the preprocessor guard these definitions may sit under is not visible in this listing.
5366 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5368 return (bool)lhs && rhs;
5371 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5373 return (bool)lhs || rhs;
// Default constructor: an empty handle (_root is null).
5377 PUGI__FN xml_node::xml_node(): _root(0)
// Wraps an existing node structure; no ownership is implied by the visible code.
5381 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
// Function whose address serves as the non-null value of the unspecified-bool conversion below.
5385 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
// Boolean-context conversion: non-null (the helper's address) when the handle is non-empty, 0 otherwise.
5389 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5391 return _root ? unspecified_bool_xml_node : 0;
// NOTE(review): body not included in this listing; presumably the negation of the conversion above.
5394 PUGI__FN bool xml_node::operator!() const
// Iteration helpers. Iterators are built from a (current, parent) pair: begin-style functions
// start at the first child/attribute (0 when the handle is empty), end-style functions use 0 as
// the current position. The range functions simply pair the two.
5399 PUGI__FN xml_node::iterator xml_node::begin() const
5401 return iterator(_root ? _root->first_child + 0 : 0, _root);
5404 PUGI__FN xml_node::iterator xml_node::end() const
5406 return iterator(0, _root);
5409 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5411 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5414 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5416 return attribute_iterator(0, _root);
5419 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5421 return xml_object_range<xml_node_iterator>(begin(), end());
// named variant: starts at the first child called name_ (found with child()) and carries name_ in both iterators
5424 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5426 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5429 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5431 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
// Comparison operators for node handles. All of them compare the wrapped xml_node_struct
// pointers, so equality means "same node object" and the ordering is pointer ordering, not
// document order.
5434 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5436 return (_root == r._root);
5439 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5441 return (_root != r._root);
5444 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5446 return (_root < r._root);
5449 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5451 return (_root > r._root);
5454 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5456 return (_root <= r._root);
5459 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5461 return (_root >= r._root);
// NOTE(review): body not included in this listing; presumably true when _root is null.
5464 PUGI__FN bool xml_node::empty() const
// Node name; an empty string (never a null pointer) when the handle is empty or the name is unset.
5469 PUGI__FN const char_t* xml_node::name() const
5471 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
// Node type; node_null for an empty handle.
5474 PUGI__FN xml_node_type xml_node::type() const
5476 return _root ? PUGI__NODETYPE(_root) : node_null;
// Node value; an empty string (never a null pointer) when the handle is empty or the value is unset.
5479 PUGI__FN const char_t* xml_node::value() const
5481 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
// Returns the first direct child whose name equals name_ (linear scan in sibling order).
// Children without a name are skipped. An empty handle yields an empty node.
// NOTE(review): the return for "no match" is on a line not included in this listing.
5484 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5486 if (!_root) return xml_node();
5488 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5489 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
// Finds the first attribute whose name is non-null and matches name_ (via impl::strequal),
// walking first_attribute/next_attribute in order. Returns an empty xml_attribute for a
// null handle or when no attribute matches.
5494 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5496 if (!_root) return xml_attribute();
5498 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5499 if (i->name && impl::strequal(name_, i->name))
5500 return xml_attribute(i);
5502 return xml_attribute();
// Finds the nearest following sibling whose name is non-null and matches name_ (via impl::strequal).
// A null handle yields an empty xml_node.
// NOTE(review): the return for the not-found case is not visible in this chunk.
5505 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5507 if (!_root) return xml_node();
5509 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5510 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
// Immediate next sibling (wrapping _root->next_sibling), or an empty handle when this handle is null.
5515 PUGI__FN xml_node xml_node::next_sibling() const
5517 return _root ? xml_node(_root->next_sibling) : xml_node();
// Finds the nearest preceding sibling whose name is non-null and matches name_ (via impl::strequal).
// The walk follows prev_sibling_c and stops at a node whose next_sibling is null, so such a node is
// never examined (presumably prev_sibling_c of the first child wraps to the last child - confirm).
// NOTE(review): the return for the not-found case is not visible in this chunk.
5520 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5522 if (!_root) return xml_node();
5524 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5525 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
// Attribute lookup by name with a caller-maintained search hint.
// hint_ is read as the starting point and, on a match, overwritten with the attribute that follows
// the match. The search first runs from the hint to the end of the attribute list, then from the
// first attribute up to (not including) the hint. Returns an empty xml_attribute for a null handle
// or when nothing matches; hint_ is left unchanged in those cases.
5530 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5532 xml_attribute_struct* hint = hint_._attr;
5534 // if hint is not an attribute of node, behavior is not defined
5535 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5537 if (!_root) return xml_attribute();
5539 // optimistically search from hint up until the end
5540 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5541 if (i->name && impl::strequal(name_, i->name))
5543 // update hint to maximize efficiency of searching for consecutive attributes
5544 hint_._attr = i->next_attribute;
5546 return xml_attribute(i);
5549 // wrap around and search from the first attribute until the hint
5550 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5551 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5552 if (j->name && impl::strequal(name_, j->name))
5554 // update hint to maximize efficiency of searching for consecutive attributes
5555 hint_._attr = j->next_attribute;
5557 return xml_attribute(j);
5560 return xml_attribute();
// Immediate previous sibling. _root->prev_sibling_c is returned only when that node has a
// next_sibling; otherwise (and for a null handle) the result is an empty xml_node.
5563 PUGI__FN xml_node xml_node::previous_sibling() const
5565 if (!_root) return xml_node();
5567 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5568 else return xml_node();
// Parent handle (wrapping _root->parent), or an empty handle when this handle is null.
5571 PUGI__FN xml_node xml_node::parent() const
5573 return _root ? xml_node(_root->parent) : xml_node();
// Handle to the document structure obtained from impl::get_document(_root); empty for a null handle.
5576 PUGI__FN xml_node xml_node::root() const
5578 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
// Text accessor object constructed over this node's _root.
5581 PUGI__FN xml_text xml_node::text() const
5583 return xml_text(_root);
// Text content associated with this node. Returns an empty string for a null handle, the node's
// own value for an element that carries one, and otherwise searches the children for a text node
// (impl::is_text_node) with a non-null value. Falls back to an empty string.
// NOTE(review): the statement executed for a matching text child is not visible in this chunk.
5586 PUGI__FN const char_t* xml_node::child_value() const
5588 if (!_root) return PUGIXML_TEXT("");
5590 // element nodes can have value if parse_embed_pcdata was used
5591 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5592 return _root->value;
5594 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5595 if (impl::is_text_node(i) && i->value)
5598 return PUGIXML_TEXT("");
// child_value() of the child selected by child(name_).
5601 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5603 return child(name_).child_value();
// First attribute (wrapping _root->first_attribute), or an empty attribute for a null handle.
5606 PUGI__FN xml_attribute xml_node::first_attribute() const
5608 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
// Last attribute, reached through the first attribute's prev_attribute_c link; empty when the
// handle is null or the node has no attributes.
5611 PUGI__FN xml_attribute xml_node::last_attribute() const
5613 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
// First child (wrapping _root->first_child), or an empty handle for a null handle.
5616 PUGI__FN xml_node xml_node::first_child() const
5618 return _root ? xml_node(_root->first_child) : xml_node();
// Last child, reached through the first child's prev_sibling_c link; empty when the handle is
// null or the node has no children.
5621 PUGI__FN xml_node xml_node::last_child() const
5623 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
// Sets the node name to rhs. The type check singles out every type other than element, PI and
// declaration (a null handle counts as node_null); the statement taken in that case is not visible
// in this chunk. Otherwise the name is stored through impl::strcpy_insitu, whose result is returned.
5626 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5628 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5630 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5633 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
// Sets the node value to rhs. The type check singles out every type other than pcdata, cdata,
// comment, PI and doctype (a null handle counts as node_null); the statement taken in that case is
// not visible in this chunk. Otherwise the value is stored through impl::strcpy_insitu, whose
// result is returned.
5636 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5638 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5640 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5643 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
// Allocates a new attribute and links it with impl::append_attribute. Returns an empty attribute
// when the node type does not allow attributes, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the statements that apply name_ and return the new attribute are not visible in this chunk.
5646 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5648 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5650 impl::xml_allocator& alloc = impl::get_allocator(_root);
5651 if (!alloc.reserve()) return xml_attribute();
5653 xml_attribute a(impl::allocate_attribute(alloc));
5654 if (!a) return xml_attribute();
5656 impl::append_attribute(a._attr, _root);
// Allocates a new attribute and links it with impl::prepend_attribute. Returns an empty attribute
// when the node type does not allow attributes, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the statements that apply name_ and return the new attribute are not visible in this chunk.
5663 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5665 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5667 impl::xml_allocator& alloc = impl::get_allocator(_root);
5668 if (!alloc.reserve()) return xml_attribute();
5670 xml_attribute a(impl::allocate_attribute(alloc));
5671 if (!a) return xml_attribute();
5673 impl::prepend_attribute(a._attr, _root);
// Allocates a new attribute and links it after attr with impl::insert_attribute_after.
// Returns an empty attribute when the node type does not allow attributes, when attr is empty or
// is not an attribute of this node, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the statements that apply name_ and return the new attribute are not visible in this chunk.
5680 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5682 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5683 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5685 impl::xml_allocator& alloc = impl::get_allocator(_root);
5686 if (!alloc.reserve()) return xml_attribute();
5688 xml_attribute a(impl::allocate_attribute(alloc));
5689 if (!a) return xml_attribute();
5691 impl::insert_attribute_after(a._attr, attr._attr, _root);
// Allocates a new attribute and links it before attr with impl::insert_attribute_before.
// Returns an empty attribute when the node type does not allow attributes, when attr is empty or
// is not an attribute of this node, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the statements that apply name_ and return the new attribute are not visible in this chunk.
5698 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5700 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5701 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5703 impl::xml_allocator& alloc = impl::get_allocator(_root);
5704 if (!alloc.reserve()) return xml_attribute();
5706 xml_attribute a(impl::allocate_attribute(alloc));
5707 if (!a) return xml_attribute();
5709 impl::insert_attribute_before(a._attr, attr._attr, _root);
// Appends a copy of attribute proto: a new attribute is allocated, linked with
// impl::append_attribute and filled by impl::node_copy_attribute. Returns an empty attribute when
// proto is empty, the node type does not allow attributes, alloc.reserve() fails, or allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5716 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5718 if (!proto) return xml_attribute();
5719 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5721 impl::xml_allocator& alloc = impl::get_allocator(_root);
5722 if (!alloc.reserve()) return xml_attribute();
5724 xml_attribute a(impl::allocate_attribute(alloc));
5725 if (!a) return xml_attribute();
5727 impl::append_attribute(a._attr, _root);
5728 impl::node_copy_attribute(a._attr, proto._attr);
// Prepends a copy of attribute proto: a new attribute is allocated, linked with
// impl::prepend_attribute and filled by impl::node_copy_attribute. Returns an empty attribute when
// proto is empty, the node type does not allow attributes, alloc.reserve() fails, or allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5733 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5735 if (!proto) return xml_attribute();
5736 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5738 impl::xml_allocator& alloc = impl::get_allocator(_root);
5739 if (!alloc.reserve()) return xml_attribute();
5741 xml_attribute a(impl::allocate_attribute(alloc));
5742 if (!a) return xml_attribute();
5744 impl::prepend_attribute(a._attr, _root);
5745 impl::node_copy_attribute(a._attr, proto._attr);
// Inserts a copy of attribute proto after attr: a new attribute is allocated, linked with
// impl::insert_attribute_after and filled by impl::node_copy_attribute. Returns an empty attribute
// when proto is empty, the node type does not allow attributes, attr is empty or not an attribute
// of this node, alloc.reserve() fails, or allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5750 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5752 if (!proto) return xml_attribute();
5753 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5754 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5756 impl::xml_allocator& alloc = impl::get_allocator(_root);
5757 if (!alloc.reserve()) return xml_attribute();
5759 xml_attribute a(impl::allocate_attribute(alloc));
5760 if (!a) return xml_attribute();
5762 impl::insert_attribute_after(a._attr, attr._attr, _root);
5763 impl::node_copy_attribute(a._attr, proto._attr);
// Inserts a copy of attribute proto before attr: a new attribute is allocated, linked with
// impl::insert_attribute_before and filled by impl::node_copy_attribute. Returns an empty attribute
// when proto is empty, the node type does not allow attributes, attr is empty or not an attribute
// of this node, alloc.reserve() fails, or allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5768 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5770 if (!proto) return xml_attribute();
5771 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5772 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5774 impl::xml_allocator& alloc = impl::get_allocator(_root);
5775 if (!alloc.reserve()) return xml_attribute();
5777 xml_attribute a(impl::allocate_attribute(alloc));
5778 if (!a) return xml_attribute();
5780 impl::insert_attribute_before(a._attr, attr._attr, _root);
5781 impl::node_copy_attribute(a._attr, proto._attr);
// Allocates a node of type type_ and links it with impl::append_node. A declaration node is
// named "xml". Returns an empty handle when impl::allow_insert_child rejects the parent/child type
// pair, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5786 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5788 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5790 impl::xml_allocator& alloc = impl::get_allocator(_root);
5791 if (!alloc.reserve()) return xml_node();
5793 xml_node n(impl::allocate_node(alloc, type_));
5794 if (!n) return xml_node();
5796 impl::append_node(n._root, _root);
5798 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
// Allocates a node of type type_ and links it with impl::prepend_node. A declaration node is
// named "xml". Returns an empty handle when impl::allow_insert_child rejects the parent/child type
// pair, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5803 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5805 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5807 impl::xml_allocator& alloc = impl::get_allocator(_root);
5808 if (!alloc.reserve()) return xml_node();
5810 xml_node n(impl::allocate_node(alloc, type_));
5811 if (!n) return xml_node();
5813 impl::prepend_node(n._root, _root);
5815 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
// Allocates a node of type type_ and links it before node with impl::insert_node_before.
// A declaration node is named "xml". Returns an empty handle when impl::allow_insert_child rejects
// the type pair, when node is empty or is not a child of this node, when alloc.reserve() fails, or
// when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5820 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5822 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5823 if (!node._root || node._root->parent != _root) return xml_node();
5825 impl::xml_allocator& alloc = impl::get_allocator(_root);
5826 if (!alloc.reserve()) return xml_node();
5828 xml_node n(impl::allocate_node(alloc, type_));
5829 if (!n) return xml_node();
5831 impl::insert_node_before(n._root, node._root);
5833 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
// Allocates a node of type type_ and links it after node with impl::insert_node_after.
// A declaration node is named "xml". Returns an empty handle when impl::allow_insert_child rejects
// the type pair, when node is empty or is not a child of this node, when alloc.reserve() fails, or
// when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5838 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5840 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5841 if (!node._root || node._root->parent != _root) return xml_node();
5843 impl::xml_allocator& alloc = impl::get_allocator(_root);
5844 if (!alloc.reserve()) return xml_node();
5846 xml_node n(impl::allocate_node(alloc, type_));
5847 if (!n) return xml_node();
5849 impl::insert_node_after(n._root, node._root);
5851 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
// Name-based convenience overloads: each creates a node_element child through the matching
// type-based overload and then calls set_name(name_) on the result.
// NOTE(review): the return statements are not visible in this chunk.
5856 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5858 xml_node result = append_child(node_element);
5860 result.set_name(name_);
5865 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5867 xml_node result = prepend_child(node_element);
5869 result.set_name(name_);
5874 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5876 xml_node result = insert_child_after(node_element, node);
5878 result.set_name(name_);
5883 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5885 xml_node result = insert_child_before(node_element, node);
5887 result.set_name(name_);
// Appends a copy of node proto: a node of proto's type is allocated, linked with impl::append_node
// and populated by impl::node_copy_tree. Returns an empty handle when impl::allow_insert_child
// rejects the type pair, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5892 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5894 xml_node_type type_ = proto.type();
5895 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5897 impl::xml_allocator& alloc = impl::get_allocator(_root);
5898 if (!alloc.reserve()) return xml_node();
5900 xml_node n(impl::allocate_node(alloc, type_));
5901 if (!n) return xml_node();
5903 impl::append_node(n._root, _root);
5904 impl::node_copy_tree(n._root, proto._root);
// Prepends a copy of node proto: a node of proto's type is allocated, linked with
// impl::prepend_node and populated by impl::node_copy_tree. Returns an empty handle when
// impl::allow_insert_child rejects the type pair, when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5909 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5911 xml_node_type type_ = proto.type();
5912 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5914 impl::xml_allocator& alloc = impl::get_allocator(_root);
5915 if (!alloc.reserve()) return xml_node();
5917 xml_node n(impl::allocate_node(alloc, type_));
5918 if (!n) return xml_node();
5920 impl::prepend_node(n._root, _root);
5921 impl::node_copy_tree(n._root, proto._root);
// Inserts a copy of node proto after node: a node of proto's type is allocated, linked with
// impl::insert_node_after and populated by impl::node_copy_tree. Returns an empty handle when
// impl::allow_insert_child rejects the type pair, when node is empty or not a child of this node,
// when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5926 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5928 xml_node_type type_ = proto.type();
5929 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5930 if (!node._root || node._root->parent != _root) return xml_node();
5932 impl::xml_allocator& alloc = impl::get_allocator(_root);
5933 if (!alloc.reserve()) return xml_node();
5935 xml_node n(impl::allocate_node(alloc, type_));
5936 if (!n) return xml_node();
5938 impl::insert_node_after(n._root, node._root);
5939 impl::node_copy_tree(n._root, proto._root);
// Inserts a copy of node proto before node: a node of proto's type is allocated, linked with
// impl::insert_node_before and populated by impl::node_copy_tree. Returns an empty handle when
// impl::allow_insert_child rejects the type pair, when node is empty or not a child of this node,
// when alloc.reserve() fails, or when allocation fails.
// NOTE(review): the final return is not visible in this chunk.
5944 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5946 xml_node_type type_ = proto.type();
5947 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5948 if (!node._root || node._root->parent != _root) return xml_node();
5950 impl::xml_allocator& alloc = impl::get_allocator(_root);
5951 if (!alloc.reserve()) return xml_node();
5953 xml_node n(impl::allocate_node(alloc, type_));
5954 if (!n) return xml_node();
5956 impl::insert_node_before(n._root, node._root);
5957 impl::node_copy_tree(n._root, proto._root);
// Moves node moved to the end of this node's children: it is unlinked with impl::remove_node and
// re-linked with impl::append_node. Returns an empty handle when impl::allow_move rejects the move
// or alloc.reserve() fails.
// NOTE(review): the final return is not visible in this chunk.
5962 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5964 if (!impl::allow_move(*this, moved)) return xml_node();
5966 impl::xml_allocator& alloc = impl::get_allocator(_root);
5967 if (!alloc.reserve()) return xml_node();
5969 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5970 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5972 impl::remove_node(moved._root);
5973 impl::append_node(moved._root, _root);
// Moves node moved to the front of this node's children: it is unlinked with impl::remove_node and
// re-linked with impl::prepend_node. Returns an empty handle when impl::allow_move rejects the move
// or alloc.reserve() fails.
// NOTE(review): the final return is not visible in this chunk.
5978 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5980 if (!impl::allow_move(*this, moved)) return xml_node();
5982 impl::xml_allocator& alloc = impl::get_allocator(_root);
5983 if (!alloc.reserve()) return xml_node();
5985 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5986 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5988 impl::remove_node(moved._root);
5989 impl::prepend_node(moved._root, _root);
// Moves node moved so that it follows node: it is unlinked with impl::remove_node and re-linked
// with impl::insert_node_after. Returns an empty handle when impl::allow_move rejects the move,
// when node is empty or not a child of this node, when moved and node are the same node, or when
// alloc.reserve() fails.
// NOTE(review): the final return is not visible in this chunk.
5994 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5996 if (!impl::allow_move(*this, moved)) return xml_node();
5997 if (!node._root || node._root->parent != _root) return xml_node();
5998 if (moved._root == node._root) return xml_node();
6000 impl::xml_allocator& alloc = impl::get_allocator(_root);
6001 if (!alloc.reserve()) return xml_node();
6003 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6004 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6006 impl::remove_node(moved._root);
6007 impl::insert_node_after(moved._root, node._root);
// Moves node moved so that it precedes node: it is unlinked with impl::remove_node and re-linked
// with impl::insert_node_before. Returns an empty handle when impl::allow_move rejects the move,
// when node is empty or not a child of this node, when moved and node are the same node, or when
// alloc.reserve() fails.
// NOTE(review): the final return is not visible in this chunk.
6012 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6014 if (!impl::allow_move(*this, moved)) return xml_node();
6015 if (!node._root || node._root->parent != _root) return xml_node();
6016 if (moved._root == node._root) return xml_node();
6018 impl::xml_allocator& alloc = impl::get_allocator(_root);
6019 if (!alloc.reserve()) return xml_node();
6021 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6022 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6024 impl::remove_node(moved._root);
6025 impl::insert_node_before(moved._root, node._root);
// Removes the attribute found by attribute(name_); forwards to the handle-based overload.
6030 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6032 return remove_attribute(attribute(name_));
// Removes attribute a from this node: unlinks it with impl::remove_attribute and releases it with
// impl::destroy_attribute. Returns false when either handle is null, when a is not an attribute of
// this node, or when alloc.reserve() fails.
// NOTE(review): the success return is not visible in this chunk.
6035 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6037 if (!_root || !a._attr) return false;
6038 if (!impl::is_attribute_of(a._attr, _root)) return false;
6040 impl::xml_allocator& alloc = impl::get_allocator(_root);
6041 if (!alloc.reserve()) return false;
6043 impl::remove_attribute(a._attr, _root);
6044 impl::destroy_attribute(a._attr, alloc);
// Removes the child found by child(name_); forwards to the handle-based overload.
6049 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6051 return remove_child(child(name_));
// Removes child n from this node: unlinks it with impl::remove_node and releases it with
// impl::destroy_node. Returns false when either handle is null, when n's parent is not this node,
// or when alloc.reserve() fails.
// NOTE(review): the success return is not visible in this chunk.
6054 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6056 if (!_root || !n._root || n._root->parent != _root) return false;
6058 impl::xml_allocator& alloc = impl::get_allocator(_root);
6059 if (!alloc.reserve()) return false;
6061 impl::remove_node(n._root);
6062 impl::destroy_node(n._root, alloc);
// Parses the buffer (contents, size) as a document fragment under this node via
// impl::load_buffer_impl and returns its parse result. Fails early with
// status_append_invalid_root when this node's type cannot take an element child, and with
// status_out_of_memory when the bookkeeping record for the fragment buffer cannot be allocated.
// The record is pushed onto the document's extra_buffers list before parsing.
6067 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6069 // append_buffer is only valid for elements/documents
6070 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6072 // get document node
6073 impl::xml_document_struct* doc = &impl::get_document(_root);
6075 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6076 doc->header |= impl::xml_memory_page_contents_shared_mask;
6078 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6079 impl::xml_memory_page* page = 0;
6080 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6083 if (!extra) return impl::make_parse_result(status_out_of_memory);
6085 #ifdef PUGIXML_COMPACT
6086 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6087 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6088 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6091 // add extra buffer to the list
6093 extra->next = doc->extra_buffers;
6094 doc->extra_buffers = extra;
6096 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6097 impl::name_null_sentry sentry(_root);
6099 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
// Searches the children named name_ for one carrying an attribute named attr_name whose value
// matches attr_value; a null attribute value is compared as the empty string.
// A null handle yields an empty xml_node.
// NOTE(review): the statement taken on a match and the not-found return are not visible in this chunk.
6102 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6104 if (!_root) return xml_node();
6106 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6107 if (i->name && impl::strequal(name_, i->name))
6109 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6110 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
// Searches all children, regardless of name, for one carrying an attribute named attr_name whose
// value matches attr_value; a null attribute value is compared as the empty string.
// A null handle yields an empty xml_node.
// NOTE(review): the statement taken on a match and the not-found return are not visible in this chunk.
6117 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6119 if (!_root) return xml_node();
6121 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6122 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6123 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
// Builds a delimiter-separated path string for this node (only compiled when STL support is enabled).
// Pass 1 walks from this node up through its parents, summing the name lengths plus one delimiter
// for every node other than this one. Pass 2 walks the same chain again and fills the pre-sized
// string from the back, so 'offset' must reach exactly zero. A null handle yields an empty string.
6129 #ifndef PUGIXML_NO_STL
6130 PUGI__FN string_t xml_node::path(char_t delimiter) const
6132 if (!_root) return string_t();
6136 for (xml_node_struct* i = _root; i; i = i->parent)
6138 offset += (i != _root);
6139 offset += i->name ? impl::strlength(i->name) : 0;
6143 result.resize(offset);
6145 for (xml_node_struct* j = _root; j; j = j->parent)
6148 result[--offset] = delimiter;
6152 size_t length = impl::strlength(j->name);
6155 memcpy(&result[offset], j->name, length * sizeof(char_t));
6159 assert(offset == 0);
// Resolves a delimiter-separated path relative to this node, one segment per recursive call.
// A path starting with the delimiter is resolved from root(). Runs of delimiters are skipped.
// "." keeps the current node and ".." moves to the parent; any other segment is matched against
// child names with impl::strequalrange, and each matching child is tried in order until a
// sub-search succeeds. A null handle or an empty path returns this node itself; a path with no
// remaining segment returns the current search context.
6165 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6167 xml_node found = *this; // Current search context.
6169 if (!_root || !path_[0]) return found;
6171 if (path_[0] == delimiter)
6173 // Absolute path; e.g. '/foo/bar'
6174 found = found.root();
6178 const char_t* path_segment = path_;
6180 while (*path_segment == delimiter) ++path_segment;
6182 const char_t* path_segment_end = path_segment;
6184 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6186 if (path_segment == path_segment_end) return found;
6188 const char_t* next_segment = path_segment_end;
6190 while (*next_segment == delimiter) ++next_segment;
6192 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6193 return found.first_element_by_path(next_segment, delimiter);
6194 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6195 return found.parent().first_element_by_path(next_segment, delimiter);
6198 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6200 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6202 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6204 if (subsearch) return subsearch;
// Walks the subtree below this node, driving the supplied walker.
// walker.begin() is called with this node first and a false result aborts the traversal.
// walker.for_each() is then called for each visited node, starting at the first child; after a node
// is visited the walk descends to its first child if it has one, otherwise moves to its next
// sibling, otherwise climbs until a sibling exists or this node is reached.
// The result is that of walker.end(), called with this node; walker._depth is asserted to be -1 by then.
// NOTE(review): several short statements of the loop body are not visible in this chunk.
6212 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6216 xml_node arg_begin(_root);
6217 if (!walker.begin(arg_begin)) return false;
6219 xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6227 xml_node arg_for_each(cur);
6228 if (!walker.for_each(arg_for_each))
6231 if (cur->first_child)
6234 cur = cur->first_child;
6236 else if (cur->next_sibling)
6237 cur = cur->next_sibling;
6240 while (!cur->next_sibling && cur != _root && cur->parent)
6247 cur = cur->next_sibling;
6250 while (cur && cur != _root);
6253 assert(walker._depth == -1);
6255 xml_node arg_end(_root);
6256 return walker.end(arg_end);
// Hash of the handle: the _root address divided by sizeof(xml_node_struct).
6259 PUGI__FN size_t xml_node::hash_value() const
6261 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
// NOTE(review): the body is not visible in this chunk - presumably it returns _root; confirm.
6264 PUGI__FN xml_node_struct* xml_node::internal_object() const
// Writes this node to writer: output is produced by impl::node_output through an
// impl::xml_buffered_writer created for the requested encoding, which is flushed before returning.
6269 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6273 impl::xml_buffered_writer buffered_writer(writer, encoding);
6275 impl::node_output(buffered_writer, _root, indent, flags, depth);
6277 buffered_writer.flush();
// Stream overloads (only compiled when STL support is enabled): both wrap the stream in an
// xml_writer_stream and forward to the writer-based print().
6280 #ifndef PUGIXML_NO_STL
6281 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6283 xml_writer_stream writer(stream);
6285 print(writer, indent, flags, encoding, depth);
// The wide-stream overload takes no encoding parameter and always passes encoding_wchar.
6288 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6290 xml_writer_stream writer(stream);
6292 print(writer, indent, flags, encoding_wchar, depth);
// Offset of this node's name or value from the start of the document buffer, or -1 when it
// cannot be determined: null handle, no document buffer, extra buffers present, or the string is
// missing or flagged by the allocated-or-shared mask in the node header.
// NOTE(review): most of the switch over node types is not visible in this chunk.
6296 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6298 if (!_root) return -1;
6300 impl::xml_document_struct& doc = impl::get_document(_root);
6302 // we can determine the offset reliably only if there is exactly one parse buffer
6303 if (!doc.buffer || doc.extra_buffers) return -1;
6311 case node_declaration:
6313 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6319 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6322 assert(false && "Invalid node type"); // unreachable
// Logical AND of a node handle (converted to bool) with a bool.
6328 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6330 return (bool)lhs && rhs;
// Logical OR of a node handle (converted to bool) with a bool.
6333 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6335 return (bool)lhs || rhs;
// Constructs a text accessor over the given node structure; only stores the pointer in _root.
6339 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
// Locates the node that holds the text. A null _root or a _root that is itself a text node
// (impl::is_text_node) is returned as is; an element with its own value is handled next, and
// otherwise the children are scanned for a text node.
// NOTE(review): the returns of the last two cases and the not-found return are not visible in this chunk.
6343 PUGI__FN xml_node_struct* xml_text::_data() const
6345 if (!_root || impl::is_text_node(_root)) return _root;
6347 // element nodes can have value if parse_embed_pcdata was used
6348 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6351 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6352 if (impl::is_text_node(node))
// Looks up the text-holding node via _data(); the visible fallback appends a new node_pcdata
// child to _root and returns its internal object.
// NOTE(review): the early return for an existing data node is not visible in this chunk.
6358 PUGI__FN xml_node_struct* xml_text::_data_new()
6360 xml_node_struct* d = _data();
6363 return xml_node(_root).append_child(node_pcdata).internal_object();
// Default constructor: an accessor with a null _root.
6366 PUGI__FN xml_text::xml_text(): _root(0)
// Helper function whose address serves as the non-null value of the unspecified-bool conversion below.
6370 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
// Boolean conversion: non-null exactly when _data() finds a text-holding node.
6374 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6376 return _data() ? unspecified_bool_xml_text : 0;
// NOTE(review): the body is not visible in this chunk - presumably the negation of the conversion above; confirm.
6379 PUGI__FN bool xml_text::operator!() const
// True when _data() finds no text-holding node.
6384 PUGI__FN bool xml_text::empty() const
6386 return _data() == 0;
// Text value of the data node, or an empty string when there is no data node or it has no value.
6389 PUGI__FN const char_t* xml_text::get() const
6391 xml_node_struct* d = _data();
6393 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
// Text value of the data node, or def when there is no data node or it has no value.
6396 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6398 xml_node_struct* d = _data();
6400 return (d && d->value) ? d->value + 0 : def;
// Typed conversions of the text value. Each looks up the data node via _data() and returns def
// when there is no data node or it has no value; otherwise the value string is converted by the
// matching impl::get_value_* helper.
6403 PUGI__FN int xml_text::as_int(int def) const
6405 xml_node_struct* d = _data();
6407 return (d && d->value) ? impl::get_value_int(d->value) : def;
6410 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6412 xml_node_struct* d = _data();
6414 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6417 PUGI__FN double xml_text::as_double(double def) const
6419 xml_node_struct* d = _data();
6421 return (d && d->value) ? impl::get_value_double(d->value) : def;
6424 PUGI__FN float xml_text::as_float(float def) const
6426 xml_node_struct* d = _data();
6428 return (d && d->value) ? impl::get_value_float(d->value) : def;
6431 PUGI__FN bool xml_text::as_bool(bool def) const
6433 xml_node_struct* d = _data();
6435 return (d && d->value) ? impl::get_value_bool(d->value) : def;
// 64-bit variants, compiled only when PUGIXML_HAS_LONG_LONG is defined.
6438 #ifdef PUGIXML_HAS_LONG_LONG
6439 PUGI__FN long long xml_text::as_llong(long long def) const
6441 xml_node_struct* d = _data();
6443 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6446 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6448 xml_node_struct* d = _data();
6450 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
// Sets the text to the string rhs: obtains the data node from _data_new() and stores the string
// with impl::strcpy_insitu. Returns false when no data node is available.
6454 PUGI__FN bool xml_text::set(const char_t* rhs)
6456 xml_node_struct* dn = _data_new();
6458 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
// Typed setters. Each obtains the data node from _data_new() and returns false when none is
// available; otherwise the value is written by the matching impl::set_value_* helper.
// Signed integer overloads instantiate set_value_integer with the unsigned counterpart type and
// pass 'rhs < 0' as the last argument (presumably the sign flag - confirm against the helper);
// unsigned overloads pass false.
6461 PUGI__FN bool xml_text::set(int rhs)
6463 xml_node_struct* dn = _data_new();
6465 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6468 PUGI__FN bool xml_text::set(unsigned int rhs)
6470 xml_node_struct* dn = _data_new();
6472 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6475 PUGI__FN bool xml_text::set(long rhs)
6477 xml_node_struct* dn = _data_new();
6479 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6482 PUGI__FN bool xml_text::set(unsigned long rhs)
6484 xml_node_struct* dn = _data_new();
6486 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
// Floating-point overloads go through impl::set_value_convert.
6489 PUGI__FN bool xml_text::set(float rhs)
6491 xml_node_struct* dn = _data_new();
6493 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6496 PUGI__FN bool xml_text::set(double rhs)
6498 xml_node_struct* dn = _data_new();
6500 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
// Boolean overload goes through impl::set_value_bool.
6503 PUGI__FN bool xml_text::set(bool rhs)
6505 xml_node_struct* dn = _data_new();
6507 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
// 64-bit variants, compiled only when PUGIXML_HAS_LONG_LONG is defined.
6510 #ifdef PUGIXML_HAS_LONG_LONG
6511 PUGI__FN bool xml_text::set(long long rhs)
6513 xml_node_struct* dn = _data_new();
6515 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6518 PUGI__FN bool xml_text::set(unsigned long long rhs)
6520 xml_node_struct* dn = _data_new();
6522 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
// Assignment operators, one per value type accepted by set().
// NOTE(review): only the signatures are visible in this chunk; the bodies presumably forward to
// set(rhs) and return *this - confirm against the full file.
6526 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6532 PUGI__FN xml_text& xml_text::operator=(int rhs)
6538 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6544 PUGI__FN xml_text& xml_text::operator=(long rhs)
6550 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6556 PUGI__FN xml_text& xml_text::operator=(double rhs)
6562 PUGI__FN xml_text& xml_text::operator=(float rhs)
6568 PUGI__FN xml_text& xml_text::operator=(bool rhs)
// 64-bit variants, compiled only when PUGIXML_HAS_LONG_LONG is defined.
6574 #ifdef PUGIXML_HAS_LONG_LONG
6575 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6581 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
// Node handle wrapping the text-holding node found by _data() (empty when there is none).
6588 PUGI__FN xml_node xml_text::data() const
6590 return xml_node(_data());
// Logical AND of a text accessor (converted to bool) with a bool.
6594 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6596 return (bool)lhs && rhs;
// Logical OR of a text accessor (converted to bool) with a bool.
6599 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6601 return (bool)lhs || rhs;
// Default constructor; no member initializers are given.
6605 PUGI__FN xml_node_iterator::xml_node_iterator()
// Iterator positioned at node, with node.parent() recorded as the parent.
6609 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
// Iterator built from raw structures: ref is the current node (may be null), parent is its parent.
6613 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
// Iterators are equal when both the current node and the recorded parent are the same.
6617 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6619 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
// Exact negation of operator==.
6622 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6624 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
// Dereference; asserts that the iterator points at a node.
// NOTE(review): the return statement is not visible in this chunk.
6627 PUGI__FN xml_node& xml_node_iterator::operator*() const
6629 assert(_wrap._root);
// Member access; asserts that the iterator points at a node and returns the address of the
// wrapped handle with const cast away.
6633 PUGI__FN xml_node* xml_node_iterator::operator->() const
6635 assert(_wrap._root);
6636 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
// Pre-increment: asserts a valid position, then advances to the next sibling.
// NOTE(review): the return statement is not visible in this chunk.
6639 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6641 assert(_wrap._root);
6642 _wrap._root = _wrap._root->next_sibling;
// Post-increment: copies the iterator first.
// NOTE(review): the increment and return are not visible in this chunk.
6646 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6648 xml_node_iterator temp = *this;
// Pre-decrement: moves to the previous sibling, or from the end position (null node) to the
// parent's last child.
// NOTE(review): the return statement is not visible in this chunk.
6653 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6655 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
// Post-decrement: copies the iterator first.
// NOTE(review): the decrement and return are not visible in this chunk.
6659 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6661 xml_node_iterator temp = *this;
// Default constructor; no member initializers are given.
6666 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
// Iterator positioned at attr within the node parent.
6670 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
// Iterator built from raw structures: ref is the current attribute (may be null), parent is the owning node.
6674 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
// Iterators are equal when both the current attribute and the recorded parent node are the same.
6678 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6680 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
// Exact negation of operator==.
6683 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6685 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
// Dereference; asserts that the iterator points at an attribute.
// NOTE(review): the return statement is not visible in this chunk.
6688 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6690 assert(_wrap._attr);
// Member access; asserts that the iterator points at an attribute and returns the address of the
// wrapped handle with const cast away.
6694 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6696 assert(_wrap._attr);
6697 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
// Pre-increment: asserts the iterator is dereferenceable, then follows the next_attribute link.
6700 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6702 assert(_wrap._attr);
6703 _wrap._attr = _wrap._attr->next_attribute;
// Post-increment form (dummy int parameter): begins by copying the current iterator state; returns by value.
6707 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6709 xml_attribute_iterator temp = *this;
// Pre-decrement: when an attribute is wrapped, step to its previous attribute; when the wrapped
// pointer is null (end position), step to the last attribute of the stored parent instead.
6714 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6716 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
// Post-decrement form (dummy int parameter): begins by copying the current iterator state; returns by value.
6720 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6722 xml_attribute_iterator temp = *this;
// Default constructor: the name filter starts out null; _wrap and _parent are default-constructed.
6727 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
// Builds an iterator positioned at node, deriving the parent from node.parent(); name is stored as a
// pointer (not copied), so it must stay valid while the iterator is used.
6731 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
// Builds an iterator from raw node structure pointers plus the name filter; name is stored as a pointer (not copied).
6735 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
// Equality: compares the wrapped node and parent pointers only; the name filter does not take part.
6739 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6741 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
// Inequality: true when the wrapped node or parent pointer differs; the name filter does not take part.
6744 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6746 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
// Dereference: yields a reference to a node handle; asserts that the iterator wraps a non-null node.
6749 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6751 assert(_wrap._root);
// Member access: asserts that the iterator wraps a non-null node, then returns the address of the
// wrapped handle with constness cast away (the method itself is const).
6755 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6757 assert(_wrap._root);
6758 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
// Pre-increment: asserts the iterator is dereferenceable, then moves to the next sibling whose name matches _name.
6761 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6763 assert(_wrap._root);
6764 _wrap = _wrap.next_sibling(_name);
// Post-increment form (dummy int parameter): begins by copying the current iterator state; returns by value.
6768 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6770 xml_named_node_iterator temp = *this;
// Pre-decrement with name filtering. Two paths exist:
//  - step straight to the previous sibling named _name;
//  - start from the parent's last child and, if that child's name is not _name, step back to the
//    nearest previous sibling that is.
// NOTE(review): the selection between the paths presumably mirrors xml_node_iterator::operator--
// (first path when a node is wrapped, second at the end position) - confirm against the branch condition.
6775 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6778 _wrap = _wrap.previous_sibling(_name);
6781 _wrap = _parent.last_child();
6783 if (!impl::strequal(_wrap.name(), _name))
6784 _wrap = _wrap.previous_sibling(_name);
// Post-decrement form (dummy int parameter): begins by copying the current iterator state; returns by value.
6790 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6792 xml_named_node_iterator temp = *this;
// Default result is a failure: status_internal_error at offset 0 with encoding_auto, so a result
// that was never filled in converts to false (see operator bool).
6797 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
// Converts to true only when parsing succeeded (status == status_ok).
6801 PUGI__FN xml_parse_result::operator bool() const
6803 return status == status_ok;
// Maps a parse status code to a human-readable English message.
// Every return value is a string literal, so the caller must not free it; statuses without a
// dedicated case fall through to "Unknown error".
6806 PUGI__FN const char* xml_parse_result::description() const
6810 case status_ok: return "No error";
// I/O, allocation and internal failures
6812 case status_file_not_found: return "File was not found";
6813 case status_io_error: return "Error reading from file/stream";
6814 case status_out_of_memory: return "Could not allocate memory";
6815 case status_internal_error: return "Internal error occurred";
// syntax errors reported for specific constructs
6817 case status_unrecognized_tag: return "Could not determine tag type";
6819 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6820 case status_bad_comment: return "Error parsing comment";
6821 case status_bad_cdata: return "Error parsing CDATA section";
6822 case status_bad_doctype: return "Error parsing document type declaration";
6823 case status_bad_pcdata: return "Error parsing PCDATA section";
6824 case status_bad_start_element: return "Error parsing start element tag";
6825 case status_bad_attribute: return "Error parsing element attribute";
6826 case status_bad_end_element: return "Error parsing end element tag";
6827 case status_end_element_mismatch: return "Start-end tags mismatch";
6829 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6831 case status_no_document_element: return "No document element found";
6833 default: return "Unknown error";
// Default constructor: starts with no owned parse buffer (_buffer is null).
6837 PUGI__FN xml_document::xml_document(): _buffer(0)
6842 PUGI__FN xml_document::~xml_document()
6847 #ifdef PUGIXML_HAS_MOVE
// Move constructor (only with PUGIXML_HAS_MOVE): starts with a null _buffer before taking over rhs.
// NOTE(review): noexcept-ness is controlled by PUGIXML_NOEXCEPT_IF_NOT_COMPACT; _move can throw
// std::bad_alloc in compact mode.
6848 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
// Move assignment: self-assignment is detected up front and is a no-op that returns *this.
6854 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6856 if (this == &rhs) return *this;
6866 PUGI__FN void xml_document::reset()
// Resets the document using proto as a template: walks every top-level child of proto in sibling order.
// NOTE(review): the per-child action is presumably a deep copy into this document - confirm in the loop body.
6872 PUGI__FN void xml_document::reset(const xml_document& proto)
6876 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
// Sets up the document's embedded storage inside the _memory array:
//  1. constructs the sentinel memory page at the start of _memory and marks it fully busy;
//  2. (compact mode) writes the first-page marker directly after the page header;
//  3. placement-constructs the root xml_document_struct after the page header (+ page_offset);
//  4. points the page's allocator at the new document struct (and, in compact mode, the allocator's
//     hash pointer at the document's own hash table).
// The static assert and the final assert both verify that everything fits within _memory.
6880 PUGI__FN void xml_document::_create()
6884 #ifdef PUGIXML_COMPACT
6885 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
6886 const size_t page_offset = sizeof(void*);
6888 const size_t page_offset = 0;
6891 // initialize sentinel page
6892 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6894 // prepare page structure
6895 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
// the sentinel page is reported as completely used
6898 page->busy_size = impl::xml_memory_page_size;
6900 // setup first page marker
6901 #ifdef PUGIXML_COMPACT
6902 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
6903 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6904 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6907 // allocate new root
6908 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
// a lone node's prev_sibling_c points back at itself
6909 _root->prev_sibling_c = _root;
6911 // setup sentinel page
6912 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6914 // setup hash table pointer in allocator
6915 #ifdef PUGIXML_COMPACT
6916 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
6919 // verify the document allocation
6920 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
// Releases everything the document owns: the parse buffer (_buffer), the buffers referenced from
// the extra_buffers list, every memory page linked after the root page and, in compact mode, the
// hash table. The root page itself lives inside _memory (asserted below) and is never deallocated.
6923 PUGI__FN void xml_document::_destroy()
6927 // destroy static storage
6930 impl::xml_memory::deallocate(_buffer);
6934 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6935 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6937 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6940 // destroy dynamic storage, leave sentinel page (it's in static memory)
6941 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6942 assert(root_page && !root_page->prev);
6943 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
// page->next is read before the page is released, so the walk never touches freed memory
6945 for (impl::xml_memory_page* page = root_page->next; page; )
6947 impl::xml_memory_page* next = page->next;
6949 impl::xml_allocator::deallocate_page(page);
6954 #ifdef PUGIXML_COMPACT
6955 // destroy hash table
6956 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6962 #ifdef PUGIXML_HAS_MOVE
// Transfers the entire contents of rhs into this document, which must be empty (asserted: no extra
// pages, no children). Steps, in order:
//  1. (compact mode) reserve hash capacity up front - the only step that can fail; on failure
//     std::bad_alloc is thrown when exceptions are available;
//  2. copy allocator state, buffer pointers and (compact mode) the hash table;
//  3. splice rhs's dynamically allocated pages behind this document's embedded root page and
//     repoint each page's allocator (and compact_shared_parent) to this document;
//  4. adopt rhs's first child and walk its sibling list (the per-node asserts show that each
//     child's parent is expected to be rhs's root, or already this root in compact mode);
//  5. re-construct rhs's document struct in place so rhs is left as a valid empty document.
6963 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6965 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
6966 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
6968 // save first child pointer for later; this needs hash access
6969 xml_node_struct* other_first_child = other->first_child;
6971 #ifdef PUGIXML_COMPACT
6972 // reserve space for the hash table up front; this is the only operation that can fail
6973 // if it does, we have no choice but to throw (if we have exceptions)
6974 if (other_first_child)
6976 size_t other_children = 0;
6977 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
6980 // in compact mode, each pointer assignment could result in a hash table request
6981 // during move, we have to relocate document first_child and parents of all children
6982 // normally there's just one child and its parent has a pointerless encoding but
6983 // we assume the worst here
6984 if (!other->_hash->reserve(other_children + 1))
6986 #ifdef PUGIXML_NO_EXCEPTIONS
6989 throw std::bad_alloc();
6995 // move allocation state
6996 doc->_root = other->_root;
6997 doc->_busy_size = other->_busy_size;
6999 // move buffer state
7000 doc->buffer = other->buffer;
7001 doc->extra_buffers = other->extra_buffers;
7002 _buffer = rhs._buffer;
7004 #ifdef PUGIXML_COMPACT
7005 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7006 doc->hash = other->hash;
7007 doc->_hash = &doc->hash;
7009 // make sure we don't access other hash up until the end when we reinitialize other document
7013 // move page structure
7014 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7015 assert(doc_page && !doc_page->prev && !doc_page->next);
7017 impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7018 assert(other_page && !other_page->prev);
7020 // relink pages since root page is embedded into xml_document
7021 if (impl::xml_memory_page* page = other_page->next)
7023 assert(page->prev == other_page);
7025 page->prev = doc_page;
7027 doc_page->next = page;
7028 other_page->next = 0;
7031 // make sure pages point to the correct document state
7032 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7034 assert(page->allocator == other);
7036 page->allocator = doc;
7038 #ifdef PUGIXML_COMPACT
7039 // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7040 if (page->compact_shared_parent == other)
7041 page->compact_shared_parent = doc;
7045 // move tree structure
7046 assert(!doc->first_child);
7048 doc->first_child = other_first_child;
7050 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7052 #ifdef PUGIXML_COMPACT
7053 // most children will have migrated when we reassigned compact_shared_parent
7054 assert(node->parent == other || node->parent == doc);
7058 assert(node->parent == other);
7063 // reset other document
7064 new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7069 #ifndef PUGIXML_NO_STL
// Parses the document from a narrow-character input stream by delegating to impl::load_stream_impl;
// &_buffer is passed so the implementation can store a buffer pointer in the document.
7070 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7074 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
// Parses the document from a wide-character input stream; the encoding is fixed to encoding_wchar.
7077 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7081 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
// Parses a null-terminated string held in the library's native character type.
// The encoding is pinned to the native one (wchar in PUGIXML_WCHAR_MODE, UTF-8 otherwise) and the
// byte size handed to load_buffer is strlength(contents) * sizeof(char_t), terminator excluded.
7085 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7087 // Force native encoding (skip autodetection)
7088 #ifdef PUGIXML_WCHAR_MODE
7089 xml_encoding encoding = encoding_wchar;
7091 xml_encoding encoding = encoding_utf8;
7094 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
// Thin forwarding overload: identical to load_string(contents, options).
7097 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7099 return load_string(contents, options);
// Parses the document from a file identified by a narrow-character path.
// The file is opened in binary read mode ("rb") and owned by an auto_deleter that releases it via
// impl::close_file; the raw FILE* (possibly null if fopen failed) is handed to impl::load_file_impl.
7102 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7106 using impl::auto_deleter; // MSVC7 workaround
7107 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
7109 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Wide-path counterpart of load_file: opens the file through impl::open_file_wide in binary read
// mode (L"rb"), with the handle owned by an auto_deleter that releases it via impl::close_file.
7112 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7116 using impl::auto_deleter; // MSVC7 workaround
7117 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7119 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
// Parses the document from a read-only memory buffer of `size` bytes.
// Both trailing flags passed to impl::load_buffer_impl are false here (compare the inplace variants,
// which pass true/false and true/true); the const_cast only satisfies the shared void* signature.
// NOTE(review): the flags presumably mean "parse in place" and "take ownership" - confirm in load_buffer_impl.
7122 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7126 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
// Parses the document from a mutable memory buffer; passes (true, false) as the two trailing flags
// of impl::load_buffer_impl.
// NOTE(review): presumably "parse in place, caller keeps ownership" - confirm in load_buffer_impl.
7129 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7133 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
// Parses the document from a mutable memory buffer; passes (true, true) as the two trailing flags
// of impl::load_buffer_impl.
// NOTE(review): presumably "parse in place, document takes ownership of the buffer" - confirm in load_buffer_impl.
7136 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7140 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
// Serializes the document to an xml_writer through a buffered writer:
//  - optionally emits a byte order mark (format_write_bom; never for Latin-1 output);
//  - emits an XML declaration unless format_no_declaration is set or the document already has one,
//    adding encoding="ISO-8859-1" for Latin-1 and a newline unless format_raw is set;
//  - writes the node tree starting at the root with depth 0 and flushes the buffered writer.
7143 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7145 impl::xml_buffered_writer buffered_writer(writer, encoding);
7147 if ((flags & format_write_bom) && encoding != encoding_latin1)
7149 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7150 #ifdef PUGIXML_WCHAR_MODE
7151 unsigned int bom = 0xfeff;
7152 buffered_writer.write(static_cast<wchar_t>(bom));
// narrow mode: U+FEFF spelled out as its three UTF-8 bytes
7154 buffered_writer.write('\xef', '\xbb', '\xbf');
7158 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7160 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7161 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7162 buffered_writer.write('?', '>');
7163 if (!(flags & format_raw)) buffered_writer.write('\n');
7166 impl::node_output(buffered_writer, _root, indent, flags, 0);
7168 buffered_writer.flush();
7171 #ifndef PUGIXML_NO_STL
// Serializes the document to a narrow-character output stream by wrapping it in an
// xml_writer_stream and delegating to the xml_writer overload of save.
7172 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7174 xml_writer_stream writer(stream);
7176 save(writer, indent, flags, encoding);
// Serializes the document to a wide-character output stream; the encoding is fixed to encoding_wchar.
7179 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7181 xml_writer_stream writer(stream);
7183 save(writer, indent, flags, encoding_wchar);
// Saves the document to a file identified by a narrow-character path.
// The file is opened in text mode ("w") when format_save_file_text is set and binary mode ("wb")
// otherwise; the handle is owned by an auto_deleter (released via impl::close_file) and the result
// of impl::save_file_impl is returned.
7187 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7189 using impl::auto_deleter; // MSVC7 workaround
7190 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7192 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Wide-path counterpart of save_file: opens through impl::open_file_wide using L"w" when
// format_save_file_text is set and L"wb" otherwise, then delegates to impl::save_file_impl.
7195 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7197 using impl::auto_deleter; // MSVC7 workaround
7198 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7200 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
// Looks for the document element by scanning the root's direct children, in sibling order, for a
// node whose type is node_element (other node types are skipped).
7203 PUGI__FN xml_node xml_document::document_element() const
7207 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7208 if (PUGI__NODETYPE(i) == node_element)
7214 #ifndef PUGIXML_NO_STL
// Converts a null-terminated wide string to a UTF-8 std::string; the length is measured with
// impl::strlength_wide, so str must not be null.
7215 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7219 return impl::as_utf8_impl(str, impl::strlength_wide(str));
// Converts a wide std::basic_string to a UTF-8 std::string using its stored size (embedded nulls are included).
7222 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7224 return impl::as_utf8_impl(str.c_str(), str.size());
// Converts a null-terminated narrow string to a wide string; the length is measured with strlen,
// so str must not be null.
7227 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7231 return impl::as_wide_impl(str, strlen(str));
// Converts a std::string to a wide string using its stored size (embedded nulls are included).
7234 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7236 return impl::as_wide_impl(str.c_str(), str.size());
// Installs the allocation and deallocation functions stored in impl::xml_memory.
// The two pointers are stored as given, with no validation in this function, and should be set as a matching pair.
7240 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7242 impl::xml_memory::allocate = allocate;
7243 impl::xml_memory::deallocate = deallocate;
// Returns the allocation function currently stored in impl::xml_memory.
7246 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7248 return impl::xml_memory::allocate;
// Returns the deallocation function currently stored in impl::xml_memory.
7251 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7253 return impl::xml_memory::deallocate;
7257 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7260 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
// Reports xml_node_iterator as a bidirectional iterator to the compiler's non-standard category lookup.
7261 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7263 return std::bidirectional_iterator_tag();
// Reports xml_attribute_iterator as a bidirectional iterator to the compiler's non-standard category lookup.
7266 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7268 return std::bidirectional_iterator_tag();
// Reports xml_named_node_iterator as a bidirectional iterator to the compiler's non-standard category lookup.
7271 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7273 return std::bidirectional_iterator_tag();
7278 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7281 // Workarounds for (non-standard) iterator category detection
// Sun compiler (__SUNPRO_CC) hook: reports xml_node_iterator as a bidirectional iterator.
7282 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7284 return std::bidirectional_iterator_tag();
// Sun compiler (__SUNPRO_CC) hook: reports xml_attribute_iterator as a bidirectional iterator.
7287 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7289 return std::bidirectional_iterator_tag();
// Sun compiler (__SUNPRO_CC) hook: reports xml_named_node_iterator as a bidirectional iterator.
7292 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7294 return std::bidirectional_iterator_tag();
7299 #ifndef PUGIXML_NO_XPATH
7304 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7312 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7320 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7328 template <typename T> bool operator()(const T& lhs, const T& rhs) const
7334 template <typename T> void swap(T& lhs, T& rhs)
// Finds the smallest element of [begin, end) according to pred(a, b) == "a orders before b".
// The scan starts at begin + 1 and compares each element against the current best, so the range
// must be non-empty (an empty range would step past end).
7341 template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
7345 for (I it = begin + 1; it != end; ++it)
7346 if (pred(*it, *result))
// Reverses [begin, end) in place by swapping the outermost pair and moving inwards until at most
// one element remains; requires iterators that support subtraction.
7352 template <typename I> void reverse(I begin, I end)
7354 while (end - begin > 1) swap(*begin++, *--end);
// Collapses runs of adjacent equal elements in [begin, end) in place (elements are compared with
// !=) and returns an iterator for the new logical end of the range.
7357 template <typename I> I unique(I begin, I end)
// fast path: skip the leading part that already has no adjacent duplicates
7360 while (end - begin > 1 && *begin != *(begin + 1)) begin++;
// empty input: nothing to compact
7362 if (begin == end) return begin;
7364 // last written element
7367 // merge unique elements
7368 while (begin != end)
// an element different from the last written one is appended right after it
7370 if (*begin != *write)
7371 *++write = *begin++;
7376 // past-the-end (write points to live element)
// Insertion sort over the pointer range [begin, end) using pred as the "orders before" relation.
// Each element from begin + 1 onwards is inserted into the already sorted prefix by shifting
// larger elements one slot to the right until its position (the "hole") is found.
7380 template <typename T, typename Pred> void insertion_sort(T* begin, T* end, const Pred& pred)
7385 for (T* it = begin + 1; it != end; ++it)
7390 // move hole backwards
7391 while (hole > begin && pred(val, *(hole - 1)))
7393 *hole = *(hole - 1);
7397 // fill hole with element
// Selects the median of three positions with a three-step compare/exchange network.
// Note that the swaps exchange the local iterator copies (first/middle/last), not the elements
// they point at, so the underlying sequence is left untouched.
7402 template <typename I, typename Pred> I median3(I first, I middle, I last, const Pred& pred)
7404 if (pred(*middle, *first)) swap(middle, first);
7405 if (pred(*last, *middle)) swap(last, middle);
7406 if (pred(*middle, *first)) swap(middle, first);
// Three-way partition of [begin, end) around pivot (taken by value): elements ordered before the
// pivot by pred, elements equal to it (operator==), and the rest. The bounds of the equal run are
// reported through out_eqbeg / out_eqend.
7411 template <typename T, typename Pred> void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7413 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7420 if (pred(*lt, pivot))
7422 else if (*lt == pivot)
7428 // we now have just 4 groups: = < >; move equal elements to the middle
// the equal elements collected at the front are swapped back so they sit directly before the '>' group
7431 for (T* it = begin; it != eq; ++it)
7432 swap(*it, *--eqbeg);
// Sorts [begin, end) with pred as the "orders before" relation.
// Ranges longer than 16 elements are repeatedly split with a median-of-three pivot and a three-way
// partition; one side is handled by a recursive call while the loop continues on the other (see
// "loop on larger half"). Whatever remains (at most 16 elements) is finished with insertion_sort.
7438 template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7440 // sort large chunks
7441 while (end - begin > 16)
7443 // find median element
7444 I middle = begin + (end - begin) / 2;
7445 I median = median3(begin, middle, end - 1, pred);
7447 // partition in three chunks (< = >)
7449 partition3(begin, end, *median, pred, &eqbeg, &eqend);
7451 // loop on larger half
7452 if (eqbeg - begin > end - eqend)
// left part is larger: recurse into the right part
7454 sort(eqend, end, pred);
// right part is at least as large: recurse into the left part
7459 sort(begin, eqbeg, pred);
7464 // insertion sort small chunk
7465 insertion_sort(begin, end, pred);
7469 // Allocator used for AST and evaluation stacks
7471 static const size_t xpath_memory_page_size =
7472 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7473 PUGIXML_MEMORY_XPATH_PAGE_SIZE
// Allocation granularity for the XPath allocator: the larger of sizeof(double) and sizeof(void*).
// The rounding in xpath_allocator uses it as a bit mask, which assumes the value is a power of two.
7479 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
// One block in the XPath allocator's singly linked list of memory blocks.
// `data` is declared with the default page size, but xpath_allocator::allocate may create blocks
// with a larger payload (block_capacity + offsetof(xpath_memory_block, data)), so the usable size
// is given by the block's capacity field rather than by sizeof(data).
7481 struct xpath_memory_block
7483 xpath_memory_block* next;
7488 char data[xpath_memory_page_size];
// Bump allocator over a linked list of xpath_memory_block objects.
// _root is the block currently being filled and _root_size the number of bytes used in it; older
// blocks hang off _root->next. Allocation failures are reported through the optional *_error flag.
7493 struct xpath_allocator
7495 xpath_memory_block* _root;
// Starts allocating from the beginning of `root`; `error` may be null, in which case failures are not recorded.
7499 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
// Returns `size` bytes (rounded up to the block alignment) from the current block, or from a
// freshly allocated block that becomes the new head of the list when the current one is full.
7503 void* allocate(size_t size)
7505 // round size up to block alignment boundary
7506 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7508 if (_root_size + size <= _root->capacity)
7510 void* buf = &_root->data[0] + _root_size;
7516 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7517 size_t block_capacity_base = sizeof(_root->data);
7518 size_t block_capacity_req = size + block_capacity_base / 4;
7519 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
// total allocation = header part of the block (everything before data) + payload capacity
7521 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7523 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
// record the out-of-memory condition for the owner of the error flag
7526 if (_error) *_error = true;
7530 block->next = _root;
7531 block->capacity = block_capacity;
// Grows the most recent allocation from old_size to new_size bytes (both rounded up to the block
// alignment). Only the last allocated object may be passed (asserted); ptr == 0 behaves like allocate.
// Returns 0 if a new block was needed and could not be allocated.
7540 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7542 // round size up to block alignment boundary
7543 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7544 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7546 // we can only reallocate the last object
7547 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7549 // try to reallocate the object inplace
7550 if (ptr && _root_size - old_size + new_size <= _root->capacity)
7552 _root_size = _root_size - old_size + new_size;
7556 // allocate a new block
7557 void* result = allocate(new_size);
7558 if (!result) return 0;
7560 // we have a new block
7563 // copy old data (we only support growing)
7564 assert(new_size >= old_size);
7565 memcpy(result, ptr, old_size);
7567 // free the previous page if it had no other objects
7568 assert(_root->data == result);
7569 assert(_root->next);
// the old object started at the very beginning of the previous block, i.e. it was alone in it
7571 if (_root->next->data == ptr)
7573 // deallocate the whole page, unless it was the first one
7574 xpath_memory_block* next = _root->next->next;
7578 xml_memory::deallocate(_root->next);
// Rolls the allocator back to a previously captured state: every block allocated since then is
// released and _root / _root_size are restored from `state`.
7587 void revert(const xpath_allocator& state)
7589 // free all new pages
7590 xpath_memory_block* cur = _root;
7592 while (cur != state._root)
7594 xpath_memory_block* next = cur->next;
7596 xml_memory::deallocate(cur);
7602 _root = state._root;
7603 _root_size = state._root_size;
// Block-list walk that starts at _root and deallocates blocks, reading cur->next before each release.
// NOTE(review): the loop bound is not obvious here; confirm where the walk stops (presumably before the last block).
7608 xpath_memory_block* cur = _root;
7613 xpath_memory_block* next = cur->next;
7615 xml_memory::deallocate(cur);
// Captures an allocator's state on construction and reverts the allocator to that state on
// destruction, releasing everything allocated in between (see xpath_allocator::revert).
// _state is a by-value copy of the allocator; _target points at the live allocator to restore.
7622 struct xpath_allocator_capture
7624 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7628 ~xpath_allocator_capture()
7630 _target->revert(_state);
7633 xpath_allocator* _target;
7634 xpath_allocator _state;
7639 xpath_allocator* result;
7640 xpath_allocator* temp;
// Bundles the storage for XPath evaluation: two embedded memory blocks, each serving as the
// initial block of one allocator (`result` uses blocks[0], `temp` uses blocks[1]). Both allocators
// report allocation failure through the shared `oom` flag.
7643 struct xpath_stack_data
7645 xpath_memory_block blocks[2];
7646 xpath_allocator result;
7647 xpath_allocator temp;
7651 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
// the embedded blocks terminate their lists and have exactly the default payload capacity
7653 blocks[0].next = blocks[1].next = 0;
7654 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7656 stack.result = &result;
7672 const char_t* _buffer;
7674 size_t _length_heap;
// Copies `length` characters of `string` into memory obtained from `alloc`, reserving room for one
// extra terminating character. Returns 0 when the allocation fails.
7676 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7678 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7679 if (!result) return 0;
7681 memcpy(result, string, length * sizeof(char_t));
// Raw constructor used by the factory functions: stores the buffer pointer, whether it lives in
// allocator ("heap") memory, and the cached length used for heap strings.
7687 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
// Wraps an existing null-terminated string without copying; the result does not own the memory,
// so `str` must outlive it. The length is not cached (computed on demand by length()).
7692 static xpath_string from_const(const char_t* str)
7694 return xpath_string(str, false, 0);
// Adopts an already allocated, null-terminated character range [begin, end] as a heap string
// without copying; `end` must point at the terminator (asserted) and the length is cached.
7697 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7699 assert(begin <= end && *end == 0);
7701 return xpath_string(begin, true, static_cast<size_t>(end - begin));
// Creates a heap string holding a copy of the character range [begin, end), allocated from `alloc`.
// An empty xpath_string is returned when the copy cannot be allocated.
// NOTE(review): the early `return xpath_string()` is presumably the empty-range shortcut - confirm its guard.
7704 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7706 assert(begin <= end);
7709 return xpath_string();
7711 size_t length = static_cast<size_t>(end - begin);
7712 const char_t* data = duplicate_string(begin, length, alloc);
7714 return data ? xpath_string(data, true, length) : xpath_string();
// Default constructor: an empty, non-heap string pointing at a static "" literal (never a null buffer).
7717 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
// Appends `o` to this string, allocating from `alloc` when a new buffer is required.
// If the allocation fails the function returns early and the string is left unchanged.
7721 void append(const xpath_string& o, xpath_allocator* alloc)
7723 // skip empty sources
7724 if (!*o._buffer) return;
7726 // fast append for constant empty target and constant source
7727 if (!*_buffer && !_uses_heap && !o._uses_heap)
// no copy needed: simply alias the constant source buffer
7729 _buffer = o._buffer;
7733 // need to make heap copy
7734 size_t target_length = length();
7735 size_t source_length = o.length();
7736 size_t result_length = target_length + source_length;
7738 // allocate new buffer
// a heap target is grown via reallocate (which preserves its contents); a constant target passes
// ptr == 0, so reallocate acts as a plain allocation
7739 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7740 if (!result) return;
7742 // append first string to the new buffer in case there was no reallocation
7743 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7745 // append second string to the new buffer
7746 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7747 result[result_length] = 0;
7752 _length_heap = result_length;
7756 const char_t* c_str() const
// Length in characters: the cached value for heap strings, otherwise measured with strlength.
7761 size_t length() const
7763 return _uses_heap ? _length_heap : strlength(_buffer);
// Returns a mutable pointer to the string's characters, first duplicating the buffer into
// allocator memory when a private copy is needed. Returns 0 if that duplication fails.
// NOTE(review): the copy is presumably made only when the string is not already heap-backed - confirm the guard.
7766 char_t* data(xpath_allocator* alloc)
7768 // make private heap copy
7771 size_t length_ = strlength(_buffer);
7772 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7774 if (!data_) return 0;
7778 _length_heap = length_;
7781 return const_cast<char_t*>(_buffer);
7786 return *_buffer == 0;
// Content equality: compares the two buffers with strequal (not by pointer or heap flag).
7789 bool operator==(const xpath_string& o) const
7791 return strequal(_buffer, o._buffer);
// Content inequality: negation of the strequal comparison of the two buffers.
7794 bool operator!=(const xpath_string& o) const
7796 return !strequal(_buffer, o._buffer);
7799 bool uses_heap() const
// Returns true when `string` begins with `pattern`: the two are compared while the pattern has
// characters left and they match, and the result is whether the whole pattern was consumed.
// An empty pattern therefore matches any string.
7807 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7809 while (*pattern && *string == *pattern)
7815 return *pattern == 0;
// Locates the first occurrence of character c in s using the C library search that matches the
// active character type (wcschr in PUGIXML_WCHAR_MODE, strchr otherwise).
7818 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7820 #ifdef PUGIXML_WCHAR_MODE
7821 return wcschr(s, c);
7823 return strchr(s, c);
// Locates the first occurrence of p in s (wcsstr in PUGIXML_WCHAR_MODE, strstr otherwise).
// In wide mode an empty pattern is answered directly with s, bypassing wcsstr.
7827 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7829 #ifdef PUGIXML_WCHAR_MODE
7830 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7831 return (*p == 0) ? s : wcsstr(s, p);
7833 return strstr(s, p);
7837 // Converts symbol to lower case, if it is an ASCII one
// 'A'..'Z' is detected with a single unsigned range check (ch - 'A' < 26); OR-ing with ' ' sets the
// bit that separates upper- from lower-case ASCII letters. All other characters pass through unchanged.
7838 PUGI__FN char_t tolower_ascii(char_t ch)
7840 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
// Computes the string value of an XPath node (attribute or tree node).
//  - attribute: its value, wrapped without copying;
//  - nodes that carry their own text: n.value(), wrapped without copying;
//  - container nodes: the concatenation, in document order, of the values of all PCDATA/CDATA
//    descendants (plus the node's own value, see parse_embed_pcdata), built with `alloc`;
//  - otherwise an empty string.
// NOTE(review): which node types select each path is decided by branch/case labels - confirm there.
7843 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7846 return xpath_string::from_const(na.attribute().value());
7849 xml_node n = na.node();
7857 return xpath_string::from_const(n.value());
7862 xpath_string result;
7864 // element nodes can have value if parse_embed_pcdata was used
7866 result.append(xpath_string::from_const(n.value()), alloc);
// iterative depth-first walk over the subtree rooted at n (no recursion)
7868 xml_node cur = n.first_child();
7870 while (cur && cur != n)
7872 if (cur.type() == node_pcdata || cur.type() == node_cdata)
7873 result.append(xpath_string::from_const(cur.value()), alloc);
// descend first, then move sideways
7875 if (cur.first_child())
7876 cur = cur.first_child();
7877 else if (cur.next_sibling())
7878 cur = cur.next_sibling();
// no child and no sibling: keep looking for a next sibling further up, stopping at n
7881 while (!cur.next_sibling() && cur != n)
7884 if (cur != n) cur = cur.next_sibling();
7892 return xpath_string();
// Decides whether ln precedes rn, given that both have the same parent (asserted).
// Two cursors walk forward from ln and rn simultaneously; whichever cursor reaches the other
// node first determines the order, so the cost is bounded by the distance between the nodes.
// Parentless nodes (different documents) are ordered by address to keep the ordering consistent.
7897 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7899 assert(ln->parent == rn->parent);
7901 // there is no common ancestor (the shared parent is null), nodes are from different documents
7902 if (!ln->parent) return ln < rn;
7904 // determine sibling order
7905 xml_node_struct* ls = ln;
7906 xml_node_struct* rs = rn;
// walking forward from ln reached rn => ln comes first
7910 if (ls == rn) return true;
// walking forward from rn reached ln => rn comes first
7911 if (rs == ln) return false;
7913 ls = ls->next_sibling;
7914 rs = rs->next_sibling;
7917 // if rn sibling chain ended ln must be before rn
// Decides whether ln precedes rn in document order for arbitrary nodes.
// First both nodes are lifted in lock-step until their parents coincide; if that succeeds the
// answer comes from node_is_before_sibling. Otherwise the nodes sit at different depths: the
// depths are equalized, the ancestor/descendant case is answered directly (left_higher), and the
// lock-step climb is repeated until a common parent is found.
7921 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7923 // find common ancestor at the same depth, if any
7924 xml_node_struct* lp = ln;
7925 xml_node_struct* rp = rn;
7927 while (lp && rp && lp->parent != rp->parent)
7933 // parents are the same!
7934 if (lp && rp) return node_is_before_sibling(lp, rp);
7936 // nodes are at different depths, need to normalize heights
// lp ran out first => ln is closer to the root than rn
7937 bool left_higher = !lp;
7951 // one node is the ancestor of the other
7952 if (ln == rn) return left_higher;
7954 // find common ancestor... again
7955 while (ln->parent != rn->parent)
7961 return node_is_before_sibling(ln, rn);
// Returns true when `parent` is non-null and lies on the parent chain of `node`, the node itself
// included (a node counts as its own ancestor here).
7964 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7966 while (node && node != parent) node = node->parent;
7968 return parent && node == parent;
// Tries to produce a pointer that can serve as a cheap document-order key for an XPath node.
// The key is the node's/attribute's name or value pointer, usable only when the document's
// contents are not shared and the string is neither separately allocated nor shared (header mask
// bits clear). For nodes, null name/value pointers are skipped.
// NOTE(review): the fallback is presumably a null return, which document_order_comparator treats
// as "no key available" - confirm.
7971 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7973 xml_node_struct* node = xnode.node().internal_object();
7977 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7979 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7980 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7986 xml_attribute_struct* attr = xnode.attribute().internal_object();
7990 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7992 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7993 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
// Strict "comes before in document order" predicate over xpath_node values.
// Fast path: when both operands have a buffer-order key, the keys are compared directly.
// Slow path: attributes of the same parent are ordered by walking the attribute list; an attribute
// is ordered after its own parent element; everything else is reduced to comparing tree nodes
// with node_is_before.
8002 struct document_order_comparator
8004 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8006 // optimized document order based check
8007 const void* lo = document_buffer_order(lhs);
8008 const void* ro = document_buffer_order(rhs);
8010 if (lo && ro) return lo < ro;
8013 xml_node ln = lhs.node(), rn = rhs.node();
8015 // compare attributes
8016 if (lhs.attribute() && rhs.attribute())
8019 if (lhs.parent() == rhs.parent())
8021 // determine sibling order
8022 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8023 if (a == rhs.attribute())
8029 // compare attribute parents
8033 else if (lhs.attribute())
8035 // attributes go after the parent element
8036 if (lhs.parent() == rhs.node()) return false;
8040 else if (rhs.attribute())
8042 // attributes go after the parent element
8043 if (rhs.parent() == lhs.node()) return true;
// identical nodes are never "before" each other (keeps the predicate irreflexive)
8048 if (ln == rn) return false;
// a null node on either side: fall back to the handles' operator<
8050 if (!ln || !rn) return ln < rn;
8052 return node_is_before(ln.internal_object(), rn.internal_object());
// Ordering used to bring identical xpath_node values next to each other (it is not document order).
// Attribute nodes sort before plain nodes; two attributes, or two plain nodes, are ordered with
// the handles' operator<.
8056 struct duplicate_comparator
8058 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8060 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
8061 else return rhs.attribute() ? false : lhs.node() < rhs.node();
// Produces a NaN value as a double.
// When float is known to be a 32-bit IEEE-style type (per __STDC_IEC_559__ or the FLT_* limits),
// a float/uint32_t union is used to build the value from its bit pattern; otherwise a fallback
// based on a volatile zero is used (volatile keeps the compiler from folding the computation).
// NOTE(review): the fallback presumably evaluates zero / zero - confirm.
8065 PUGI__FN double gen_nan()
8067 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8068 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8069 typedef uint32_t UI; // BCC5 workaround
8070 union { float f; UI i; } u;
8075 const volatile double zero = 0.0;
// Tests whether value is NaN, using the best facility available at compile time:
// _isnan on MSVC CRT / Borland, fpclassify when the macro and FP_NAN exist, and otherwise a
// fallback that works on a volatile copy of the value.
// NOTE(review): the fallback presumably relies on NaN comparing unequal to itself (v != v) - confirm.
8080 PUGI__FN bool is_nan(double value)
8082 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8083 return !!_isnan(value);
8084 #elif defined(fpclassify) && defined(FP_NAN)
8085 return fpclassify(value) == FP_NAN;
8088 const volatile double v = value;
// Maps the special values of a number to their fixed XPath string form:
// zero -> "0", NaN -> "NaN", infinities -> "Infinity" / "-Infinity".
// For an ordinary finite non-zero value the MSVC/Borland branch returns a null pointer;
// convert_number_to_string treats a null result as "format the number normally".
8093 PUGI__FN const char_t* convert_number_to_string_special(double value)
8095 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
// finite: only zero is special
8096 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8097 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8098 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
// C99-style classification, only when all required macros exist
8099 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8100 switch (fpclassify(value))
8103 return PUGIXML_TEXT("NaN");
8106 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8109 return PUGIXML_TEXT("0");
// Generic fallback built on plain comparisons of a volatile copy
8116 const volatile double v = value;
8118 if (v == 0) return PUGIXML_TEXT("0");
// NaN is the only value that differs from itself
8119 if (v != v) return PUGIXML_TEXT("NaN");
// zero and NaN were handled above, so a value equal to its own double is treated as infinite
8120 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8125 PUGI__FN bool convert_number_to_boolean(double value)
8127 return (value != 0 && !is_nan(value));
8130 PUGI__FN void truncate_zeros(char* begin, char* end)
8132 while (begin != end && end[-1] == '0') end--;
8137 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
// Two alternative definitions with the same signature follow; the preprocessor picks one.
//   value        - number to decompose
//   buffer       - 32-character scratch area that receives the digits
//   out_mantissa - receives a pointer to the first mantissa digit inside buffer
//   out_exponent - receives the decimal exponent
// Variant 1: MSVC CRT 1400 or newer (not Windows CE), based on _ecvt_s.
8138 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
8139 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
// requests DBL_DIG + 1 digits; the sign is reported separately and not used further here
8143 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8145 // truncate redundant zeros
8146 truncate_zeros(buffer, buffer + strlen(buffer));
8149 *out_mantissa = buffer;
8150 *out_exponent = exponent;
// Variant 2: portable version that formats in scientific notation and then edits the text in place.
8153 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8155 // get a scientific notation value with IEEE DBL_DIG decimals
8156 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8158 // get the exponent (possibly negative)
8159 char* exponent_string = strchr(buffer, 'e');
8160 assert(exponent_string);
8162 int exponent = atoi(exponent_string + 1);
8164 // extract mantissa string: skip sign
8165 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
// expects "d.ddd" with a non-zero leading digit and '.' as the decimal separator
8166 assert(mantissa[0] != '0' && mantissa[1] == '.');
8168 // divide mantissa by 10 to eliminate integer part
// the leading digit overwrites the '.', turning "d.ddd" into "dddd" starting one character later
8169 mantissa[1] = mantissa[0];
8173 // remove extra mantissa digits and zero-terminate mantissa
8174 truncate_zeros(mantissa, exponent_string);
8177 *out_mantissa = mantissa;
8178 *out_exponent = exponent;
// Converts a number to its XPath string form in plain decimal notation (no exponent).
//   value - number to format
//   alloc - allocator that provides the result buffer
// Special values (see convert_number_to_string_special) yield a constant string;
// if the allocation fails an empty xpath_string is returned.
8182 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8184 // try special number conversion
8185 const char_t* special = convert_number_to_string_special(value);
8186 if (special) return xpath_string::from_const(special);
8188 // get mantissa + exponent form
8189 char mantissa_buffer[32];
8193 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8195 // allocate a buffer of suitable length for the number
// room for every mantissa character plus |exponent| padding zeros, with 4 spare characters
8196 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8197 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8198 if (!result) return xpath_string();
8204 if (value < 0) *s++ = '-';
// integer part: one output digit per unit of positive exponent
8213 while (exponent > 0)
8215 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
// once the mantissa digits run out, the remaining integer positions are filled with '0'
8216 *s++ = *mantissa ? *mantissa++ : '0';
8227 // extra zeroes from negative exponent
8228 while (exponent < 0)
8234 // extra mantissa digits
8237 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
// the write cursor must still be inside the allocated buffer
8243 assert(s < result + result_size);
8246 return xpath_string::from_heap_preallocated(result, s);
8249 PUGI__FN bool check_string_to_number_format(const char_t* string)
8251 // parse leading whitespace
8252 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8255 if (*string == '-') ++string;
8257 if (!*string) return false;
8259 // if there is no integer part, there should be a decimal part with at least one digit
8260 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8262 // parse integer part
8263 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8265 // parse decimal part
8270 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8273 // parse trailing whitespace
8274 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8276 return *string == 0;
8279 PUGI__FN double convert_string_to_number(const char_t* string)
8281 // check string format
8282 if (!check_string_to_number_format(string)) return gen_nan();
8285 #ifdef PUGIXML_WCHAR_MODE
8286 return wcstod(string, 0);
8288 return strtod(string, 0);
8292 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8294 size_t length = static_cast<size_t>(end - begin);
8295 char_t* scratch = buffer;
8297 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8299 // need to make dummy on-heap copy
8300 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8301 if (!scratch) return false;
8304 // copy string to zero-terminated buffer and perform conversion
8305 memcpy(scratch, begin, length * sizeof(char_t));
8306 scratch[length] = 0;
8308 *out_result = convert_string_to_number(scratch);
8310 // free dummy buffer
8311 if (scratch != buffer) xml_memory::deallocate(scratch);
8316 PUGI__FN double round_nearest(double value)
8318 return floor(value + 0.5);
8321 PUGI__FN double round_nearest_nzero(double value)
8323 // same as round_nearest, but returns -0 for [-0.5, -0]
8324 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8325 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8328 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8330 return node.attribute() ? node.attribute().name() : node.node().name();
8333 PUGI__FN const char_t* local_name(const xpath_node& node)
8335 const char_t* name = qualified_name(node);
8336 const char_t* p = find_char(name, ':');
8338 return p ? p + 1 : name;
8341 struct namespace_uri_predicate
8343 const char_t* prefix;
8344 size_t prefix_length;
8346 namespace_uri_predicate(const char_t* name)
8348 const char_t* pos = find_char(name, ':');
8350 prefix = pos ? name : 0;
8351 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8354 bool operator()(xml_attribute a) const
8356 const char_t* name = a.name();
8358 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8360 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8364 PUGI__FN const char_t* namespace_uri(xml_node node)
8366 namespace_uri_predicate pred = node.name();
8372 xml_attribute a = p.find_attribute(pred);
8374 if (a) return a.value();
8379 return PUGIXML_TEXT("");
8382 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8384 namespace_uri_predicate pred = attr.name();
8386 // Default namespace does not apply to attributes
8387 if (!pred.prefix) return PUGIXML_TEXT("");
8389 xml_node p = parent;
8393 xml_attribute a = p.find_attribute(pred);
8395 if (a) return a.value();
8400 return PUGIXML_TEXT("");
8403 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8405 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8408 PUGI__FN char_t* normalize_space(char_t* buffer)
8410 char_t* write = buffer;
8412 for (char_t* it = buffer; *it; )
8416 if (PUGI__IS_CHARTYPE(ch, ct_space))
8418 // replace whitespace sequence with single space
8419 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8421 // avoid leading spaces
8422 if (write != buffer) *write++ = ' ';
8427 // remove trailing space
8428 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8436 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8438 char_t* write = buffer;
8442 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8444 const char_t* pos = find_char(from, ch);
8447 *write++ = ch; // do not process
8448 else if (static_cast<size_t>(pos - from) < to_length)
8449 *write++ = to[pos - from]; // replace
// Builds a 128-entry lookup table for translating characters with codes below 128 and copies it
// into memory obtained from `alloc`.
// Returns null if the allocation fails.
// NOTE(review): the statement guarded by the `fc >= 128 || tc >= 128` check is not visible here;
// presumably the function gives up when either string contains a character outside the table - confirm.
8458 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
// all entries start at 0
8460 unsigned char table[128] = {0};
8464 unsigned int fc = static_cast<unsigned int>(*from);
8465 unsigned int tc = static_cast<unsigned int>(*to);
8467 if (fc >= 128 || tc >= 128)
8470 // code=128 means "skip character"
// when `to` has run out (tc == 0) the entry for the source character is set to 128
8472 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8478 for (int i = 0; i < 128; ++i)
// identity mapping: the entry maps the character to itself
8480 table[i] = static_cast<unsigned char>(i);
8482 void* result = alloc->allocate(sizeof(table));
8483 if (!result) return 0;
8485 memcpy(result, table, sizeof(table));
8487 return static_cast<unsigned char*>(result);
// In-place character translation driven by a table produced by translate_table_generate.
//   buffer - zero-terminated string that is rewritten in place
//   table  - lookup table indexed by character code
8490 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
// write cursor; it can only trail the read position because characters are replaced or dropped
8492 char_t* write = buffer;
8496 char_t ch = *buffer++;
8497 unsigned int index = static_cast<unsigned int>(ch);
8501 unsigned char code = table[index];
8503 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8504 // this code skips these characters without extra branches
8505 *write = static_cast<char_t>(code);
// code >> 7 is 1 only for the skip marker, so the cursor advances by 0 for skipped characters and by 1 otherwise
8506 write += 1 - (code >> 7);
8520 inline bool is_xpath_attribute(const char_t* name)
8522 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
// XPath variable of boolean type; the stored value starts out as false.
8525 struct xpath_variable_boolean: xpath_variable
8527 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
// XPath variable of number type; the stored value starts out as 0.
8535 struct xpath_variable_number: xpath_variable
8537 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
// XPath variable of string type; the stored value starts out as a null pointer.
8545 struct xpath_variable_string: xpath_variable
8547 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
// The variable owns its string: a non-null value is released through xml_memory::deallocate.
8551 ~xpath_variable_string()
8553 if (value) xml_memory::deallocate(value);
// XPath variable of node-set type; the node set member is default-constructed.
8560 struct xpath_variable_node_set: xpath_variable
8562 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8566 xpath_node_set value;
// File-local, default-constructed node set.
// NOTE(review): presumably a shared placeholder handed out where a node set is requested but none
// is available - confirm at the use sites, which are outside this excerpt.
8570 static const xpath_node_set dummy_node_set;
8572 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8574 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8575 unsigned int result = 0;
8579 result += static_cast<unsigned int>(*str++);
8580 result += result << 10;
8581 result ^= result >> 6;
8584 result += result << 3;
8585 result ^= result >> 11;
8586 result += result << 15;
8591 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8593 size_t length = strlength(name);
8594 if (length == 0) return 0; // empty variable names are invalid
8596 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8597 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8598 if (!memory) return 0;
8600 T* result = new (memory) T();
8602 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8607 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8611 case xpath_type_node_set:
8612 return new_xpath_variable<xpath_variable_node_set>(name);
8614 case xpath_type_number:
8615 return new_xpath_variable<xpath_variable_number>(name);
8617 case xpath_type_string:
8618 return new_xpath_variable<xpath_variable_string>(name);
8620 case xpath_type_boolean:
8621 return new_xpath_variable<xpath_variable_boolean>(name);
8628 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8631 xml_memory::deallocate(var);
8634 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8638 case xpath_type_node_set:
8639 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8642 case xpath_type_number:
8643 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8646 case xpath_type_string:
8647 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8650 case xpath_type_boolean:
8651 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8655 assert(false && "Invalid variable type"); // unreachable
8659 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8661 switch (rhs->type())
8663 case xpath_type_node_set:
8664 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8666 case xpath_type_number:
8667 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8669 case xpath_type_string:
8670 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8672 case xpath_type_boolean:
8673 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8676 assert(false && "Invalid variable type"); // unreachable
8681 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8683 size_t length = static_cast<size_t>(end - begin);
8684 char_t* scratch = buffer;
8686 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8688 // need to make dummy on-heap copy
8689 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8690 if (!scratch) return false;
8693 // copy string to zero-terminated buffer and perform lookup
8694 memcpy(scratch, begin, length * sizeof(char_t));
8695 scratch[length] = 0;
8697 *out_result = set->get(scratch);
8699 // free dummy buffer
8700 if (scratch != buffer) xml_memory::deallocate(scratch);
8706 // Internal node set class
8708 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8710 if (end - begin < 2)
8711 return xpath_node_set::type_sorted;
8713 document_order_comparator cmp;
8715 bool first = cmp(begin[0], begin[1]);
8717 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8718 if (cmp(it[0], it[1]) != first)
8719 return xpath_node_set::type_unsorted;
8721 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8724 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8726 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8728 if (type == xpath_node_set::type_unsorted)
8730 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8732 if (sorted == xpath_node_set::type_unsorted)
8734 sort(begin, end, document_order_comparator());
8736 type = xpath_node_set::type_sorted;
8742 if (type != order) reverse(begin, end);
8747 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8749 if (begin == end) return xpath_node();
8753 case xpath_node_set::type_sorted:
8756 case xpath_node_set::type_sorted_reverse:
8759 case xpath_node_set::type_unsorted:
8760 return *min_element(begin, end, document_order_comparator());
8763 assert(false && "Invalid node set type"); // unreachable
8764 return xpath_node();
// Growable array of xpath_node values whose storage comes from an xpath_allocator.
// Tracks what is known about the order of its contents in _type.
8768 class xpath_node_set_raw
8770 xpath_node_set::type_t _type;
// starts empty: no storage, order unknown
8777 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8781 xpath_node* begin() const
8786 xpath_node* end() const
8793 return _begin == _end;
8798 return static_cast<size_t>(_end - _begin);
// first node in document order (see xpath_first)
8801 xpath_node first() const
8803 return xpath_first(_begin, _end, _type);
// out-of-line growth path of push_back; defined after the class
8806 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8808 void push_back(const xpath_node& node, xpath_allocator* alloc)
8813 push_back_grow(node, alloc);
// Appends the range [begin_, end_); an empty range is a no-op.
8816 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8818 if (begin_ == end_) return;
8820 size_t size_ = static_cast<size_t>(_end - _begin);
8821 size_t capacity = static_cast<size_t>(_eos - _begin);
8822 size_t count = static_cast<size_t>(end_ - begin_);
8824 if (size_ + count > capacity)
8826 // reallocate the old array or allocate a new one
// grows to exactly the required size (no extra slack)
8827 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8832 _end = data + size_;
8833 _eos = data + size_ + count;
// nodes are copied bytewise
8836 memcpy(_end, begin_, count * sizeof(xpath_node));
// sorts into document order (rev == false) and records the resulting order
8842 _type = xpath_sort(_begin, _end, _type, false);
// Shrinks the set so that it ends at pos, which must lie within [_begin, _end].
8845 void truncate(xpath_node* pos)
8847 assert(_begin <= pos && pos <= _end);
// Removes duplicate nodes; an unsorted set is first sorted with duplicate_comparator
// so that equal nodes become adjacent.
8852 void remove_duplicates()
8854 if (_type == xpath_node_set::type_unsorted)
8855 sort(_begin, _end, duplicate_comparator());
8857 _end = unique(_begin, _end);
8860 xpath_node_set::type_t type() const
8865 void set_type(xpath_node_set::type_t value)
8871 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8873 size_t capacity = static_cast<size_t>(_eos - _begin);
8875 // get new capacity (1.5x rule)
8876 size_t new_capacity = capacity + capacity / 2 + 1;
8878 // reallocate the old array or allocate a new one
8879 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8884 _end = data + capacity;
8885 _eos = data + new_capacity;
8893 struct xpath_context
8896 size_t position, size;
8898 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
// Enumerators of the lexeme kinds that the XPath lexer stores in _cur_lexeme.
8911 lex_greater_or_equal,
8923 lex_open_square_brace,
8924 lex_close_square_brace,
8934 struct xpath_lexer_string
8936 const char_t* begin;
8939 xpath_lexer_string(): begin(0), end(0)
8943 bool operator==(const char_t* other) const
8945 size_t length = static_cast<size_t>(end - begin);
8947 return strequalrange(other, begin, length);
// XPath tokenizer state: for the current lexeme it keeps the position where it starts
// (for error reporting), its text range (for lexemes that carry text) and its kind.
8954 const char_t* _cur_lexeme_pos;
8955 xpath_lexer_string _cur_lexeme_contents;
8957 lexeme_t _cur_lexeme;
// Starts reading at the beginning of the query string.
8960 explicit xpath_lexer(const char_t* query): _cur(query)
8965 const char_t* state() const
// Scanning: work on a local cursor, skip whitespace, then classify by the character(s) found.
8972 const char_t* cur = _cur;
8974 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8976 // save lexeme position for error reporting
8977 _cur_lexeme_pos = cur;
8982 _cur_lexeme = lex_eof;
// comparison operators: a following '=' selects the two-character form
8986 if (*(cur+1) == '=')
8989 _cur_lexeme = lex_greater_or_equal;
8994 _cur_lexeme = lex_greater;
8999 if (*(cur+1) == '=')
9002 _cur_lexeme = lex_less_or_equal;
9007 _cur_lexeme = lex_less;
9012 if (*(cur+1) == '=')
9015 _cur_lexeme = lex_not_equal;
// no '=' follows, so there is no valid lexeme here
9019 _cur_lexeme = lex_none;
9025 _cur_lexeme = lex_equal;
9031 _cur_lexeme = lex_plus;
9037 _cur_lexeme = lex_minus;
9043 _cur_lexeme = lex_multiply;
9049 _cur_lexeme = lex_union;
// variable reference: a name, optionally followed by ':' and a second name part (qualified name)
9056 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9058 _cur_lexeme_contents.begin = cur;
9060 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9062 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9066 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9069 _cur_lexeme_contents.end = cur;
9071 _cur_lexeme = lex_var_ref;
9075 _cur_lexeme = lex_none;
9082 _cur_lexeme = lex_open_brace;
9088 _cur_lexeme = lex_close_brace;
9094 _cur_lexeme = lex_open_square_brace;
9100 _cur_lexeme = lex_close_square_brace;
9106 _cur_lexeme = lex_comma;
// '/' versus '//'
9111 if (*(cur+1) == '/')
9114 _cur_lexeme = lex_double_slash;
9119 _cur_lexeme = lex_slash;
// '.' may start '..', a number such as ".5", or stand alone
9124 if (*(cur+1) == '.')
9127 _cur_lexeme = lex_double_dot;
9129 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9131 _cur_lexeme_contents.begin = cur; // .
9135 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9137 _cur_lexeme_contents.end = cur;
9139 _cur_lexeme = lex_number;
9144 _cur_lexeme = lex_dot;
9150 _cur_lexeme = lex_axis_attribute;
// quoted string: the opening quote character is also the terminator; the contents exclude the quotes
9157 char_t terminator = *cur;
9161 _cur_lexeme_contents.begin = cur;
9162 while (*cur && *cur != terminator) cur++;
9163 _cur_lexeme_contents.end = cur;
// NOTE(review): presumably reached when the input ends before the closing quote - confirm
9166 _cur_lexeme = lex_none;
9170 _cur_lexeme = lex_quoted_string;
// '::' is a lexeme; a lone ':' is not
9177 if (*(cur+1) == ':')
9180 _cur_lexeme = lex_double_colon;
9184 _cur_lexeme = lex_none;
// number starting with a digit: two digit runs are scanned (integer and fractional parts)
9189 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9191 _cur_lexeme_contents.begin = cur;
9193 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9199 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9202 _cur_lexeme_contents.end = cur;
9204 _cur_lexeme = lex_number;
// name: NCName, optionally extended to "ncname:*" or "prefix:name"
9206 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9208 _cur_lexeme_contents.begin = cur;
9210 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9214 if (cur[1] == '*') // namespace test ncname:*
9218 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9222 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9226 _cur_lexeme_contents.end = cur;
9228 _cur_lexeme = lex_string;
// anything else is not a valid lexeme
9232 _cur_lexeme = lex_none;
9239 lexeme_t current() const
// Start position of the current lexeme inside the query.
9244 const char_t* current_pos() const
9246 return _cur_lexeme_pos;
// Text of the current lexeme; only meaningful for the lexeme kinds that record a text range.
9249 const xpath_lexer_string& contents() const
9251 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9253 return _cur_lexeme_contents;
// Kinds of XPath AST nodes. In the comments "left", "right" and "third" name the node's operands.
9260 ast_op_or, // left or right
9261 ast_op_and, // left and right
9262 ast_op_equal, // left = right
9263 ast_op_not_equal, // left != right
9264 ast_op_less, // left < right
9265 ast_op_greater, // left > right
9266 ast_op_less_or_equal, // left <= right
9267 ast_op_greater_or_equal, // left >= right
9268 ast_op_add, // left + right
9269 ast_op_subtract, // left - right
9270 ast_op_multiply, // left * right
9271 ast_op_divide, // left / right
9272 ast_op_mod, // left mod right
9273 ast_op_negate, // -left (unary minus)
9274 ast_op_union, // left | right
9275 ast_predicate, // apply predicate to set; next points to next predicate
9276 ast_filter, // select * from left where right
9277 ast_string_constant, // string constant
9278 ast_number_constant, // number constant
9279 ast_variable, // variable
9280 ast_func_last, // last()
9281 ast_func_position, // position()
9282 ast_func_count, // count(left)
9283 ast_func_id, // id(left)
9284 ast_func_local_name_0, // local-name()
9285 ast_func_local_name_1, // local-name(left)
9286 ast_func_namespace_uri_0, // namespace-uri()
9287 ast_func_namespace_uri_1, // namespace-uri(left)
9288 ast_func_name_0, // name()
9289 ast_func_name_1, // name(left)
9290 ast_func_string_0, // string()
9291 ast_func_string_1, // string(left)
9292 ast_func_concat, // concat(left, right, siblings)
9293 ast_func_starts_with, // starts-with(left, right)
9294 ast_func_contains, // contains(left, right)
9295 ast_func_substring_before, // substring-before(left, right)
9296 ast_func_substring_after, // substring-after(left, right)
9297 ast_func_substring_2, // substring(left, right)
9298 ast_func_substring_3, // substring(left, right, third)
9299 ast_func_string_length_0, // string-length()
9300 ast_func_string_length_1, // string-length(left)
9301 ast_func_normalize_space_0, // normalize-space()
9302 ast_func_normalize_space_1, // normalize-space(left)
9303 ast_func_translate, // translate(left, right, third)
9304 ast_func_boolean, // boolean(left)
9305 ast_func_not, // not(left)
9306 ast_func_true, // true()
9307 ast_func_false, // false()
9308 ast_func_lang, // lang(left)
9309 ast_func_number_0, // number()
9310 ast_func_number_1, // number(left)
9311 ast_func_sum, // sum(left)
9312 ast_func_floor, // floor(left)
9313 ast_func_ceiling, // ceiling(left)
9314 ast_func_round, // round(left)
9315 ast_step, // process set left with step
9316 ast_step_root, // select root node
9318 ast_opt_translate_table, // translate(left, right, third) where right/third are constants
9319 ast_opt_compare_attribute // @name = 'string'
// Enumerators of the XPath axis kinds (the axis_* values that step_fill switches on).
9325 axis_ancestor_or_self,
9329 axis_descendant_or_self,
9331 axis_following_sibling,
9335 axis_preceding_sibling,
// Enumerators of the node test kinds (the nodetest_* values that step_push switches on).
9344 nodetest_type_comment,
9349 nodetest_all_in_namespace
// Predicate kind checked by apply_predicate together with predicate_constant; both are routed to
// apply_predicate_number_const.
9357 predicate_constant_one
// Wraps an axis value in a type so it can be passed as a tag argument;
// step_fill reads it back through T::axis.
9367 template <axis_t N> struct axis_to_type
9369 static const axis_t axis;
// out-of-class definition of the static member, one per instantiation
9372 template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9374 class xpath_ast_node
9384 // for ast_step/ast_predicate/ast_filter
9387 // tree node structure
9388 xpath_ast_node* _left;
9389 xpath_ast_node* _right;
9390 xpath_ast_node* _next;
9394 // value for ast_string_constant
9395 const char_t* string;
9396 // value for ast_number_constant
9398 // variable for ast_variable
9399 xpath_variable* variable;
9400 // node test for ast_step (node name/namespace/node type/pi target)
9401 const char_t* nodetest;
9402 // table for ast_opt_translate_table
9403 const unsigned char* table;
9406 xpath_ast_node(const xpath_ast_node&);
9407 xpath_ast_node& operator=(const xpath_ast_node&);
// Evaluates an equality-style comparison between two sub-expressions, dispatching on their
// static return types.
//   lhs, rhs - operand expressions
//   c, stack - evaluation context and allocator stack passed on to the operands
//   comp     - functor applied to a pair of booleans, numbers or strings
// Without node sets, the operands are compared as booleans if either is a boolean, else as numbers
// if either is a number, else as strings. With node sets, comp is applied per node, using each
// node's string value (converted to a number when the other operand is a number).
9409 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9411 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9413 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9415 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9416 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9417 else if (lt == xpath_type_number || rt == xpath_type_number)
9418 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9419 else if (lt == xpath_type_string || rt == xpath_type_string)
// temporaries allocated from stack.result are scoped by the capture object
9421 xpath_allocator_capture cr(stack.result);
9423 xpath_string ls = lhs->eval_string(c, stack);
9424 xpath_string rs = rhs->eval_string(c, stack);
9426 return comp(ls, rs);
// node set vs. node set: every pair of nodes is compared by string value
9429 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9431 xpath_allocator_capture cr(stack.result);
9433 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9434 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9436 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9437 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
// per-pair capture scopes the two temporary string values
9439 xpath_allocator_capture cri(stack.result);
9441 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
// NOTE(review): the statements guarded by this check are not visible here. The code below only
// inspects lt and always evaluates rhs as the node set, so presumably the operands are normalized
// to put the node set on the right - confirm.
9449 if (lt == xpath_type_node_set)
9455 if (lt == xpath_type_boolean)
9456 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9457 else if (lt == xpath_type_number)
9459 xpath_allocator_capture cr(stack.result);
9461 double l = lhs->eval_number(c, stack);
9462 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9464 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9466 xpath_allocator_capture cri(stack.result);
// the node's string value is converted to a number before comparing
9468 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9474 else if (lt == xpath_type_string)
9476 xpath_allocator_capture cr(stack.result);
9478 xpath_string l = lhs->eval_string(c, stack);
9479 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9481 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9483 xpath_allocator_capture cri(stack.result);
9485 if (comp(l, string_value(*ri, stack.result)))
9493 assert(false && "Wrong types"); // unreachable
9497 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9499 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
// Evaluates a relational comparison between two sub-expressions, dispatching on their
// static return types.
//   lhs, rhs - operand expressions
//   c, stack - evaluation context and allocator stack passed on to the operands
//   comp     - functor applied to a pair of numbers
// All comparisons are numeric: plain operands are evaluated as numbers, and for node sets each
// node's string value is converted to a number.
9502 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9504 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9506 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9507 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
// node set vs. node set: every pair of nodes is compared
9508 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
// temporaries allocated from stack.result are scoped by the capture objects
9510 xpath_allocator_capture cr(stack.result);
9512 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9513 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9515 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9517 xpath_allocator_capture cri(stack.result);
// the left node's number is computed once per outer iteration
9519 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9521 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9523 xpath_allocator_capture crii(stack.result);
9525 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
// number on the left, node set on the right
9532 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9534 xpath_allocator_capture cr(stack.result);
9536 double l = lhs->eval_number(c, stack);
9537 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9539 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9541 xpath_allocator_capture cri(stack.result);
9543 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
// node set on the left, number on the right; operand order is kept because comp is not symmetric
9549 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9551 xpath_allocator_capture cr(stack.result);
9553 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9554 double r = rhs->eval_number(c, stack);
9556 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9558 xpath_allocator_capture cri(stack.result);
9560 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9568 assert(false && "Wrong types"); // unreachable
9573 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9575 assert(ns.size() >= first);
9576 assert(expr->rettype() != xpath_type_number);
9579 size_t size = ns.size() - first;
9581 xpath_node* last = ns.begin() + first;
9583 // remove_if... or well, sort of
9584 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9586 xpath_context c(*it, i, size);
9588 if (expr->eval_boolean(c, stack))
9599 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9601 assert(ns.size() >= first);
9602 assert(expr->rettype() == xpath_type_number);
9605 size_t size = ns.size() - first;
9607 xpath_node* last = ns.begin() + first;
9609 // remove_if... or well, sort of
9610 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9612 xpath_context c(*it, i, size);
9614 if (expr->eval_number(c, stack) == i)
9625 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9627 assert(ns.size() >= first);
9628 assert(expr->rettype() == xpath_type_number);
9630 size_t size = ns.size() - first;
9632 xpath_node* last = ns.begin() + first;
9634 xpath_context c(xpath_node(), 1, size);
9636 double er = expr->eval_number(c, stack);
9638 if (er >= 1.0 && er <= size)
9640 size_t eri = static_cast<size_t>(er);
9644 xpath_node r = last[eri - 1];
9653 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9655 if (ns.size() == first) return;
9657 assert(_type == ast_filter || _type == ast_predicate);
9659 if (_test == predicate_constant || _test == predicate_constant_one)
9660 apply_predicate_number_const(ns, first, _right, stack);
9661 else if (_right->rettype() == xpath_type_number)
9662 apply_predicate_number(ns, first, _right, stack, once);
9664 apply_predicate_boolean(ns, first, _right, stack, once);
9667 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9669 if (ns.size() == first) return;
9671 bool last_once = eval_once(ns.type(), eval);
9673 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9674 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
// Attribute overload: appends attribute `a` of `parent` to ns if it passes this step's node test.
// Namespace declarations (see is_xpath_attribute) never pass.
// NOTE(review): the return statements are not visible here; callers combine the result with
// `once`, so it presumably reports whether a node was pushed - confirm.
9677 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
// a missing name is treated as an empty string
9681 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
// name test: exact match against the step's node test string
9686 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9688 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9693 case nodetest_type_node:
9695 if (is_xpath_attribute(name))
9697 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// namespace test: the attribute name has to start with the stored node test string
9702 case nodetest_all_in_namespace:
9703 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9705 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
// Node overload: appends node `n` to ns if it passes this step's node test.
// NOTE(review): the return statements are not visible here; callers combine the result with
// `once`, so it presumably reports whether a node was pushed - confirm.
9717 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9721 xml_node_type type = PUGI__NODETYPE(n);
// name test: element whose name equals the step's node test string
9726 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9728 ns.push_back(xml_node(n), alloc);
// node(): every node passes
9733 case nodetest_type_node:
9734 ns.push_back(xml_node(n), alloc);
9737 case nodetest_type_comment:
9738 if (type == node_comment)
9740 ns.push_back(xml_node(n), alloc);
// text(): both plain character data and CDATA sections pass
9745 case nodetest_type_text:
9746 if (type == node_pcdata || type == node_cdata)
9748 ns.push_back(xml_node(n), alloc);
9753 case nodetest_type_pi:
9754 if (type == node_pi)
9756 ns.push_back(xml_node(n), alloc);
// processing instruction whose name equals the step's node test string
9762 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9764 ns.push_back(xml_node(n), alloc);
9770 if (type == node_element)
9772 ns.push_back(xml_node(n), alloc);
// namespace test: the element name has to start with the stored node test string
9777 case nodetest_all_in_namespace:
9778 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9780 ns.push_back(xml_node(n), alloc);
// NOTE(review): the message says "axis" although the enclosing cases are node test kinds
9786 assert(false && "Unknown axis"); // unreachable
// Node overload: walks the tree around node `n` along the axis given by T::axis and offers every visited
// node (or attribute, for the attribute axis) to step_push, which decides whether it enters `ns`.
// Each `step_push(...) & once` test is true only when a node was pushed and `once` is set.
// NOTE(review): the statement guarded by those tests is not visible here; presumably an early return - confirm.
9792 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9794 const axis_t axis = T::axis;
// attribute axis: every attribute of n, in list order
9798 case axis_attribute:
9800 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9801 if (step_push(ns, a, n, alloc) & once)
// direct children of n, first to last
9809 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9810 if (step_push(ns, c, alloc) & once)
9816 case axis_descendant:
9817 case axis_descendant_or_self:
// the -or-self variant offers n itself before its subtree
9819 if (axis == axis_descendant_or_self)
9820 if (step_push(ns, n, alloc) & once)
// subtree walk: go to the first child when there is one, otherwise to the next sibling
9823 xml_node_struct* cur = n->first_child;
9827 if (step_push(ns, cur, alloc) & once)
9830 if (cur->first_child)
9831 cur = cur->first_child;
// no next sibling: the loop body (not fully visible) finishes once cur reaches n again
9834 while (!cur->next_sibling)
9838 if (cur == n) return;
9841 cur = cur->next_sibling;
// siblings after n, in forward order
9848 case axis_following_sibling:
9850 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9851 if (step_push(ns, c, alloc) & once)
// siblings before n, walked backwards; the loop ends on a node that has no next_sibling
// NOTE(review): this relies on prev_sibling_c being cyclic (first child's prev is the last child) - confirm
9857 case axis_preceding_sibling:
9859 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9860 if (step_push(ns, c, alloc) & once)
9866 case axis_following:
9868 xml_node_struct* cur = n;
9870 // exit from this node so that we don't include descendants
9871 while (!cur->next_sibling)
9878 cur = cur->next_sibling;
9882 if (step_push(ns, cur, alloc) & once)
// same first-child / next-sibling walk as for descendants
9885 if (cur->first_child)
9886 cur = cur->first_child;
9889 while (!cur->next_sibling)
9896 cur = cur->next_sibling;
9903 case axis_preceding:
9905 xml_node_struct* cur = n;
9907 // exit from this node so that we don't include descendants
9908 while (!cur->prev_sibling_c->next_sibling)
9915 cur = cur->prev_sibling_c;
// descend through first_child->prev_sibling_c (presumably the last child - confirm)
9919 if (cur->first_child)
9920 cur = cur->first_child->prev_sibling_c;
9923 // leaf node, can't be ancestor
9924 if (step_push(ns, cur, alloc) & once)
9927 while (!cur->prev_sibling_c->next_sibling)
// ancestors of n are not offered to step_push on this axis
9933 if (!node_is_ancestor(cur, n))
9934 if (step_push(ns, cur, alloc) & once)
9938 cur = cur->prev_sibling_c;
9946 case axis_ancestor_or_self:
// the -or-self variant offers n itself before walking up
9948 if (axis == axis_ancestor_or_self)
9949 if (step_push(ns, n, alloc) & once)
// walk starts at n's parent
9952 xml_node_struct* cur = n->parent;
9956 if (step_push(ns, cur, alloc) & once)
// n itself is offered (result ignored)
9967 step_push(ns, n, alloc);
// n's parent is offered (result ignored)
9975 step_push(ns, n->parent, alloc);
9981 assert(false && "Unimplemented axis"); // unreachable
// Attribute overload: same job as the node version, but the context is attribute `a` whose owning node is `p`.
// `v` is the axis tag object; it is forwarded unchanged when the node overload is reused.
9985 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9987 const axis_t axis = T::axis;
9992 case axis_ancestor_or_self:
9994 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9995 if (step_push(ns, a, p, alloc) & once)
// ancestors of the attribute start at its owning node p
9998 xml_node_struct* cur = p;
10002 if (step_push(ns, cur, alloc) & once)
10011 case axis_descendant_or_self:
10014 if (_test == nodetest_type_node) // reject attributes based on principal node type test
10015 step_push(ns, a, p, alloc);
10020 case axis_following:
// walk starts at the owning node p and moves through first_child / next_sibling links
10022 xml_node_struct* cur = p;
10026 if (cur->first_child)
10027 cur = cur->first_child;
10030 while (!cur->next_sibling)
10037 cur = cur->next_sibling;
10040 if (step_push(ns, cur, alloc) & once)
// the owning node p is offered (result ignored)
10049 step_push(ns, p, alloc);
10054 case axis_preceding:
10056 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10057 step_fill(ns, p, alloc, once, v);
10062 assert(false && "Unimplemented axis"); // unreachable
// xpath_node overload: forwards to the node overload or to the attribute overload depending on what `xn` holds.
10066 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10068 const axis_t axis = T::axis;
// axes for which an attribute context is processed at all; for any other axis an attribute context adds nothing
10069 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10072 step_fill(ns, xn.node().internal_object(), alloc, once, v);
// attribute context: requires both the attribute and its parent to be present
10073 else if (axis_has_attributes && xn.attribute() && xn.parent())
10074 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
// Evaluates one location step for the axis T::axis and returns the resulting node set.
// With a left-hand expression (_left) the step is applied to each node that expression yields;
// the other visible call applies it to the context node c.n. Predicates (_right) are applied after filling.
10077 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10079 const axis_t axis = T::axis;
// ancestor/preceding style axes produce nodes in reverse order, all others in forward order
10080 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10081 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
// NOTE(review): the three OR-ed conditions below presumably initialize the `once` flag passed to step_fill - confirm
10084 (axis == axis_attribute && _test == nodetest_name) ||
10085 (!_right && eval_once(axis_type, eval)) ||
10086 (_right && !_right->_next && _right->_test == predicate_constant_one);
10088 xpath_node_set_raw ns;
10089 ns.set_type(axis_type);
// the left-hand set is always evaluated in full
10093 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10095 // self axis preserves the original order
10096 if (axis == axis_self) ns.set_type(s.type());
10098 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
// remember where this source node's results start so predicates only see them
10100 size_t size = ns.size();
10102 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10103 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10105 step_fill(ns, *it, stack.result, once, v);
10106 if (_right) apply_predicates(ns, size, stack, eval);
// step applied directly to the context node
10111 step_fill(ns, c.n, stack.result, once, v);
10112 if (_right) apply_predicates(ns, 0, stack, eval);
10115 // child, attribute and self axes always generate unique set of nodes
10116 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10117 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10118 ns.remove_duplicates();
// Constructs a string-constant node: stores `value` in _data.string; axis, test and all links are zeroed.
// The pointer is stored as-is (no copy is made here).
10124 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10125 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10127 assert(type == ast_string_constant);
10128 _data.string = value;
// Constructs a number-constant node: stores `value` in _data.number; axis, test and all links are zeroed.
10131 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10132 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10134 assert(type == ast_number_constant);
10135 _data.number = value;
// Constructs a variable-reference node: stores the variable pointer in _data.variable; axis, test and all links are zeroed.
10138 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10139 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10141 assert(type == ast_variable);
10142 _data.variable = value;
// Constructs a generic node of the given type and return type with up to two operands (both default to null).
// Axis, test and _next are zeroed.
10145 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10146 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
// Constructs a location-step node: return type is always node set, `axis` and `test` are stored in _axis/_test,
// `left` is the expression the step is applied to, and `contents` (the node test string) goes to _data.nodetest.
10150 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10151 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10153 assert(type == ast_step);
10154 _data.nodetest = contents;
// Constructs a filter or predicate node: return type is always node set and the predicate
// classification `test` is stored in _test (the same field that steps use for their node test).
10157 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10158 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10160 assert(type == ast_filter || type == ast_predicate);
// NOTE(review): presumably stores `value` as this node's _next link (the chain other methods iterate with `->_next`) - confirm against the body.
10163 void set_next(xpath_ast_node* value)
// NOTE(review): presumably stores `value` as this node's _right operand - confirm against the body.
10168 void set_right(xpath_ast_node* value)
// Evaluates this node as a boolean in context `c`.
// Boolean operators and functions are computed directly; the trailing cases convert a
// number, string or node-set result to boolean according to the node's return type.
10173 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
// logical or / and use the built-in short-circuit operators, so the right operand may not be evaluated
10178 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10181 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10184 return compare_eq(_left, _right, c, stack, equal_to());
10186 case ast_op_not_equal:
10187 return compare_eq(_left, _right, c, stack, not_equal_to());
10190 return compare_rel(_left, _right, c, stack, less());
// greater-than is expressed as less-than with the operands swapped
10192 case ast_op_greater:
10193 return compare_rel(_right, _left, c, stack, less());
10195 case ast_op_less_or_equal:
10196 return compare_rel(_left, _right, c, stack, less_equal());
// greater-or-equal is expressed as less-or-equal with the operands swapped
10198 case ast_op_greater_or_equal:
10199 return compare_rel(_right, _left, c, stack, less_equal());
10201 case ast_func_starts_with:
10203 xpath_allocator_capture cr(stack.result);
10205 xpath_string lr = _left->eval_string(c, stack);
10206 xpath_string rr = _right->eval_string(c, stack);
10208 return starts_with(lr.c_str(), rr.c_str());
10211 case ast_func_contains:
10213 xpath_allocator_capture cr(stack.result);
10215 xpath_string lr = _left->eval_string(c, stack);
10216 xpath_string rr = _right->eval_string(c, stack);
10218 return find_substring(lr.c_str(), rr.c_str()) != 0;
10221 case ast_func_boolean:
10222 return _left->eval_boolean(c, stack);
10225 return !_left->eval_boolean(c, stack);
10227 case ast_func_true:
10230 case ast_func_false:
10233 case ast_func_lang:
// an attribute context never matches
10235 if (c.n.attribute()) return false;
10237 xpath_allocator_capture cr(stack.result);
10239 xpath_string lang = _left->eval_string(c, stack);
// walk from the context node up through its parents looking at xml:lang attributes
10241 for (xml_node n = c.n.node(); n; n = n.parent())
10243 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10247 const char_t* value = a.value();
10249 // strnicmp / strncasecmp is not portable
10250 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10252 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
// the requested language must be the whole value or a prefix followed by '-'
10256 return *value == 0 || *value == '-';
10263 case ast_opt_compare_attribute:
// right operand is a string constant or a variable (optimize_self only creates this node type for those)
10265 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10267 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10269 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10274 assert(_rettype == _data.variable->type());
10276 if (_rettype == xpath_type_boolean)
10277 return _data.variable->get_boolean();
// conversions from other return types
10285 case xpath_type_number:
10286 return convert_number_to_boolean(eval_number(c, stack));
// a string is true when it is not empty
10288 case xpath_type_string:
10290 xpath_allocator_capture cr(stack.result);
10292 return !eval_string(c, stack).empty();
// a node set is true when it is not empty; the set is requested with nodeset_eval_any
10295 case xpath_type_node_set:
10297 xpath_allocator_capture cr(stack.result);
10299 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10303 assert(false && "Wrong expression for return type boolean"); // unreachable
// Evaluates this node as a double in context `c`.
// Arithmetic operators and numeric functions are computed directly; the trailing cases convert a
// boolean, string or node-set result to a number according to the node's return type.
10310 double eval_number(const xpath_context& c, const xpath_stack& stack)
10315 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10317 case ast_op_subtract:
10318 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10320 case ast_op_multiply:
10321 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
// plain floating-point division; no special handling of a zero divisor here
10323 case ast_op_divide:
10324 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10327 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10329 case ast_op_negate:
10330 return -_left->eval_number(c, stack);
10332 case ast_number_constant:
10333 return _data.number;
// last() and position() read the context size and position
10335 case ast_func_last:
10336 return static_cast<double>(c.size);
10338 case ast_func_position:
10339 return static_cast<double>(c.position);
10341 case ast_func_count:
10343 xpath_allocator_capture cr(stack.result);
10345 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10348 case ast_func_string_length_0:
10350 xpath_allocator_capture cr(stack.result);
10352 return static_cast<double>(string_value(c.n, stack.result).length());
10355 case ast_func_string_length_1:
10357 xpath_allocator_capture cr(stack.result);
10359 return static_cast<double>(_left->eval_string(c, stack).length());
10362 case ast_func_number_0:
10364 xpath_allocator_capture cr(stack.result);
10366 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10369 case ast_func_number_1:
10370 return _left->eval_number(c, stack);
// accumulation over a node set: each node's string value is converted to a number and added to r
10374 xpath_allocator_capture cr(stack.result);
10378 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10380 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
// a fresh capture per node scopes the temporary string value to this iteration
10382 xpath_allocator_capture cri(stack.result);
10384 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10390 case ast_func_floor:
10392 double r = _left->eval_number(c, stack);
// r == r is false only for NaN, which is returned unchanged
10394 return r == r ? floor(r) : r;
10397 case ast_func_ceiling:
10399 double r = _left->eval_number(c, stack);
// r == r is false only for NaN, which is returned unchanged
10401 return r == r ? ceil(r) : r;
10404 case ast_func_round:
10405 return round_nearest_nzero(_left->eval_number(c, stack));
10409 assert(_rettype == _data.variable->type());
10411 if (_rettype == xpath_type_number)
10412 return _data.variable->get_number();
// conversions from other return types
10420 case xpath_type_boolean:
10421 return eval_boolean(c, stack) ? 1 : 0;
10423 case xpath_type_string:
10425 xpath_allocator_capture cr(stack.result);
10427 return convert_string_to_number(eval_string(c, stack).c_str());
// node sets go through their string conversion first
10430 case xpath_type_node_set:
10432 xpath_allocator_capture cr(stack.result);
10434 return convert_string_to_number(eval_string(c, stack).c_str());
10438 assert(false && "Wrong expression for return type number"); // unreachable
// Evaluates concat(): the first argument is _left, the remaining ones form the chain that starts at _right.
// Arguments are evaluated into the temp allocator, then copied back to back into one buffer from stack.result.
// Returns an empty xpath_string when either allocation fails.
10446 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10448 assert(_type == ast_func_concat);
10450 xpath_allocator_capture ct(stack.temp);
10452 // count the string number
10454 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10456 // allocate a buffer for temporary string objects
10457 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10458 if (!buffer) return xpath_string();
10460 // evaluate all strings to temporary stack
// with result/temp swapped, the arguments' own results land in stack.temp
10461 xpath_stack swapped_stack = {stack.temp, stack.result};
10463 buffer[0] = _left->eval_string(c, swapped_stack);
10466 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10467 assert(pos == count);
10469 // get total length
10471 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10473 // create final string
// one extra char_t beyond the summed length (room for a terminator)
10474 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10475 if (!result) return xpath_string();
10477 char_t* ri = result;
10479 for (size_t j = 0; j < count; ++j)
10480 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10485 return xpath_string::from_heap_preallocated(result, ri);
// Evaluates this node as a string in context `c`.
// String functions are computed directly; the trailing cases convert a boolean, number or
// node-set result to a string according to the node's return type.
// Several cases evaluate operands with result/temp swapped (swapped_stack) so that operand strings are
// produced by stack.temp while the value returned to the caller is produced by stack.result.
10488 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10492 case ast_string_constant:
10493 return xpath_string::from_const(_data.string);
// the *_0 variants use the context node; the *_1 variants use the first node of the argument set
10495 case ast_func_local_name_0:
10497 xpath_node na = c.n;
10499 return xpath_string::from_const(local_name(na));
10502 case ast_func_local_name_1:
10504 xpath_allocator_capture cr(stack.result);
10506 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10507 xpath_node na = ns.first();
10509 return xpath_string::from_const(local_name(na));
10512 case ast_func_name_0:
10514 xpath_node na = c.n;
10516 return xpath_string::from_const(qualified_name(na));
10519 case ast_func_name_1:
10521 xpath_allocator_capture cr(stack.result);
10523 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10524 xpath_node na = ns.first();
10526 return xpath_string::from_const(qualified_name(na));
10529 case ast_func_namespace_uri_0:
10531 xpath_node na = c.n;
10533 return xpath_string::from_const(namespace_uri(na));
10536 case ast_func_namespace_uri_1:
10538 xpath_allocator_capture cr(stack.result);
10540 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10541 xpath_node na = ns.first();
10543 return xpath_string::from_const(namespace_uri(na));
10546 case ast_func_string_0:
10547 return string_value(c.n, stack.result);
10549 case ast_func_string_1:
10550 return _left->eval_string(c, stack);
10552 case ast_func_concat:
10553 return eval_string_concat(c, stack);
10555 case ast_func_substring_before:
10557 xpath_allocator_capture cr(stack.temp);
10559 xpath_stack swapped_stack = {stack.temp, stack.result};
10561 xpath_string s = _left->eval_string(c, swapped_stack);
10562 xpath_string p = _right->eval_string(c, swapped_stack);
10564 const char_t* pos = find_substring(s.c_str(), p.c_str());
// the part of s before the match is copied via stack.result; no match yields an empty string
10566 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10569 case ast_func_substring_after:
10571 xpath_allocator_capture cr(stack.temp);
10573 xpath_stack swapped_stack = {stack.temp, stack.result};
10575 xpath_string s = _left->eval_string(c, swapped_stack);
10576 xpath_string p = _right->eval_string(c, swapped_stack);
10578 const char_t* pos = find_substring(s.c_str(), p.c_str());
10579 if (!pos) return xpath_string();
10581 const char_t* rbegin = pos + p.length();
10582 const char_t* rend = s.c_str() + s.length();
// a heap-backed s is copied via stack.result; otherwise the tail of the constant is referenced directly
10584 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10587 case ast_func_substring_2:
10589 xpath_allocator_capture cr(stack.temp);
10591 xpath_stack swapped_stack = {stack.temp, stack.result};
10593 xpath_string s = _left->eval_string(c, swapped_stack);
10594 size_t s_length = s.length();
// start position is 1-based and rounded
10596 double first = round_nearest(_right->eval_number(c, stack));
10598 if (is_nan(first)) return xpath_string(); // NaN
// start is past the end of the string
10599 else if (first >= s_length + 1) return xpath_string();
// positions below 1 are clamped to the first character
10601 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10602 assert(1 <= pos && pos <= s_length + 1);
10604 const char_t* rbegin = s.c_str() + (pos - 1);
10605 const char_t* rend = s.c_str() + s.length();
10607 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10610 case ast_func_substring_3:
10612 xpath_allocator_capture cr(stack.temp);
10614 xpath_stack swapped_stack = {stack.temp, stack.result};
10616 xpath_string s = _left->eval_string(c, swapped_stack);
10617 size_t s_length = s.length();
// `last` is one past the final character: rounded start plus rounded length
10619 double first = round_nearest(_right->eval_number(c, stack));
10620 double last = first + round_nearest(_right->_next->eval_number(c, stack));
// every rejected range produces an empty string
10622 if (is_nan(first) || is_nan(last)) return xpath_string();
10623 else if (first >= s_length + 1) return xpath_string();
10624 else if (first >= last) return xpath_string();
10625 else if (last < 1) return xpath_string();
// clamp both ends into [1, s_length + 1]
10627 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10628 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10630 assert(1 <= pos && pos <= end && end <= s_length + 1);
10631 const char_t* rbegin = s.c_str() + (pos - 1);
10632 const char_t* rend = s.c_str() + (end - 1);
// the constant can be referenced directly only when the range runs to the end of a non-heap string
10634 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10637 case ast_func_normalize_space_0:
10639 xpath_string s = string_value(c.n, stack.result);
// s.data() yields a writable buffer (null on failure) that is normalized in place
10641 char_t* begin = s.data(stack.result);
10642 if (!begin) return xpath_string();
10644 char_t* end = normalize_space(begin);
10646 return xpath_string::from_heap_preallocated(begin, end);
10649 case ast_func_normalize_space_1:
10651 xpath_string s = _left->eval_string(c, stack);
10653 char_t* begin = s.data(stack.result);
10654 if (!begin) return xpath_string();
10656 char_t* end = normalize_space(begin);
10658 return xpath_string::from_heap_preallocated(begin, end);
10661 case ast_func_translate:
10663 xpath_allocator_capture cr(stack.temp);
10665 xpath_stack swapped_stack = {stack.temp, stack.result};
// the subject string uses the normal stack because it is edited in place and returned;
// the from/to strings are only read, so they use the swapped stack
10667 xpath_string s = _left->eval_string(c, stack);
10668 xpath_string from = _right->eval_string(c, swapped_stack);
10669 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10671 char_t* begin = s.data(stack.result);
10672 if (!begin) return xpath_string();
10674 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10676 return xpath_string::from_heap_preallocated(begin, end);
// translate() with constant arguments, rewritten by optimize_self to use a precomputed table
10679 case ast_opt_translate_table:
10681 xpath_string s = _left->eval_string(c, stack);
10683 char_t* begin = s.data(stack.result);
10684 if (!begin) return xpath_string();
10686 char_t* end = translate_table(begin, _data.table);
10688 return xpath_string::from_heap_preallocated(begin, end);
10693 assert(_rettype == _data.variable->type());
10695 if (_rettype == xpath_type_string)
10696 return xpath_string::from_const(_data.variable->get_string());
// conversions from other return types
10704 case xpath_type_boolean:
10705 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10707 case xpath_type_number:
10708 return convert_number_to_string(eval_number(c, stack), stack.result);
// a node set converts to the string value of its first node, or to an empty string when the set is empty
10710 case xpath_type_node_set:
10712 xpath_allocator_capture cr(stack.temp);
10714 xpath_stack swapped_stack = {stack.temp, stack.result};
10716 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10717 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10721 assert(false && "Wrong expression for return type string"); // unreachable
10722 return xpath_string();
// Evaluates this node as a node set in context `c`; `eval` tells how much of the set the caller needs.
// Visible cases: merging two sets, filtering a set with a predicate, location steps (one step_do
// instantiation per axis), the root step, and node-set variables.
10728 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
// merge of two sets: the left operand is produced by the temp allocator, the right one by the result allocator
10734 xpath_allocator_capture cr(stack.temp);
10736 xpath_stack swapped_stack = {stack.temp, stack.result};
10738 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10739 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10741 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10742 rs.set_type(xpath_node_set::type_unsorted);
// left nodes are appended to the right set, then duplicates are dropped
10744 rs.append(ls.begin(), ls.end(), stack.result);
10745 rs.remove_duplicates();
// filter: for a constant-one predicate only the first node of the operand set is requested
10752 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10754 // either expression is a number or it contains position() call; sort by document order
10755 if (_test != predicate_posinv) set.sort_do();
10757 bool once = eval_once(set.type(), eval);
10759 apply_predicate(set, 0, stack, once);
// NOTE(review): the case label for this empty-set result is not visible here - confirm which node type it belongs to
10765 return xpath_node_set_raw();
// location steps: the runtime axis value selects the matching compile-time step_do instantiation
10771 case axis_ancestor:
10772 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10774 case axis_ancestor_or_self:
10775 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10777 case axis_attribute:
10778 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10781 return step_do(c, stack, eval, axis_to_type<axis_child>());
10783 case axis_descendant:
10784 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10786 case axis_descendant_or_self:
10787 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10789 case axis_following:
10790 return step_do(c, stack, eval, axis_to_type<axis_following>());
10792 case axis_following_sibling:
10793 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10795 case axis_namespace:
10796 // namespaced axis is not supported
10797 return xpath_node_set_raw();
10800 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10802 case axis_preceding:
10803 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10805 case axis_preceding_sibling:
10806 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10809 return step_do(c, stack, eval, axis_to_type<axis_self>());
10812 assert(false && "Unknown axis"); // unreachable
10813 return xpath_node_set_raw();
10817 case ast_step_root:
10819 assert(!_right); // root step can't have any predicates
10821 xpath_node_set_raw ns;
10823 ns.set_type(xpath_node_set::type_sorted);
// root of the context node, or of the owning node when the context is an attribute
10825 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10826 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
10833 assert(_rettype == _data.variable->type());
10835 if (_rettype == xpath_type_node_set)
10837 const xpath_node_set& s = _data.variable->get_node_set();
// the variable's set is copied into a fresh raw set that keeps the same ordering type
10839 xpath_node_set_raw ns;
10841 ns.set_type(s.type());
10842 ns.append(s.begin(), s.end(), stack.result);
10850 assert(false && "Wrong expression for return type node set"); // unreachable
10851 return xpath_node_set_raw();
// Recursively optimizes the nodes reachable through _left, _right and _next, then this node itself,
// so linked nodes are already rewritten when optimize_self inspects them.
// NOTE(review): the guards in front of the three recursive calls are not visible here; presumably null checks - confirm.
10855 void optimize(xpath_allocator* alloc)
10858 _left->optimize(alloc);
10861 _right->optimize(alloc);
10864 _next->optimize(alloc);
10866 optimize_self(alloc);
// Applies local rewrites to this single node (linked nodes are handled by optimize()).
// Visible rewrites: simplifying [position()=expr], classifying predicates, collapsing //-style step pairs,
// switching translate() with constant arguments to a lookup table, and specializing @attr = value comparisons.
10869 void optimize_self(xpath_allocator* alloc)
10871 // Rewrite [position()=expr] with [expr]
10872 // Note that this step has to go before classification to recognize [position()=1]
10873 if ((_type == ast_filter || _type == ast_predicate) &&
10874 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10876 _right = _right->_right;
10879 // Classify filter/predicate ops to perform various optimizations during evaluation
10880 if (_type == ast_filter || _type == ast_predicate)
10882 assert(_test == predicate_default);
// the literal 1 gets its own class
10884 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10885 _test = predicate_constant_one;
// numeric constant, numeric variable or last()
10886 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10887 _test = predicate_constant;
// non-numeric expression for which is_posinv_expr() holds
10888 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10889 _test = predicate_posinv;
10892 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10893 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10894 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10895 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10896 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10897 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
// child/descendant collapse to descendant; the remaining axes collapse to descendant-or-self
10900 if (_axis == axis_child || _axis == axis_descendant)
10901 _axis = axis_descendant;
10903 _axis = axis_descendant_or_self;
// drop the now-redundant descendant-or-self::node() step
10905 _left = _left->_left;
10908 // Use optimized lookup table implementation for translate() with constant arguments
10909 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10911 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10915 _type = ast_opt_translate_table;
10916 _data.table = table;
10920 // Use optimized path for @attr = 'value' or @attr = $value
10921 if (_type == ast_op_equal &&
10922 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10923 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10925 _type = ast_opt_compare_attribute;
// Tells whether this expression is position-invariant (used by optimize_self to classify predicates).
// NOTE(review): the return statements of the individual cases are not visible here; presumably position()/last()
// yield false and the constant/root/predicate group yields true - confirm.
10929 bool is_posinv_expr() const
10933 case ast_func_position:
10934 case ast_func_last:
10937 case ast_string_constant:
10938 case ast_number_constant:
10943 case ast_step_root:
10946 case ast_predicate:
// remaining node types: the left operand and every node of the _right chain must all be position-invariant
10951 if (_left && !_left->is_posinv_expr()) return false;
10953 for (xpath_ast_node* n = _right; n; n = n->_next)
10954 if (!n->is_posinv_expr()) return false;
// Checks the predicates attached to this step (the chain starting at _right) for the predicate_posinv classification.
// NOTE(review): the return statements are not visible here; presumably false on the first non-posinv predicate - confirm.
10960 bool is_posinv_step() const
10962 assert(_type == ast_step);
10964 for (xpath_ast_node* n = _right; n; n = n->_next)
10966 assert(n->_type == ast_predicate);
10968 if (n->_test != predicate_posinv)
// Returns the node's return type; the constructors store it in _rettype as a char, so it is cast back to the enum here.
10975 xpath_value_type rettype() const
10977 return static_cast<xpath_value_type>(_rettype);
// Parser state used while building xpath_ast_node trees from a query.
10981 struct xpath_parser
// allocator that provides memory for AST nodes and copied strings
10983 xpath_allocator* _alloc;
// lexer over the query; its current position is used when reporting error offsets
10984 xpath_lexer _lexer;
// start of the query text (error offsets are computed relative to it)
10986 const char_t* _query;
// NOTE(review): presumably the variable set used to resolve $name references - not used in the visible portion
10987 xpath_variable_set* _variables;
// receives the error message and offset when parsing fails
10989 xpath_parse_result* _result;
// NOTE(review): small fixed-size scratch buffer; its use is not visible in this portion
10991 char_t _scratch[32];
// Records a parse error: stores `message` and the lexer's current offset from the query start in _result.
// NOTE(review): the return statement is not visible here; callers use `return error(...)`, so presumably null - confirm.
10993 xpath_ast_node* error(const char* message)
10995 _result->error = message;
10996 _result->offset = _lexer.current_pos() - _query;
// Flags an out-of-memory condition by setting the allocator's error flag (which must be non-null).
// NOTE(review): the return statement is not visible here; presumably null - confirm.
11001 xpath_ast_node* error_oom()
11003 assert(_alloc->_error);
11004 *_alloc->_error = true;
// raw, unconstructed storage for exactly one xpath_ast_node; the typed alloc_node overloads placement-new into it
11011 return _alloc->allocate(sizeof(xpath_ast_node));
// Allocates and constructs a string-valued node; returns null when allocation fails.
11014 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11016 void* memory = alloc_node();
// placement-new into the allocator's memory
11017 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
// Allocates and constructs a number-valued node; returns null when allocation fails.
11020 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11022 void* memory = alloc_node();
// placement-new into the allocator's memory
11023 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
// Allocates and constructs a variable-reference node; returns null when allocation fails.
11026 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11028 void* memory = alloc_node();
// placement-new into the allocator's memory
11029 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
// Allocates and constructs a generic node with up to two operands (both default to null); returns null when allocation fails.
11032 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11034 void* memory = alloc_node();
// placement-new into the allocator's memory
11035 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
// Allocates and constructs a location-step node (axis, node test and node test string); returns null when allocation fails.
11038 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11040 void* memory = alloc_node();
// placement-new into the allocator's memory
11041 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
// Allocates and constructs a filter/predicate node with the given predicate classification; returns null when allocation fails.
11044 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11046 void* memory = alloc_node();
// placement-new into the allocator's memory
11047 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
// Copies the lexer string [value.begin, value.end) into memory owned by the parser's allocator.
// NOTE(review): the guard for the empty-literal return and the handling of a failed allocation are not visible here - confirm.
11050 const char_t* alloc_string(const xpath_lexer_string& value)
11053 return PUGIXML_TEXT("");
11055 size_t length = static_cast<size_t>(value.end - value.begin);
// one extra char_t beyond the copied characters (room for a terminator)
11057 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11060 memcpy(c, value.begin, length * sizeof(char_t));
// Maps an XPath function name plus argument count to an AST node.
//   name - lexer string with the function name; dispatch starts on its first character
//   argc - number of arguments the caller parsed
//   args - the first two argument nodes (the caller zero-initializes the array and only
//          stores arguments with index < 2 in it)
// Returns the node produced by alloc_node, or the result of error() when the name is
// unknown, the argument count does not match, or a node-set argument is required but
// the argument has a different return type.
11066 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11068 switch (name.begin[0])
11071 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11072 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11077 if (name == PUGIXML_TEXT("count") && argc == 1)
// count() only accepts a node set
11079 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11080 return alloc_node(ast_func_count, xpath_type_number, args[0]);
11082 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11083 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
// concat() accepts two or more arguments; only the first two are passed here.
// NOTE(review): further arguments are presumably reachable through the set_next chain
// built by the caller - confirm against parse_primary_expression.
11084 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11085 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11086 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11087 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11092 if (name == PUGIXML_TEXT("false") && argc == 0)
11093 return alloc_node(ast_func_false, xpath_type_boolean);
11094 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11095 return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11100 if (name == PUGIXML_TEXT("id") && argc == 1)
11101 return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11106 if (name == PUGIXML_TEXT("last") && argc == 0)
11107 return alloc_node(ast_func_last, xpath_type_number);
11108 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11109 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
// Functions with an optional argument select a _0 or _1 node type based on argc.
11110 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11112 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11113 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11119 if (name == PUGIXML_TEXT("name") && argc <= 1)
11121 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11122 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11124 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11126 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11127 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
// args[1] is also passed here although argc <= 1; the caller never fills that slot
// in this case, so it keeps its zero initial value.
11129 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11130 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11131 else if (name == PUGIXML_TEXT("not") && argc == 1)
11132 return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11133 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11134 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11139 if (name == PUGIXML_TEXT("position") && argc == 0)
11140 return alloc_node(ast_func_position, xpath_type_number);
11145 if (name == PUGIXML_TEXT("round") && argc == 1)
11146 return alloc_node(ast_func_round, xpath_type_number, args[0]);
11151 if (name == PUGIXML_TEXT("string") && argc <= 1)
11152 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11153 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11154 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11155 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11156 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11157 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11158 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11159 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11160 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
// substring() takes two or three arguments; the node type records which form was used,
// and only the first two arguments are passed directly.
11161 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11162 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11163 else if (name == PUGIXML_TEXT("sum") && argc == 1)
// sum() only accepts a node set
11165 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11166 return alloc_node(ast_func_sum, xpath_type_number, args[0]);
// translate() takes three arguments; only the first two are passed directly.
11172 if (name == PUGIXML_TEXT("translate") && argc == 3)
11173 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11174 else if (name == PUGIXML_TEXT("true") && argc == 0)
11175 return alloc_node(ast_func_true, xpath_type_boolean);
// no name/argument-count combination matched
11183 return error("Unrecognized function or wrong parameter count");
// Translates an axis name into the matching axis_t value, dispatching on the first
// character of the name and then comparing the full string.
//   name      - lexer string holding the candidate axis name
//   specified - output flag; the caller in parse_step reports "Unknown axis" when it is
//               false after this call
11186 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11190 switch (name.begin[0])
11193 if (name == PUGIXML_TEXT("ancestor"))
11194 return axis_ancestor;
11195 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11196 return axis_ancestor_or_self;
11197 else if (name == PUGIXML_TEXT("attribute"))
11198 return axis_attribute;
11203 if (name == PUGIXML_TEXT("child"))
11209 if (name == PUGIXML_TEXT("descendant"))
11210 return axis_descendant;
11211 else if (name == PUGIXML_TEXT("descendant-or-self"))
11212 return axis_descendant_or_self;
11217 if (name == PUGIXML_TEXT("following"))
11218 return axis_following;
11219 else if (name == PUGIXML_TEXT("following-sibling"))
11220 return axis_following_sibling;
11225 if (name == PUGIXML_TEXT("namespace"))
11226 return axis_namespace;
11231 if (name == PUGIXML_TEXT("parent"))
11232 return axis_parent;
11233 else if (name == PUGIXML_TEXT("preceding"))
11234 return axis_preceding;
11235 else if (name == PUGIXML_TEXT("preceding-sibling"))
11236 return axis_preceding_sibling;
11241 if (name == PUGIXML_TEXT("self"))
// Translates a node type name ("comment", "node", "processing-instruction", "text")
// into the matching nodetest_t value, dispatching on the first character of the name.
// Returns nodetest_none when the name is not one of the recognized node types.
11254 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11256 switch (name.begin[0])
11259 if (name == PUGIXML_TEXT("comment"))
11260 return nodetest_type_comment;
11265 if (name == PUGIXML_TEXT("node"))
11266 return nodetest_type_node;
11271 if (name == PUGIXML_TEXT("processing-instruction"))
11272 return nodetest_type_pi;
11277 if (name == PUGIXML_TEXT("text"))
11278 return nodetest_type_text;
11286 return nodetest_none;
11289 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
// Parses one primary expression, dispatching on the lexer's current token.
// Returns the resulting AST node; failure paths return error(...), error_oom(),
// or 0 when string allocation fails.
11290 xpath_ast_node* parse_primary_expression()
11292 switch (_lexer.current())
// Variable reference: the variable name is the current token's contents.
11296 xpath_lexer_string name = _lexer.contents();
11299 return error("Unknown variable: variable set is not provided");
// Look the name up in _variables; a false return from the lookup is reported as out-of-memory.
11301 xpath_variable* var = 0;
11302 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11303 return error_oom();
11306 return error("Unknown variable: variable set does not contain the given name");
// The node's value type is the one reported by the variable itself.
11310 return alloc_node(ast_variable, var->type(), var);
// Parenthesized expression: the inner expression must be followed by a closing brace.
11313 case lex_open_brace:
11317 xpath_ast_node* n = parse_expression();
11320 if (_lexer.current() != lex_close_brace)
11321 return error("Expected ')' to match an opening '('");
// String literal: the token contents are copied into allocator-owned memory.
11328 case lex_quoted_string:
11330 const char_t* value = alloc_string(_lexer.contents());
11331 if (!value) return 0;
11335 return alloc_node(ast_string_constant, xpath_type_string, value);
// Number literal: conversion uses scratch storage; a false return is reported as out-of-memory.
11342 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11343 return error_oom();
11347 return alloc_node(ast_number_constant, xpath_type_number, value);
// Function call: the first two arguments are stored in args[]; later ones are attached
// to the previous argument with set_next.
11352 xpath_ast_node* args[2] = {0};
11355 xpath_lexer_string function = _lexer.contents();
11358 xpath_ast_node* last_arg = 0;
11360 if (_lexer.current() != lex_open_brace)
11361 return error("Unrecognized function call");
// Consume arguments until the closing brace.
11364 while (_lexer.current() != lex_close_brace)
11368 if (_lexer.current() != lex_comma)
11369 return error("No comma between function arguments");
11373 xpath_ast_node* n = parse_expression();
11376 if (argc < 2) args[argc] = n;
11377 else last_arg->set_next(n);
// Name and arity are validated by parse_function.
11385 return parse_function(function, argc, args);
11389 return error("Unrecognizable primary expression");
11393 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11394 // Predicate ::= '[' PredicateExpr ']'
11395 // PredicateExpr ::= Expr
// Parses a primary expression followed by any number of '[' Expr ']' predicates.
// Each predicate wraps the expression parsed so far in an ast_filter node.
// Returns an error when a predicate is applied to a non-node-set expression or
// when the closing ']' is missing; returns 0 when the predicate expression fails to parse.
11396 xpath_ast_node* parse_filter_expression()
11398 xpath_ast_node* n = parse_primary_expression();
11401 while (_lexer.current() == lex_open_square_brace)
// predicates can only filter node sets
11405 if (n->rettype() != xpath_type_node_set)
11406 return error("Predicate has to be applied to node set");
11408 xpath_ast_node* expr = parse_expression();
11409 if (!expr) return 0;
// the previous expression becomes the left operand of the new filter node
11411 n = alloc_node(ast_filter, n, expr, predicate_default);
11414 if (_lexer.current() != lex_close_square_brace)
11415 return error("Expected ']' to match an opening '['");
11423 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11424 // AxisSpecifier ::= AxisName '::' | '@'?
11425 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11426 // NameTest ::= '*' | NCName ':' '*' | QName
11427 // AbbreviatedStep ::= '.' | '..'
// Parses a single location step applied to `set` (may be null for a step with no
// preceding expression). Builds an ast_step node carrying the axis, the node test type
// and a copy of the node test name, then attaches any trailing predicates.
// Returns the result of error() on malformed input and 0 when an allocation or a nested
// parse fails.
11428 xpath_ast_node* parse_step(xpath_ast_node* set)
11430 if (set && set->rettype() != xpath_type_node_set)
11431 return error("Step has to be applied to node set");
11433 bool axis_specified = false;
11434 axis_t axis = axis_child; // implied child axis
// '@' abbreviation selects the attribute axis
11436 if (_lexer.current() == lex_axis_attribute)
11438 axis = axis_attribute;
11439 axis_specified = true;
// '.' abbreviation: self::node(); predicates are rejected
11443 else if (_lexer.current() == lex_dot)
11447 if (_lexer.current() == lex_open_square_brace)
11448 return error("Predicates are not allowed after an abbreviated step");
11450 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
// '..' abbreviation: parent::node(); predicates are rejected
11452 else if (_lexer.current() == lex_double_dot)
11456 if (_lexer.current() == lex_open_square_brace)
11457 return error("Predicates are not allowed after an abbreviated step");
11459 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
// Node test state; nodetest_none means "not determined yet".
11462 nodetest_t nt_type = nodetest_none;
11463 xpath_lexer_string nt_name;
11465 if (_lexer.current() == lex_string)
11468 nt_name = _lexer.contents();
11471 // was it an axis name?
11472 if (_lexer.current() == lex_double_colon)
// an explicit axis after '@' is a duplicate specifier
11475 if (axis_specified)
11476 return error("Two axis specifiers in one step");
11478 axis = parse_axis_name(nt_name, axis_specified);
11480 if (!axis_specified)
11481 return error("Unknown axis");
11483 // read actual node test
11486 if (_lexer.current() == lex_multiply)
11488 nt_type = nodetest_all;
11489 nt_name = xpath_lexer_string();
11492 else if (_lexer.current() == lex_string)
11494 nt_name = _lexer.contents();
11499 return error("Unrecognized node test");
11503 if (nt_type == nodetest_none)
11505 // node type test or processing-instruction
11506 if (_lexer.current() == lex_open_brace)
// empty parentheses: the name must be one of the node type names
11510 if (_lexer.current() == lex_close_brace)
11514 nt_type = parse_node_test_type(nt_name);
11516 if (nt_type == nodetest_none)
11517 return error("Unrecognized node type");
// the name is cleared once it has been turned into a node type
11519 nt_name = xpath_lexer_string();
// processing-instruction('literal'): the literal becomes the node test name
11521 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11523 if (_lexer.current() != lex_quoted_string)
11524 return error("Only literals are allowed as arguments to processing-instruction()");
11526 nt_type = nodetest_pi;
11527 nt_name = _lexer.contents();
11530 if (_lexer.current() != lex_close_brace)
11531 return error("Unmatched brace near processing-instruction()");
11536 return error("Unmatched brace near node type test");
11539 // QName or NCName:*
// A name longer than two characters that ends in ":*" matches every name in a namespace;
// the trailing '*' is dropped so the stored name ends with ':'.
11542 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11544 nt_name.end--; // erase *
11546 nt_type = nodetest_all_in_namespace;
11550 nt_type = nodetest_name;
// bare '*' with no preceding name
11555 else if (_lexer.current() == lex_multiply)
11557 nt_type = nodetest_all;
11562 return error("Unrecognized node test");
// The name is copied because nt_name only references lexer-provided character ranges.
// NOTE(review): lifetime of those ranges is not visible here - confirm.
11565 const char_t* nt_name_copy = alloc_string(nt_name);
11566 if (!nt_name_copy) return 0;
11568 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
// Predicates: the first one is stored as the step's right child, later ones are linked
// to the previous predicate with set_next.
11571 xpath_ast_node* last = 0;
11573 while (_lexer.current() == lex_open_square_brace)
11577 xpath_ast_node* expr = parse_expression();
11578 if (!expr) return 0;
11580 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11581 if (!pred) return 0;
11583 if (_lexer.current() != lex_close_square_brace)
11584 return error("Expected ']' to match an opening '['");
11587 if (last) last->set_next(pred);
11588 else n->set_right(pred);
11596 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
// Parses a sequence of steps separated by '/' or '//', starting from `set`.
// For '//' an extra descendant-or-self step with a node() test is inserted before
// the following step.
11597 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11599 xpath_ast_node* n = parse_step(set);
11602 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
// remember which separator was seen before it is consumed
11604 lexeme_t l = _lexer.current();
11607 if (l == lex_double_slash)
11609 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11620 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11621 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
// Parses an absolute or relative location path. A leading '/' or '//' creates an
// ast_step_root node that the rest of the path is applied to; otherwise the relative
// path is parsed with no starting set.
11622 xpath_ast_node* parse_location_path()
11624 if (_lexer.current() == lex_slash)
11628 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11631 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11632 lexeme_t l = _lexer.current();
11634 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11635 return parse_relative_location_path(n);
11639 else if (_lexer.current() == lex_double_slash)
11643 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
// '//' at the start: root followed by a descendant-or-self step with a node() test
11646 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11649 return parse_relative_location_path(n);
11652 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11653 return parse_relative_location_path(0);
11656 // PathExpr ::= LocationPath
11658 // | FilterExpr '/' RelativeLocationPath
11659 // | FilterExpr '//' RelativeLocationPath
11660 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11661 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
// Parses a path expression or a unary minus. The current token decides between
// three forms: a filter expression optionally followed by a relative path,
// a negation, or a plain location path.
11662 xpath_ast_node* parse_path_or_unary_expression()
11665 // PathExpr begins with either LocationPath or FilterExpr.
11666 // FilterExpr begins with PrimaryExpr
11667 // PrimaryExpr begins with '$' in case of it being a variable reference,
11668 // '(' in case of it being an expression, string literal, number constant or
11670 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11671 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11672 _lexer.current() == lex_string)
11674 if (_lexer.current() == lex_string)
11676 // This is either a function call, or not - if not, we shall proceed with location path
// Peek past whitespace in the lexer's raw state without consuming any token.
11677 const char_t* state = _lexer.state();
11679 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11682 return parse_location_path();
11684 // This looks like a function call; however this still can be a node-test. Check it.
11685 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11686 return parse_location_path();
11689 xpath_ast_node* n = parse_filter_expression();
// A filter expression may be continued with '/' or '//' and a relative path.
11692 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11694 lexeme_t l = _lexer.current();
11697 if (l == lex_double_slash)
11699 if (n->rettype() != xpath_type_node_set)
11700 return error("Step has to be applied to node set");
11702 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11706 // select from location path
11707 return parse_relative_location_path(n);
// Unary minus: the operand is parsed with precedence limit 7 and wrapped in a negate node.
11712 else if (_lexer.current() == lex_minus)
11716 // precedence 7+ - only parses union expressions
11717 xpath_ast_node* n = parse_expression(7);
11720 return alloc_node(ast_op_negate, xpath_type_number, n);
// Anything else is treated as a location path.
11724 return parse_location_path();
// binary_op_t describes a binary operator: the AST node type to create, the value type
// of the result, and a precedence (higher values are grouped first by parse_expression_rec).
11730 ast_type_t asttype;
11731 xpath_value_type rettype;
// Default state means "no operator": ast_unknown, xpath_type_none, precedence 0.
11734 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11738 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
// Classifies the lexer's current token as a binary operator without consuming it.
// Precedence levels used below: or = 1, and = 2, equality = 3, relational = 4,
// additive = 5, multiplicative = 6, union = 7. Returns a default-constructed
// binary_op_t when the token is not a binary operator.
11742 static binary_op_t parse(xpath_lexer& lexer)
11744 switch (lexer.current())
// word operators are recognized by comparing the token contents
11747 if (lexer.contents() == PUGIXML_TEXT("or"))
11748 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11749 else if (lexer.contents() == PUGIXML_TEXT("and"))
11750 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11751 else if (lexer.contents() == PUGIXML_TEXT("div"))
11752 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11753 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11754 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11756 return binary_op_t();
11759 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11761 case lex_not_equal:
11762 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11765 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11768 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11770 case lex_less_or_equal:
11771 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11773 case lex_greater_or_equal:
11774 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11777 return binary_op_t(ast_op_add, xpath_type_number, 5);
11780 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11783 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11786 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11789 return binary_op_t();
// Extends the already-parsed left operand `lhs` with binary operators whose precedence
// is at least `limit`. For each operator the right operand is parsed, and while the
// operator that follows binds tighter than the current one, the right operand is
// extended recursively first. Returns 0 when an operand or node allocation fails and
// an error when '|' is applied to operands that are not both node sets.
11794 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11796 binary_op_t op = binary_op_t::parse(_lexer);
11798 while (op.asttype != ast_unknown && op.precedence >= limit)
11802 xpath_ast_node* rhs = parse_path_or_unary_expression();
11803 if (!rhs) return 0;
11805 binary_op_t nextop = binary_op_t::parse(_lexer);
// higher-precedence operators to the right are folded into rhs before combining
11807 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11809 rhs = parse_expression_rec(rhs, nextop.precedence);
11810 if (!rhs) return 0;
11812 nextop = binary_op_t::parse(_lexer);
11815 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11816 return error("Union operator has to be applied to node sets");
// the combined node becomes the left operand for the next operator at this level
11818 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
11819 if (!lhs) return 0;
11821 op = binary_op_t::parse(_lexer);
11828 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11829 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11830 // EqualityExpr ::= RelationalExpr
11831 // | EqualityExpr '=' RelationalExpr
11832 // | EqualityExpr '!=' RelationalExpr
11833 // RelationalExpr ::= AdditiveExpr
11834 // | RelationalExpr '<' AdditiveExpr
11835 // | RelationalExpr '>' AdditiveExpr
11836 // | RelationalExpr '<=' AdditiveExpr
11837 // | RelationalExpr '>=' AdditiveExpr
11838 // AdditiveExpr ::= MultiplicativeExpr
11839 // | AdditiveExpr '+' MultiplicativeExpr
11840 // | AdditiveExpr '-' MultiplicativeExpr
11841 // MultiplicativeExpr ::= UnaryExpr
11842 // | MultiplicativeExpr '*' UnaryExpr
11843 // | MultiplicativeExpr 'div' UnaryExpr
11844 // | MultiplicativeExpr 'mod' UnaryExpr
// Parses an expression: first a path/unary operand, then any binary operators whose
// precedence is at least `limit` (default 0 accepts every operator).
11845 xpath_ast_node* parse_expression(int limit = 0)
11847 xpath_ast_node* n = parse_path_or_unary_expression();
11850 return parse_expression_rec(n, limit);
// Stores the allocator, variable set and result pointers, keeps the query pointer,
// and constructs the lexer over the same query string.
11853 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
// Parses the whole query as one expression and rejects input that still has tokens
// left after that expression.
11857 xpath_ast_node* parse()
11859 xpath_ast_node* n = parse_expression();
11862 // check if there are unparsed tokens left
11863 if (_lexer.current() != lex_eof)
11864 return error("Incorrect query");
// Convenience entry point: builds a temporary parser for `query` and returns the
// result of its parse() call.
11869 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11871 xpath_parser parser(query, variables, alloc, result);
11873 return parser.parse();
// Backing object of a compiled query: the AST root, the allocator that owns the
// AST memory, and the allocator's first memory block.
11877 struct xpath_query_impl
// Allocates raw memory through xml_memory and constructs the object in place.
// Returns 0 when the allocation fails.
11879 static xpath_query_impl* create()
11881 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11882 if (!memory) return 0;
11884 return new (memory) xpath_query_impl();
// Releases the allocator's pages and then frees the memory obtained in create().
11887 static void destroy(xpath_query_impl* impl)
11889 // free all allocated pages
11890 impl->alloc.release();
11892 // free allocator memory (with the first page)
11893 xml_memory::deallocate(impl);
// The allocator is wired to the embedded block and to the oom flag.
11896 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
// the embedded block's capacity is the size of its data array
11899 block.capacity = sizeof(block.data);
11902 xpath_ast_node* root;
11903 xpath_allocator alloc;
11904 xpath_memory_block block;
// Validates that a compiled query can be evaluated as a node set.
// Returns 0 for a null impl. When the root expression's return type is not a node set,
// the failure is reported by return value under PUGIXML_NO_EXCEPTIONS; otherwise an
// xpath_exception carrying the message below is thrown.
11908 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11910 if (!impl) return 0;
11912 if (impl->root->rettype() != xpath_type_node_set)
11914 #ifdef PUGIXML_NO_EXCEPTIONS
11917 xpath_parse_result res;
11918 res.error = "Expression does not evaluate to node set";
11920 throw xpath_exception(res);
11930 #ifndef PUGIXML_NO_EXCEPTIONS
// Wraps a parse result in an exception; the result must carry an error message.
11931 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11933 assert(_result.error);
// Returns the error message stored in the wrapped parse result.
11936 PUGI__FN const char* xpath_exception::what() const throw()
11938 return _result.error;
// Accessor for the parse result associated with this exception.
11941 PUGI__FN const xpath_parse_result& xpath_exception::result() const
// Default constructor; no member initializers are listed, so members use their own defaults.
11947 PUGI__FN xpath_node::xpath_node()
// Constructs an XPath node that refers to an XML node; the attribute member is left default.
11951 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
// Constructs an XPath node that refers to an attribute. The parent node is stored only
// when the attribute converts to true; otherwise an empty xml_node is stored.
11955 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
// Returns the referenced XML node, or an empty xml_node when this object refers to an
// attribute (in that case _node holds the attribute's parent instead).
11959 PUGI__FN xml_node xpath_node::node() const
11961 return _attribute ? xml_node() : _node;
// Accessor for the attribute this XPath node refers to.
11964 PUGI__FN xml_attribute xpath_node::attribute() const
// Returns the parent: for an attribute it is the stored node, for a node it is that node's parent.
11969 PUGI__FN xml_node xpath_node::parent() const
11971 return _attribute ? _node : _node.parent();
// Helper whose address serves as the "true" value of xpath_node's unspecified-bool conversion.
11974 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
// Boolean-like conversion: non-null when either the node or the attribute is set.
11978 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11980 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
// True when neither the node nor the attribute is set.
11983 PUGI__FN bool xpath_node::operator!() const
11985 return !(_node || _attribute);
// Two XPath nodes are equal when both the node and the attribute members compare equal.
11988 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11990 return _node == n._node && _attribute == n._attribute;
// Two XPath nodes differ when either the node or the attribute member differs.
11993 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11995 return _node != n._node || _attribute != n._attribute;
11998 #ifdef __BORLANDC__
// Borland-only overload: converts the XPath node to bool explicitly before applying &&.
11999 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
12001 return (bool)lhs && rhs;
// Borland-only overload: converts the XPath node to bool explicitly before applying ||.
12004 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
12006 return (bool)lhs || rhs;
// Replaces the contents of the set with a copy of [begin_, end_).
// Two storage strategies are visible: the embedded single-node _storage, and a buffer
// allocated through xml_memory. Any previously allocated external buffer is released.
// When the allocation fails, std::bad_alloc is thrown unless PUGIXML_NO_EXCEPTIONS is defined.
12010 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12012 assert(begin_ <= end_);
12014 size_t size_ = static_cast<size_t>(end_ - begin_);
12018 // deallocate old buffer
12019 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12021 // use internal buffer
// copy the single element only when the range is not empty
12022 if (begin_ != end_) _storage = *begin_;
12024 _begin = &_storage;
12025 _end = &_storage + size_;
// External buffer path: allocate first, so the old buffer is still intact if allocation fails.
12031 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12035 #ifdef PUGIXML_NO_EXCEPTIONS
12038 throw std::bad_alloc();
12042 memcpy(storage, begin_, size_ * sizeof(xpath_node));
12044 // deallocate old buffer
12045 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12049 _end = storage + size_;
12054 #ifdef PUGIXML_HAS_MOVE
// Takes over the contents of rhs. When rhs uses its embedded storage, the node is copied
// and _begin points at this object's own _storage; otherwise rhs's external buffer pointer
// is adopted. rhs is then reset to an empty, unsorted set that uses its embedded storage.
12055 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12058 _storage = rhs._storage;
12059 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
// preserve the element count relative to the new _begin
12060 _end = _begin + (rhs._end - rhs._begin);
12062 rhs._type = type_unsorted;
12063 rhs._begin = &rhs._storage;
12064 rhs._end = rhs._begin;
// Constructs an empty, unsorted set whose range points at the embedded storage.
12068 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
// Constructs a set from the range [begin_, end_): starts empty, then delegates to _assign.
12072 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12074 _assign(begin_, end_, type_);
// Frees the external buffer, if any; the embedded storage needs no release.
12077 PUGI__FN xpath_node_set::~xpath_node_set()
12079 if (_begin != &_storage)
12080 impl::xml_memory::deallocate(_begin);
// Copy constructor: starts empty, then copies the other set's range and type via _assign.
12083 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12085 _assign(ns._begin, ns._end, ns._type);
// Copy assignment; self-assignment returns immediately without touching the contents.
12088 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12090 if (this == &ns) return *this;
12092 _assign(ns._begin, ns._end, ns._type);
12097 #ifdef PUGIXML_HAS_MOVE
// Move constructor: members are first initialized to the empty, unsorted state.
12098 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(&_storage), _end(&_storage)
// Move assignment; self-assignment is a no-op. The current external buffer, if any,
// is released before new contents are taken.
12103 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12105 if (this == &rhs) return *this;
12107 if (_begin != &_storage)
12108 impl::xml_memory::deallocate(_begin);
// Accessor for the set's type_t value.
12116 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
// Number of nodes in the set, computed as the distance between _begin and _end.
12121 PUGI__FN size_t xpath_node_set::size() const
12123 return _end - _begin;
// True when the set contains no nodes.
12126 PUGI__FN bool xpath_node_set::empty() const
12128 return _begin == _end;
// Returns the node at `index`; the index is checked against size() with an assert only.
12131 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12133 assert(index < size());
12134 return _begin[index];
// Iterator accessor for the start of the set.
12137 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
// Iterator accessor for the end of the set.
12142 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
// Sorts the set through impl::xpath_sort and stores the type value it returns.
12147 PUGI__FN void xpath_node_set::sort(bool reverse)
12149 _type = impl::xpath_sort(_begin, _end, _type, reverse);
// Returns the node chosen by impl::xpath_first for the current range and type.
12152 PUGI__FN xpath_node xpath_node_set::first() const
12154 return impl::xpath_first(_begin, _end, _type);
// A default-constructed result holds the "Internal error" message and offset 0.
12157 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
// Boolean conversion of the parse result.
// NOTE(review): presumably true when no error is set - confirm against the body.
12161 PUGI__FN xpath_parse_result::operator bool() const
// Returns the stored error message, or "No error" when the error pointer is null.
12166 PUGI__FN const char* xpath_parse_result::description() const
12168 return error ? error : "No error";
// Records the variable's value type and starts with no successor in its chain.
12171 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
// Returns the variable's name. The name lives in the concrete variable type, so the
// object is downcast to the implementation type that matches its value type.
12175 PUGI__FN const char_t* xpath_variable::name() const
12179 case xpath_type_node_set:
12180 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12182 case xpath_type_number:
12183 return static_cast<const impl::xpath_variable_number*>(this)->name;
12185 case xpath_type_string:
12186 return static_cast<const impl::xpath_variable_string*>(this)->name;
12188 case xpath_type_boolean:
12189 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12192 assert(false && "Invalid variable type"); // unreachable
// Accessor for the variable's value type.
12197 PUGI__FN xpath_value_type xpath_variable::type() const
// Returns the stored value for a boolean variable, or false for any other type.
12202 PUGI__FN bool xpath_variable::get_boolean() const
12204 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
// Returns the stored value for a number variable, or impl::gen_nan() for any other type.
12207 PUGI__FN double xpath_variable::get_number() const
12209 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
// Returns the stored string for a string variable. An empty string literal is returned
// when the type differs or the stored pointer is null, so the result is never null.
12212 PUGI__FN const char_t* xpath_variable::get_string() const
12214 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12215 return value ? value : PUGIXML_TEXT("");
// Returns the stored set for a node-set variable, or impl::dummy_node_set for any other type.
12218 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12220 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
// Stores a boolean value; returns false without changes when the variable is not boolean.
12223 PUGI__FN bool xpath_variable::set(bool value)
12225 if (_type != xpath_type_boolean) return false;
12227 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
// Stores a number value; returns false without changes when the variable is not a number.
12231 PUGI__FN bool xpath_variable::set(double value)
12233 if (_type != xpath_type_number) return false;
12235 static_cast<impl::xpath_variable_number*>(this)->value = value;
// Stores a copy of a string value. Returns false when the variable is not a string or
// when the copy cannot be allocated; the old string is only released after the copy
// has been made.
12239 PUGI__FN bool xpath_variable::set(const char_t* value)
12241 if (_type != xpath_type_string) return false;
12243 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12245 // duplicate string
// size is in bytes and includes one extra character beyond the string length
12246 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12248 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12249 if (!copy) return false;
12251 memcpy(copy, value, size);
12253 // replace old string
12254 if (var->value) impl::xml_memory::deallocate(var->value);
// Stores a copy of a node set; returns false without changes when the variable is not a node set.
12260 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12262 if (_type != xpath_type_node_set) return false;
12264 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
// Default constructor: visits every slot of the _data array.
12268 PUGI__FN xpath_variable_set::xpath_variable_set()
12270 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Destroys the variable chain stored in every slot of _data.
12274 PUGI__FN xpath_variable_set::~xpath_variable_set()
12276 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12277 _destroy(_data[i]);
// Copy constructor: visits every slot of the _data array.
// NOTE(review): the copy of rhs presumably goes through _assign - confirm against the body.
12280 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12282 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
// Copy assignment; self-assignment returns immediately.
12288 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12290 if (this == &rhs) return *this;
12297 #ifdef PUGIXML_HAS_MOVE
// Move constructor: adopts the chain pointer from every slot of rhs.
12298 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12300 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12302 _data[i] = rhs._data[i];
// Move assignment: for every slot, destroys the current chain and adopts the chain
// pointer from rhs. There is no visible self-assignment check.
12307 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12309 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12311 _destroy(_data[i]);
12313 _data[i] = rhs._data[i];
// Copies rhs by cloning every non-empty chain into a temporary set first, so a failed
// clone is detected before this object's own data is modified.
12321 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12323 xpath_variable_set temp;
12325 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12326 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
// Exchanges the chain pointers of this set and rhs, slot by slot.
12332 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12334 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12336 xpath_variable* chain = _data[i];
12338 _data[i] = rhs._data[i];
12339 rhs._data[i] = chain;
// Looks a variable up by name: the name's hash selects a slot of _data, and the chain
// in that slot is walked comparing names with impl::strequal.
12343 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12345 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12346 size_t hash = impl::hash_string(name) % hash_size;
12348 // look for existing variable
12349 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12350 if (impl::strequal(var->name(), name))
// Clones a chain of variables starting at `var` into *out_result.
// Returns false when a variable cannot be allocated or its value cannot be copied;
// variables cloned so far stay linked from *out_result.
12356 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12358 xpath_variable* last = 0;
12362 // allocate storage for new variable
12363 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12364 if (!nvar) return false;
12366 // link the variable to the result immediately to handle failures gracefully
12368 last->_next = nvar;
12370 *out_result = nvar;
12374 // copy the value; this can fail due to out-of-memory conditions
12375 if (!impl::copy_xpath_variable(nvar, var)) return false;
// Deletes variables of a chain; the successor pointer is read before the variable is
// deleted so it stays available afterwards.
12383 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12387 xpath_variable* next = var->_next;
12389 impl::delete_xpath_variable(var->_type, var);
// Returns the variable with the given name, creating it when it does not exist yet.
// An existing variable is returned only if its type matches `type`; on a type mismatch
// the result is 0. A new variable is inserted at the head of its slot's chain.
12395 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12397 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12398 size_t hash = impl::hash_string(name) % hash_size;
12400 // look for existing variable
12401 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12402 if (impl::strequal(var->name(), name))
12403 return var->type() == type ? var : 0;
12405 // add new variable
12406 xpath_variable* result = impl::new_xpath_variable(type, name);
12410 result->_next = _data[hash];
12412 _data[hash] = result;
// Finds or creates a boolean variable and assigns the value; false when add() or set() fails.
12418 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12420 xpath_variable* var = add(name, xpath_type_boolean);
12421 return var ? var->set(value) : false;
// Finds or creates a number variable and assigns the value; false when add() or set() fails.
12424 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12426 xpath_variable* var = add(name, xpath_type_number);
12427 return var ? var->set(value) : false;
// Finds or creates a string variable and assigns the value; false when add() or set() fails.
12430 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12432 xpath_variable* var = add(name, xpath_type_string);
12433 return var ? var->set(value) : false;
// Finds or creates a node-set variable and assigns the value; false when add() or set() fails.
12436 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12438 xpath_variable* var = add(name, xpath_type_node_set);
12439 return var ? var->set(value) : false;
// Looks a variable up by name without creating it; forwards to _find.
12442 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12444 return _find(name);
// Const overload of the name lookup; forwards to _find.
12447 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12449 return _find(name);
// Compiles `query` into an implementation object.
// Failures are reported through _result.error when PUGIXML_NO_EXCEPTIONS is defined;
// otherwise std::bad_alloc is thrown for memory failures and xpath_exception for
// other parse errors.
12452 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12454 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12458 #ifdef PUGIXML_NO_EXCEPTIONS
12459 _result.error = "Out of memory";
12461 throw std::bad_alloc();
12466 using impl::auto_deleter; // MSVC7 workaround
// the holder is given destroy() as its deleter and gives up ownership via release() below
12467 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12469 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12473 qimpl->root->optimize(&qimpl->alloc);
// ownership is transferred to the query object
12475 _impl = impl.release();
// Failure path: the implementation's oom flag selects the out-of-memory report.
12480 #ifdef PUGIXML_NO_EXCEPTIONS
12481 if (qimpl->oom) _result.error = "Out of memory";
12483 if (qimpl->oom) throw std::bad_alloc();
12484 throw xpath_exception(_result);
// Constructs a query with no implementation object.
12490 PUGI__FN xpath_query::xpath_query(): _impl(0)
// Destroys the implementation object through xpath_query_impl::destroy.
12494 PUGI__FN xpath_query::~xpath_query()
12497 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12500 #ifdef PUGIXML_HAS_MOVE
// Move constructor: copies the parse result from rhs and resets rhs's result to a
// default-constructed one.
12501 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12504 _result = rhs._result;
12506 rhs._result = xpath_parse_result();
// Move assignment; self-assignment is a no-op. The current implementation object is
// destroyed, the parse result is copied from rhs, and rhs's result is reset to a
// default-constructed one.
12509 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12511 if (this == &rhs) return *this;
12514 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12517 _result = rhs._result;
12519 rhs._result = xpath_parse_result();
12525 PUGI__FN xpath_value_type xpath_query::return_type() const
12527 if (!_impl) return xpath_type_none;
12529 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
// Evaluates the query as a boolean with `n` as the context node.
// NOTE(review): the out-of-memory check, the #else/#endif lines and the final
// return are not present in this listing - confirm against the full file.
12532 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
// A query without an implementation object evaluates to false.
12534 if (!_impl) return false;
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12536 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12537 impl::xpath_stack_data sd;
12539 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
// Build-dependent failure handling; exception builds throw std::bad_alloc.
12543 #ifdef PUGIXML_NO_EXCEPTIONS
12546 throw std::bad_alloc();
// Evaluates the query as a number with `n` as the context node.
// NOTE(review): the out-of-memory check, the #else/#endif lines and the final
// return are not present in this listing - confirm against the full file.
12553 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
// A query without an implementation object evaluates to the value of impl::gen_nan().
12555 if (!_impl) return impl::gen_nan();
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12557 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12558 impl::xpath_stack_data sd;
12560 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
// Build-dependent failure handling: impl::gen_nan() is returned without
// exceptions, std::bad_alloc is thrown with them.
12564 #ifdef PUGIXML_NO_EXCEPTIONS
12565 return impl::gen_nan();
12567 throw std::bad_alloc();
// The string_t-returning overload is compiled only when PUGIXML_NO_STL is not defined.
12574 #ifndef PUGIXML_NO_STL
// Evaluates the query as a string with `n` as the context node and returns the
// result as a string_t.
// NOTE(review): the out-of-memory check, the no-exceptions return and the
// #else/#endif lines are not present in this listing - confirm against the full file.
12575 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
// A query without an implementation object evaluates to an empty string.
12577 if (!_impl) return string_t();
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12579 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12580 impl::xpath_stack_data sd;
12582 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
// Build-dependent failure handling; exception builds throw std::bad_alloc.
12586 #ifdef PUGIXML_NO_EXCEPTIONS
12589 throw std::bad_alloc();
// Copy the evaluated characters into the returned string_t.
12593 return string_t(r.c_str(), r.length());
// Evaluates the query as a string with `n` as the context node and copies the
// result into a caller-supplied buffer of `capacity` characters.
// NOTE(review): the out-of-memory check, any guard around the copy and the final
// return are not present in this listing - confirm against the full file.
12597 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12599 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12600 impl::xpath_stack_data sd;
// A query without an implementation object evaluates to an empty xpath_string.
12602 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
// Build-dependent failure handling: the result is reset to an empty string
// without exceptions, std::bad_alloc is thrown with them.
12606 #ifdef PUGIXML_NO_EXCEPTIONS
12607 r = impl::xpath_string();
12609 throw std::bad_alloc();
// Number of characters needed for the whole result, including the terminating zero.
12613 size_t full_size = r.length() + 1;
// Number of characters actually written (terminator included), clamped to capacity.
12617 size_t size = (full_size < capacity) ? full_size : capacity;
// Copy size - 1 characters and terminate the buffer.
// NOTE(review): size - 1 would wrap around if size were 0; presumably a
// capacity check in the lines not shown prevents that - confirm.
12620 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12621 buffer[size - 1] = 0;
// Evaluates the query as a node set with `n` as the context node.
// NOTE(review): the out-of-memory check and the #else/#endif lines are not
// present in this listing - confirm against the full file.
12627 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
// evaluate_node_set_prepare yields the AST root to evaluate; a null result
// produces an empty node set.
12629 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12630 if (!root) return xpath_node_set();
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12632 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12633 impl::xpath_stack_data sd;
// nodeset_eval_all is passed as the evaluation mode.
12635 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
// Build-dependent failure handling: an empty set is returned without
// exceptions, std::bad_alloc is thrown with them.
12639 #ifdef PUGIXML_NO_EXCEPTIONS
12640 return xpath_node_set();
12642 throw std::bad_alloc();
// Build the public node set from the raw result's range and type.
12646 return xpath_node_set(r.begin(), r.end(), r.type());
// Evaluates the query as a node set with `n` as the context node and returns a single xpath_node.
// NOTE(review): the out-of-memory check, the #else/#endif lines and the final
// return are not present in this listing - confirm against the full file.
12649 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
// evaluate_node_set_prepare yields the AST root to evaluate; a null result
// produces a default-constructed xpath_node.
12651 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12652 if (!root) return xpath_node();
// Context built from n; the two 1 arguments are presumably position and size -
// confirm against xpath_context.
12654 impl::xpath_context c(n, 1, 1);
// Provides the stack (sd.stack) handed to the evaluator.
12655 impl::xpath_stack_data sd;
// nodeset_eval_first is passed as the evaluation mode.
12657 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
// Build-dependent failure handling: a default-constructed node is returned
// without exceptions, std::bad_alloc is thrown with them.
12661 #ifdef PUGIXML_NO_EXCEPTIONS
12662 return xpath_node();
12664 throw std::bad_alloc();
// Accessor that returns a const reference to an xpath_parse_result.
// NOTE(review): the body is not present in this listing; presumably it returns
// the _result member - confirm against the full file.
12671 PUGI__FN const xpath_parse_result& xpath_query::result() const
// File-local function whose address is the non-null value returned by
// xpath_query's unspecified_bool_type conversion operator.
12676 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12680 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12682 return _impl ? unspecified_bool_xpath_query : 0;
// Logical-not operator for xpath_query.
// NOTE(review): the body is not present in this listing; presumably it mirrors
// the bool conversion operator by testing _impl - confirm against the full file.
12685 PUGI__FN bool xpath_query::operator!() const
12690 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12692 xpath_query q(query, variables);
12693 return q.evaluate_node(*this);
12696 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12698 return query.evaluate_node(*this);
12701 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12703 xpath_query q(query, variables);
12704 return q.evaluate_node_set(*this);
12707 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12709 return query.evaluate_node_set(*this);
12712 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12714 xpath_query q(query, variables);
12715 return q.evaluate_node(*this);
12718 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12720 return query.evaluate_node(*this);
// End-of-file cleanup: restore compiler warning/option state and remove the
// file-local helper macros.
// NOTE(review): the #endif lines that close the conditionals below are not
// present in this listing.

// Borland: pop the saved option state (the matching push is presumably near the
// top of the file - confirm).
12726 #ifdef __BORLANDC__
12727 # pragma option pop
12730 // Intel C++ does not properly keep warning state for function templates,
12731 // so popping warning state at the end of translation unit leads to warnings in the middle.
// MSVC (but not Intel): pairs with the `#pragma warning(push)` at the top of the file.
12732 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12733 # pragma warning(pop)
// Clang/C2 under MSVC: pairs with the `#pragma clang diagnostic push` at the top of the file.
12736 #if defined(_MSC_VER) && defined(__c2__)
12737 # pragma clang diagnostic pop
12740 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12741 #undef PUGI__NO_INLINE
12742 #undef PUGI__UNLIKELY
12743 #undef PUGI__STATIC_ASSERT
12744 #undef PUGI__DMC_VOLATILE
12745 #undef PUGI__UNSIGNED_OVERFLOW
12746 #undef PUGI__MSVC_CRT_VERSION
12747 #undef PUGI__SNPRINTF
12748 #undef PUGI__NS_BEGIN
12749 #undef PUGI__NS_END
12751 #undef PUGI__FN_NO_INLINE
12752 #undef PUGI__GETHEADER_IMPL
12753 #undef PUGI__GETPAGE_IMPL
12754 #undef PUGI__GETPAGE
12755 #undef PUGI__NODETYPE
12756 #undef PUGI__IS_CHARTYPE_IMPL
12757 #undef PUGI__IS_CHARTYPE
12758 #undef PUGI__IS_CHARTYPEX
12759 #undef PUGI__ENDSWITH
12760 #undef PUGI__SKIPWS
12761 #undef PUGI__OPTSET
12762 #undef PUGI__PUSHNODE
12763 #undef PUGI__POPNODE
12764 #undef PUGI__SCANFOR
12765 #undef PUGI__SCANWHILE
12766 #undef PUGI__SCANWHILE_UNROLL
12767 #undef PUGI__ENDSEG
12768 #undef PUGI__THROW_ERROR
12769 #undef PUGI__CHECK_ERROR
12774 * Copyright (c) 2006-2018 Arseny Kapoulkine
12776 * Permission is hereby granted, free of charge, to any person
12777 * obtaining a copy of this software and associated documentation
12778 * files (the "Software"), to deal in the Software without
12779 * restriction, including without limitation the rights to use,
12780 * copy, modify, merge, publish, distribute, sublicense, and/or sell
12781 * copies of the Software, and to permit persons to whom the
12782 * Software is furnished to do so, subject to the following
12785 * The above copyright notice and this permission notice shall be
12786 * included in all copies or substantial portions of the Software.
12788 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12789 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12790 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12791 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12792 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12793 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12794 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12795 * OTHER DEALINGS IN THE SOFTWARE.