TinyXml is a compact library that implements XML Parsing and Manipulation.

Dependents:   tinyxml_test

Committer:
wvd_vegt
Date:
Mon Apr 18 18:54:09 2011 +0000
Revision:
0:3c1d63c20cfc
First Version

Who changed what in which revision?

User | Revision | Line number | New contents of line
wvd_vegt 0:3c1d63c20cfc 1 /*
wvd_vegt 0:3c1d63c20cfc 2 www.sourceforge.net/projects/tinyxml
wvd_vegt 0:3c1d63c20cfc 3 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
wvd_vegt 0:3c1d63c20cfc 4
wvd_vegt 0:3c1d63c20cfc 5 This software is provided 'as-is', without any express or implied
wvd_vegt 0:3c1d63c20cfc 6 warranty. In no event will the authors be held liable for any
wvd_vegt 0:3c1d63c20cfc 7 damages arising from the use of this software.
wvd_vegt 0:3c1d63c20cfc 8
wvd_vegt 0:3c1d63c20cfc 9 Permission is granted to anyone to use this software for any
wvd_vegt 0:3c1d63c20cfc 10 purpose, including commercial applications, and to alter it and
wvd_vegt 0:3c1d63c20cfc 11 redistribute it freely, subject to the following restrictions:
wvd_vegt 0:3c1d63c20cfc 12
wvd_vegt 0:3c1d63c20cfc 13 1. The origin of this software must not be misrepresented; you must
wvd_vegt 0:3c1d63c20cfc 14 not claim that you wrote the original software. If you use this
wvd_vegt 0:3c1d63c20cfc 15 software in a product, an acknowledgment in the product documentation
wvd_vegt 0:3c1d63c20cfc 16 would be appreciated but is not required.
wvd_vegt 0:3c1d63c20cfc 17
wvd_vegt 0:3c1d63c20cfc 18 2. Altered source versions must be plainly marked as such, and
wvd_vegt 0:3c1d63c20cfc 19 must not be misrepresented as being the original software.
wvd_vegt 0:3c1d63c20cfc 20
wvd_vegt 0:3c1d63c20cfc 21 3. This notice may not be removed or altered from any source
wvd_vegt 0:3c1d63c20cfc 22 distribution.
wvd_vegt 0:3c1d63c20cfc 23 */
wvd_vegt 0:3c1d63c20cfc 24
wvd_vegt 0:3c1d63c20cfc 25 #include <ctype.h>
wvd_vegt 0:3c1d63c20cfc 26 #include <stddef.h>
wvd_vegt 0:3c1d63c20cfc 27
wvd_vegt 0:3c1d63c20cfc 28 #include "tinyxml.h"
wvd_vegt 0:3c1d63c20cfc 29
wvd_vegt 0:3c1d63c20cfc 30 //#define DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 31 #if defined( DEBUG_PARSER )
wvd_vegt 0:3c1d63c20cfc 32 # if defined( DEBUG ) && defined( _MSC_VER )
wvd_vegt 0:3c1d63c20cfc 33 # include <windows.h>
wvd_vegt 0:3c1d63c20cfc 34 # define TIXML_LOG OutputDebugString
wvd_vegt 0:3c1d63c20cfc 35 # else
wvd_vegt 0:3c1d63c20cfc 36 # define TIXML_LOG printf
wvd_vegt 0:3c1d63c20cfc 37 # endif
wvd_vegt 0:3c1d63c20cfc 38 #endif
wvd_vegt 0:3c1d63c20cfc 39
wvd_vegt 0:3c1d63c20cfc 40 // Note tha "PutString" hardcodes the same list. This
wvd_vegt 0:3c1d63c20cfc 41 // is less flexible than it appears. Changing the entries
wvd_vegt 0:3c1d63c20cfc 42 // or order will break putstring.
wvd_vegt 0:3c1d63c20cfc 43 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
wvd_vegt 0:3c1d63c20cfc 44 {
wvd_vegt 0:3c1d63c20cfc 45 { "&amp;", 5, '&' },
wvd_vegt 0:3c1d63c20cfc 46 { "&lt;", 4, '<' },
wvd_vegt 0:3c1d63c20cfc 47 { "&gt;", 4, '>' },
wvd_vegt 0:3c1d63c20cfc 48 { "&quot;", 6, '\"' },
wvd_vegt 0:3c1d63c20cfc 49 { "&apos;", 6, '\'' }
wvd_vegt 0:3c1d63c20cfc 50 };
wvd_vegt 0:3c1d63c20cfc 51
wvd_vegt 0:3c1d63c20cfc 52 // Bunch of unicode info at:
wvd_vegt 0:3c1d63c20cfc 53 // http://www.unicode.org/faq/utf_bom.html
wvd_vegt 0:3c1d63c20cfc 54 // Including the basic of this table, which determines the #bytes in the
wvd_vegt 0:3c1d63c20cfc 55 // sequence from the lead byte. 1 placed for invalid sequences --
wvd_vegt 0:3c1d63c20cfc 56 // although the result will be junk, pass it through as much as possible.
wvd_vegt 0:3c1d63c20cfc 57 // Beware of the non-characters in UTF-8:
wvd_vegt 0:3c1d63c20cfc 58 // ef bb bf (Microsoft "lead bytes")
wvd_vegt 0:3c1d63c20cfc 59 // ef bf be
wvd_vegt 0:3c1d63c20cfc 60 // ef bf bf
wvd_vegt 0:3c1d63c20cfc 61
// The three bytes of the sequence ef bb bf (the Microsoft "lead bytes").
// The parser compares input bytes against these when the encoding is UTF-8.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;   // first byte
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;   // second byte
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;   // third byte
wvd_vegt 0:3c1d63c20cfc 65
wvd_vegt 0:3c1d63c20cfc 66 const int TiXmlBase::utf8ByteTable[256] =
wvd_vegt 0:3c1d63c20cfc 67 {
wvd_vegt 0:3c1d63c20cfc 68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
wvd_vegt 0:3c1d63c20cfc 69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
wvd_vegt 0:3c1d63c20cfc 70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
wvd_vegt 0:3c1d63c20cfc 71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
wvd_vegt 0:3c1d63c20cfc 72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
wvd_vegt 0:3c1d63c20cfc 73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
wvd_vegt 0:3c1d63c20cfc 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
wvd_vegt 0:3c1d63c20cfc 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
wvd_vegt 0:3c1d63c20cfc 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
wvd_vegt 0:3c1d63c20cfc 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
wvd_vegt 0:3c1d63c20cfc 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
wvd_vegt 0:3c1d63c20cfc 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
wvd_vegt 0:3c1d63c20cfc 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
wvd_vegt 0:3c1d63c20cfc 81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
wvd_vegt 0:3c1d63c20cfc 82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
wvd_vegt 0:3c1d63c20cfc 83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
wvd_vegt 0:3c1d63c20cfc 84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
wvd_vegt 0:3c1d63c20cfc 85 };
wvd_vegt 0:3c1d63c20cfc 86
wvd_vegt 0:3c1d63c20cfc 87
wvd_vegt 0:3c1d63c20cfc 88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
wvd_vegt 0:3c1d63c20cfc 89 {
wvd_vegt 0:3c1d63c20cfc 90 const unsigned long BYTE_MASK = 0xBF;
wvd_vegt 0:3c1d63c20cfc 91 const unsigned long BYTE_MARK = 0x80;
wvd_vegt 0:3c1d63c20cfc 92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
wvd_vegt 0:3c1d63c20cfc 93
wvd_vegt 0:3c1d63c20cfc 94 if (input < 0x80)
wvd_vegt 0:3c1d63c20cfc 95 *length = 1;
wvd_vegt 0:3c1d63c20cfc 96 else if ( input < 0x800 )
wvd_vegt 0:3c1d63c20cfc 97 *length = 2;
wvd_vegt 0:3c1d63c20cfc 98 else if ( input < 0x10000 )
wvd_vegt 0:3c1d63c20cfc 99 *length = 3;
wvd_vegt 0:3c1d63c20cfc 100 else if ( input < 0x200000 )
wvd_vegt 0:3c1d63c20cfc 101 *length = 4;
wvd_vegt 0:3c1d63c20cfc 102 else
wvd_vegt 0:3c1d63c20cfc 103 { *length = 0; return; } // This code won't covert this correctly anyway.
wvd_vegt 0:3c1d63c20cfc 104
wvd_vegt 0:3c1d63c20cfc 105 output += *length;
wvd_vegt 0:3c1d63c20cfc 106
wvd_vegt 0:3c1d63c20cfc 107 // Scary scary fall throughs.
wvd_vegt 0:3c1d63c20cfc 108 switch (*length)
wvd_vegt 0:3c1d63c20cfc 109 {
wvd_vegt 0:3c1d63c20cfc 110 case 4:
wvd_vegt 0:3c1d63c20cfc 111 --output;
wvd_vegt 0:3c1d63c20cfc 112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
wvd_vegt 0:3c1d63c20cfc 113 input >>= 6;
wvd_vegt 0:3c1d63c20cfc 114 case 3:
wvd_vegt 0:3c1d63c20cfc 115 --output;
wvd_vegt 0:3c1d63c20cfc 116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
wvd_vegt 0:3c1d63c20cfc 117 input >>= 6;
wvd_vegt 0:3c1d63c20cfc 118 case 2:
wvd_vegt 0:3c1d63c20cfc 119 --output;
wvd_vegt 0:3c1d63c20cfc 120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
wvd_vegt 0:3c1d63c20cfc 121 input >>= 6;
wvd_vegt 0:3c1d63c20cfc 122 case 1:
wvd_vegt 0:3c1d63c20cfc 123 --output;
wvd_vegt 0:3c1d63c20cfc 124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
wvd_vegt 0:3c1d63c20cfc 125 }
wvd_vegt 0:3c1d63c20cfc 126 }
wvd_vegt 0:3c1d63c20cfc 127
wvd_vegt 0:3c1d63c20cfc 128
wvd_vegt 0:3c1d63c20cfc 129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
wvd_vegt 0:3c1d63c20cfc 130 {
wvd_vegt 0:3c1d63c20cfc 131 // This will only work for low-ascii, everything else is assumed to be a valid
wvd_vegt 0:3c1d63c20cfc 132 // letter. I'm not sure this is the best approach, but it is quite tricky trying
wvd_vegt 0:3c1d63c20cfc 133 // to figure out alhabetical vs. not across encoding. So take a very
wvd_vegt 0:3c1d63c20cfc 134 // conservative approach.
wvd_vegt 0:3c1d63c20cfc 135
wvd_vegt 0:3c1d63c20cfc 136 // if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 137 // {
wvd_vegt 0:3c1d63c20cfc 138 if ( anyByte < 127 )
wvd_vegt 0:3c1d63c20cfc 139 return isalpha( anyByte );
wvd_vegt 0:3c1d63c20cfc 140 else
wvd_vegt 0:3c1d63c20cfc 141 return 1; // What else to do? The unicode set is huge...get the english ones right.
wvd_vegt 0:3c1d63c20cfc 142 // }
wvd_vegt 0:3c1d63c20cfc 143 // else
wvd_vegt 0:3c1d63c20cfc 144 // {
wvd_vegt 0:3c1d63c20cfc 145 // return isalpha( anyByte );
wvd_vegt 0:3c1d63c20cfc 146 // }
wvd_vegt 0:3c1d63c20cfc 147 }
wvd_vegt 0:3c1d63c20cfc 148
wvd_vegt 0:3c1d63c20cfc 149
wvd_vegt 0:3c1d63c20cfc 150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
wvd_vegt 0:3c1d63c20cfc 151 {
wvd_vegt 0:3c1d63c20cfc 152 // This will only work for low-ascii, everything else is assumed to be a valid
wvd_vegt 0:3c1d63c20cfc 153 // letter. I'm not sure this is the best approach, but it is quite tricky trying
wvd_vegt 0:3c1d63c20cfc 154 // to figure out alhabetical vs. not across encoding. So take a very
wvd_vegt 0:3c1d63c20cfc 155 // conservative approach.
wvd_vegt 0:3c1d63c20cfc 156
wvd_vegt 0:3c1d63c20cfc 157 // if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 158 // {
wvd_vegt 0:3c1d63c20cfc 159 if ( anyByte < 127 )
wvd_vegt 0:3c1d63c20cfc 160 return isalnum( anyByte );
wvd_vegt 0:3c1d63c20cfc 161 else
wvd_vegt 0:3c1d63c20cfc 162 return 1; // What else to do? The unicode set is huge...get the english ones right.
wvd_vegt 0:3c1d63c20cfc 163 // }
wvd_vegt 0:3c1d63c20cfc 164 // else
wvd_vegt 0:3c1d63c20cfc 165 // {
wvd_vegt 0:3c1d63c20cfc 166 // return isalnum( anyByte );
wvd_vegt 0:3c1d63c20cfc 167 // }
wvd_vegt 0:3c1d63c20cfc 168 }
wvd_vegt 0:3c1d63c20cfc 169
wvd_vegt 0:3c1d63c20cfc 170
wvd_vegt 0:3c1d63c20cfc 171 class TiXmlParsingData
wvd_vegt 0:3c1d63c20cfc 172 {
wvd_vegt 0:3c1d63c20cfc 173 friend class TiXmlDocument;
wvd_vegt 0:3c1d63c20cfc 174 public:
wvd_vegt 0:3c1d63c20cfc 175 void Stamp( const char* now, TiXmlEncoding encoding );
wvd_vegt 0:3c1d63c20cfc 176
wvd_vegt 0:3c1d63c20cfc 177 const TiXmlCursor& Cursor() { return cursor; }
wvd_vegt 0:3c1d63c20cfc 178
wvd_vegt 0:3c1d63c20cfc 179 private:
wvd_vegt 0:3c1d63c20cfc 180 // Only used by the document!
wvd_vegt 0:3c1d63c20cfc 181 TiXmlParsingData( const char* start, int _tabsize, int row, int col )
wvd_vegt 0:3c1d63c20cfc 182 {
wvd_vegt 0:3c1d63c20cfc 183 assert( start );
wvd_vegt 0:3c1d63c20cfc 184 stamp = start;
wvd_vegt 0:3c1d63c20cfc 185 tabsize = _tabsize;
wvd_vegt 0:3c1d63c20cfc 186 cursor.row = row;
wvd_vegt 0:3c1d63c20cfc 187 cursor.col = col;
wvd_vegt 0:3c1d63c20cfc 188 }
wvd_vegt 0:3c1d63c20cfc 189
wvd_vegt 0:3c1d63c20cfc 190 TiXmlCursor cursor;
wvd_vegt 0:3c1d63c20cfc 191 const char* stamp;
wvd_vegt 0:3c1d63c20cfc 192 int tabsize;
wvd_vegt 0:3c1d63c20cfc 193 };
wvd_vegt 0:3c1d63c20cfc 194
wvd_vegt 0:3c1d63c20cfc 195
wvd_vegt 0:3c1d63c20cfc 196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 197 {
wvd_vegt 0:3c1d63c20cfc 198 assert( now );
wvd_vegt 0:3c1d63c20cfc 199
wvd_vegt 0:3c1d63c20cfc 200 // Do nothing if the tabsize is 0.
wvd_vegt 0:3c1d63c20cfc 201 if ( tabsize < 1 )
wvd_vegt 0:3c1d63c20cfc 202 {
wvd_vegt 0:3c1d63c20cfc 203 return;
wvd_vegt 0:3c1d63c20cfc 204 }
wvd_vegt 0:3c1d63c20cfc 205
wvd_vegt 0:3c1d63c20cfc 206 // Get the current row, column.
wvd_vegt 0:3c1d63c20cfc 207 int row = cursor.row;
wvd_vegt 0:3c1d63c20cfc 208 int col = cursor.col;
wvd_vegt 0:3c1d63c20cfc 209 const char* p = stamp;
wvd_vegt 0:3c1d63c20cfc 210 assert( p );
wvd_vegt 0:3c1d63c20cfc 211
wvd_vegt 0:3c1d63c20cfc 212 while ( p < now )
wvd_vegt 0:3c1d63c20cfc 213 {
wvd_vegt 0:3c1d63c20cfc 214 // Treat p as unsigned, so we have a happy compiler.
wvd_vegt 0:3c1d63c20cfc 215 const unsigned char* pU = (const unsigned char*)p;
wvd_vegt 0:3c1d63c20cfc 216
wvd_vegt 0:3c1d63c20cfc 217 // Code contributed by Fletcher Dunn: (modified by lee)
wvd_vegt 0:3c1d63c20cfc 218 switch (*pU) {
wvd_vegt 0:3c1d63c20cfc 219 case 0:
wvd_vegt 0:3c1d63c20cfc 220 // We *should* never get here, but in case we do, don't
wvd_vegt 0:3c1d63c20cfc 221 // advance past the terminating null character, ever
wvd_vegt 0:3c1d63c20cfc 222 return;
wvd_vegt 0:3c1d63c20cfc 223
wvd_vegt 0:3c1d63c20cfc 224 case '\r':
wvd_vegt 0:3c1d63c20cfc 225 // bump down to the next line
wvd_vegt 0:3c1d63c20cfc 226 ++row;
wvd_vegt 0:3c1d63c20cfc 227 col = 0;
wvd_vegt 0:3c1d63c20cfc 228 // Eat the character
wvd_vegt 0:3c1d63c20cfc 229 ++p;
wvd_vegt 0:3c1d63c20cfc 230
wvd_vegt 0:3c1d63c20cfc 231 // Check for \r\n sequence, and treat this as a single character
wvd_vegt 0:3c1d63c20cfc 232 if (*p == '\n') {
wvd_vegt 0:3c1d63c20cfc 233 ++p;
wvd_vegt 0:3c1d63c20cfc 234 }
wvd_vegt 0:3c1d63c20cfc 235 break;
wvd_vegt 0:3c1d63c20cfc 236
wvd_vegt 0:3c1d63c20cfc 237 case '\n':
wvd_vegt 0:3c1d63c20cfc 238 // bump down to the next line
wvd_vegt 0:3c1d63c20cfc 239 ++row;
wvd_vegt 0:3c1d63c20cfc 240 col = 0;
wvd_vegt 0:3c1d63c20cfc 241
wvd_vegt 0:3c1d63c20cfc 242 // Eat the character
wvd_vegt 0:3c1d63c20cfc 243 ++p;
wvd_vegt 0:3c1d63c20cfc 244
wvd_vegt 0:3c1d63c20cfc 245 // Check for \n\r sequence, and treat this as a single
wvd_vegt 0:3c1d63c20cfc 246 // character. (Yes, this bizarre thing does occur still
wvd_vegt 0:3c1d63c20cfc 247 // on some arcane platforms...)
wvd_vegt 0:3c1d63c20cfc 248 if (*p == '\r') {
wvd_vegt 0:3c1d63c20cfc 249 ++p;
wvd_vegt 0:3c1d63c20cfc 250 }
wvd_vegt 0:3c1d63c20cfc 251 break;
wvd_vegt 0:3c1d63c20cfc 252
wvd_vegt 0:3c1d63c20cfc 253 case '\t':
wvd_vegt 0:3c1d63c20cfc 254 // Eat the character
wvd_vegt 0:3c1d63c20cfc 255 ++p;
wvd_vegt 0:3c1d63c20cfc 256
wvd_vegt 0:3c1d63c20cfc 257 // Skip to next tab stop
wvd_vegt 0:3c1d63c20cfc 258 col = (col / tabsize + 1) * tabsize;
wvd_vegt 0:3c1d63c20cfc 259 break;
wvd_vegt 0:3c1d63c20cfc 260
wvd_vegt 0:3c1d63c20cfc 261 case TIXML_UTF_LEAD_0:
wvd_vegt 0:3c1d63c20cfc 262 if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 263 {
wvd_vegt 0:3c1d63c20cfc 264 if ( *(p+1) && *(p+2) )
wvd_vegt 0:3c1d63c20cfc 265 {
wvd_vegt 0:3c1d63c20cfc 266 // In these cases, don't advance the column. These are
wvd_vegt 0:3c1d63c20cfc 267 // 0-width spaces.
wvd_vegt 0:3c1d63c20cfc 268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
wvd_vegt 0:3c1d63c20cfc 269 p += 3;
wvd_vegt 0:3c1d63c20cfc 270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
wvd_vegt 0:3c1d63c20cfc 271 p += 3;
wvd_vegt 0:3c1d63c20cfc 272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
wvd_vegt 0:3c1d63c20cfc 273 p += 3;
wvd_vegt 0:3c1d63c20cfc 274 else
wvd_vegt 0:3c1d63c20cfc 275 { p +=3; ++col; } // A normal character.
wvd_vegt 0:3c1d63c20cfc 276 }
wvd_vegt 0:3c1d63c20cfc 277 }
wvd_vegt 0:3c1d63c20cfc 278 else
wvd_vegt 0:3c1d63c20cfc 279 {
wvd_vegt 0:3c1d63c20cfc 280 ++p;
wvd_vegt 0:3c1d63c20cfc 281 ++col;
wvd_vegt 0:3c1d63c20cfc 282 }
wvd_vegt 0:3c1d63c20cfc 283 break;
wvd_vegt 0:3c1d63c20cfc 284
wvd_vegt 0:3c1d63c20cfc 285 default:
wvd_vegt 0:3c1d63c20cfc 286 if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 287 {
wvd_vegt 0:3c1d63c20cfc 288 // Eat the 1 to 4 byte utf8 character.
wvd_vegt 0:3c1d63c20cfc 289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
wvd_vegt 0:3c1d63c20cfc 290 if ( step == 0 )
wvd_vegt 0:3c1d63c20cfc 291 step = 1; // Error case from bad encoding, but handle gracefully.
wvd_vegt 0:3c1d63c20cfc 292 p += step;
wvd_vegt 0:3c1d63c20cfc 293
wvd_vegt 0:3c1d63c20cfc 294 // Just advance one column, of course.
wvd_vegt 0:3c1d63c20cfc 295 ++col;
wvd_vegt 0:3c1d63c20cfc 296 }
wvd_vegt 0:3c1d63c20cfc 297 else
wvd_vegt 0:3c1d63c20cfc 298 {
wvd_vegt 0:3c1d63c20cfc 299 ++p;
wvd_vegt 0:3c1d63c20cfc 300 ++col;
wvd_vegt 0:3c1d63c20cfc 301 }
wvd_vegt 0:3c1d63c20cfc 302 break;
wvd_vegt 0:3c1d63c20cfc 303 }
wvd_vegt 0:3c1d63c20cfc 304 }
wvd_vegt 0:3c1d63c20cfc 305 cursor.row = row;
wvd_vegt 0:3c1d63c20cfc 306 cursor.col = col;
wvd_vegt 0:3c1d63c20cfc 307 assert( cursor.row >= -1 );
wvd_vegt 0:3c1d63c20cfc 308 assert( cursor.col >= -1 );
wvd_vegt 0:3c1d63c20cfc 309 stamp = p;
wvd_vegt 0:3c1d63c20cfc 310 assert( stamp );
wvd_vegt 0:3c1d63c20cfc 311 }
wvd_vegt 0:3c1d63c20cfc 312
wvd_vegt 0:3c1d63c20cfc 313
wvd_vegt 0:3c1d63c20cfc 314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 315 {
wvd_vegt 0:3c1d63c20cfc 316 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 317 {
wvd_vegt 0:3c1d63c20cfc 318 return 0;
wvd_vegt 0:3c1d63c20cfc 319 }
wvd_vegt 0:3c1d63c20cfc 320 if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 321 {
wvd_vegt 0:3c1d63c20cfc 322 while ( *p )
wvd_vegt 0:3c1d63c20cfc 323 {
wvd_vegt 0:3c1d63c20cfc 324 const unsigned char* pU = (const unsigned char*)p;
wvd_vegt 0:3c1d63c20cfc 325
wvd_vegt 0:3c1d63c20cfc 326 // Skip the stupid Microsoft UTF-8 Byte order marks
wvd_vegt 0:3c1d63c20cfc 327 if ( *(pU+0)==TIXML_UTF_LEAD_0
wvd_vegt 0:3c1d63c20cfc 328 && *(pU+1)==TIXML_UTF_LEAD_1
wvd_vegt 0:3c1d63c20cfc 329 && *(pU+2)==TIXML_UTF_LEAD_2 )
wvd_vegt 0:3c1d63c20cfc 330 {
wvd_vegt 0:3c1d63c20cfc 331 p += 3;
wvd_vegt 0:3c1d63c20cfc 332 continue;
wvd_vegt 0:3c1d63c20cfc 333 }
wvd_vegt 0:3c1d63c20cfc 334 else if(*(pU+0)==TIXML_UTF_LEAD_0
wvd_vegt 0:3c1d63c20cfc 335 && *(pU+1)==0xbfU
wvd_vegt 0:3c1d63c20cfc 336 && *(pU+2)==0xbeU )
wvd_vegt 0:3c1d63c20cfc 337 {
wvd_vegt 0:3c1d63c20cfc 338 p += 3;
wvd_vegt 0:3c1d63c20cfc 339 continue;
wvd_vegt 0:3c1d63c20cfc 340 }
wvd_vegt 0:3c1d63c20cfc 341 else if(*(pU+0)==TIXML_UTF_LEAD_0
wvd_vegt 0:3c1d63c20cfc 342 && *(pU+1)==0xbfU
wvd_vegt 0:3c1d63c20cfc 343 && *(pU+2)==0xbfU )
wvd_vegt 0:3c1d63c20cfc 344 {
wvd_vegt 0:3c1d63c20cfc 345 p += 3;
wvd_vegt 0:3c1d63c20cfc 346 continue;
wvd_vegt 0:3c1d63c20cfc 347 }
wvd_vegt 0:3c1d63c20cfc 348
wvd_vegt 0:3c1d63c20cfc 349 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space.
wvd_vegt 0:3c1d63c20cfc 350 ++p;
wvd_vegt 0:3c1d63c20cfc 351 else
wvd_vegt 0:3c1d63c20cfc 352 break;
wvd_vegt 0:3c1d63c20cfc 353 }
wvd_vegt 0:3c1d63c20cfc 354 }
wvd_vegt 0:3c1d63c20cfc 355 else
wvd_vegt 0:3c1d63c20cfc 356 {
wvd_vegt 0:3c1d63c20cfc 357 while ( *p && IsWhiteSpace( *p ) )
wvd_vegt 0:3c1d63c20cfc 358 ++p;
wvd_vegt 0:3c1d63c20cfc 359 }
wvd_vegt 0:3c1d63c20cfc 360
wvd_vegt 0:3c1d63c20cfc 361 return p;
wvd_vegt 0:3c1d63c20cfc 362 }
wvd_vegt 0:3c1d63c20cfc 363
wvd_vegt 0:3c1d63c20cfc 364 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 366 {
wvd_vegt 0:3c1d63c20cfc 367 for( ;; )
wvd_vegt 0:3c1d63c20cfc 368 {
wvd_vegt 0:3c1d63c20cfc 369 if ( !in->good() ) return false;
wvd_vegt 0:3c1d63c20cfc 370
wvd_vegt 0:3c1d63c20cfc 371 int c = in->peek();
wvd_vegt 0:3c1d63c20cfc 372 // At this scope, we can't get to a document. So fail silently.
wvd_vegt 0:3c1d63c20cfc 373 if ( !IsWhiteSpace( c ) || c <= 0 )
wvd_vegt 0:3c1d63c20cfc 374 return true;
wvd_vegt 0:3c1d63c20cfc 375
wvd_vegt 0:3c1d63c20cfc 376 *tag += (char) in->get();
wvd_vegt 0:3c1d63c20cfc 377 }
wvd_vegt 0:3c1d63c20cfc 378 }
wvd_vegt 0:3c1d63c20cfc 379
wvd_vegt 0:3c1d63c20cfc 380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 381 {
wvd_vegt 0:3c1d63c20cfc 382 //assert( character > 0 && character < 128 ); // else it won't work in utf-8
wvd_vegt 0:3c1d63c20cfc 383 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 384 {
wvd_vegt 0:3c1d63c20cfc 385 int c = in->peek();
wvd_vegt 0:3c1d63c20cfc 386 if ( c == character )
wvd_vegt 0:3c1d63c20cfc 387 return true;
wvd_vegt 0:3c1d63c20cfc 388 if ( c <= 0 ) // Silent failure: can't get document at this scope
wvd_vegt 0:3c1d63c20cfc 389 return false;
wvd_vegt 0:3c1d63c20cfc 390
wvd_vegt 0:3c1d63c20cfc 391 in->get();
wvd_vegt 0:3c1d63c20cfc 392 *tag += (char) c;
wvd_vegt 0:3c1d63c20cfc 393 }
wvd_vegt 0:3c1d63c20cfc 394 return false;
wvd_vegt 0:3c1d63c20cfc 395 }
wvd_vegt 0:3c1d63c20cfc 396 #endif
wvd_vegt 0:3c1d63c20cfc 397
wvd_vegt 0:3c1d63c20cfc 398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
wvd_vegt 0:3c1d63c20cfc 399 // "assign" optimization removes over 10% of the execution time.
wvd_vegt 0:3c1d63c20cfc 400 //
wvd_vegt 0:3c1d63c20cfc 401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 402 {
wvd_vegt 0:3c1d63c20cfc 403 // Oddly, not supported on some comilers,
wvd_vegt 0:3c1d63c20cfc 404 //name->clear();
wvd_vegt 0:3c1d63c20cfc 405 // So use this:
wvd_vegt 0:3c1d63c20cfc 406 *name = "";
wvd_vegt 0:3c1d63c20cfc 407 assert( p );
wvd_vegt 0:3c1d63c20cfc 408
wvd_vegt 0:3c1d63c20cfc 409 // Names start with letters or underscores.
wvd_vegt 0:3c1d63c20cfc 410 // Of course, in unicode, tinyxml has no idea what a letter *is*. The
wvd_vegt 0:3c1d63c20cfc 411 // algorithm is generous.
wvd_vegt 0:3c1d63c20cfc 412 //
wvd_vegt 0:3c1d63c20cfc 413 // After that, they can be letters, underscores, numbers,
wvd_vegt 0:3c1d63c20cfc 414 // hyphens, or colons. (Colons are valid ony for namespaces,
wvd_vegt 0:3c1d63c20cfc 415 // but tinyxml can't tell namespaces from names.)
wvd_vegt 0:3c1d63c20cfc 416 if ( p && *p
wvd_vegt 0:3c1d63c20cfc 417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
wvd_vegt 0:3c1d63c20cfc 418 {
wvd_vegt 0:3c1d63c20cfc 419 const char* start = p;
wvd_vegt 0:3c1d63c20cfc 420 while( p && *p
wvd_vegt 0:3c1d63c20cfc 421 && ( IsAlphaNum( (unsigned char ) *p, encoding )
wvd_vegt 0:3c1d63c20cfc 422 || *p == '_'
wvd_vegt 0:3c1d63c20cfc 423 || *p == '-'
wvd_vegt 0:3c1d63c20cfc 424 || *p == '.'
wvd_vegt 0:3c1d63c20cfc 425 || *p == ':' ) )
wvd_vegt 0:3c1d63c20cfc 426 {
wvd_vegt 0:3c1d63c20cfc 427 //(*name) += *p; // expensive
wvd_vegt 0:3c1d63c20cfc 428 ++p;
wvd_vegt 0:3c1d63c20cfc 429 }
wvd_vegt 0:3c1d63c20cfc 430 if ( p-start > 0 ) {
wvd_vegt 0:3c1d63c20cfc 431 name->assign( start, p-start );
wvd_vegt 0:3c1d63c20cfc 432 }
wvd_vegt 0:3c1d63c20cfc 433 return p;
wvd_vegt 0:3c1d63c20cfc 434 }
wvd_vegt 0:3c1d63c20cfc 435 return 0;
wvd_vegt 0:3c1d63c20cfc 436 }
wvd_vegt 0:3c1d63c20cfc 437
wvd_vegt 0:3c1d63c20cfc 438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 439 {
wvd_vegt 0:3c1d63c20cfc 440 // Presume an entity, and pull it out.
wvd_vegt 0:3c1d63c20cfc 441 TIXML_STRING ent;
wvd_vegt 0:3c1d63c20cfc 442 int i;
wvd_vegt 0:3c1d63c20cfc 443 *length = 0;
wvd_vegt 0:3c1d63c20cfc 444
wvd_vegt 0:3c1d63c20cfc 445 if ( *(p+1) && *(p+1) == '#' && *(p+2) )
wvd_vegt 0:3c1d63c20cfc 446 {
wvd_vegt 0:3c1d63c20cfc 447 unsigned long ucs = 0;
wvd_vegt 0:3c1d63c20cfc 448 ptrdiff_t delta = 0;
wvd_vegt 0:3c1d63c20cfc 449 unsigned mult = 1;
wvd_vegt 0:3c1d63c20cfc 450
wvd_vegt 0:3c1d63c20cfc 451 if ( *(p+2) == 'x' )
wvd_vegt 0:3c1d63c20cfc 452 {
wvd_vegt 0:3c1d63c20cfc 453 // Hexadecimal.
wvd_vegt 0:3c1d63c20cfc 454 if ( !*(p+3) ) return 0;
wvd_vegt 0:3c1d63c20cfc 455
wvd_vegt 0:3c1d63c20cfc 456 const char* q = p+3;
wvd_vegt 0:3c1d63c20cfc 457 q = strchr( q, ';' );
wvd_vegt 0:3c1d63c20cfc 458
wvd_vegt 0:3c1d63c20cfc 459 if ( !q || !*q ) return 0;
wvd_vegt 0:3c1d63c20cfc 460
wvd_vegt 0:3c1d63c20cfc 461 delta = q-p;
wvd_vegt 0:3c1d63c20cfc 462 --q;
wvd_vegt 0:3c1d63c20cfc 463
wvd_vegt 0:3c1d63c20cfc 464 while ( *q != 'x' )
wvd_vegt 0:3c1d63c20cfc 465 {
wvd_vegt 0:3c1d63c20cfc 466 if ( *q >= '0' && *q <= '9' )
wvd_vegt 0:3c1d63c20cfc 467 ucs += mult * (*q - '0');
wvd_vegt 0:3c1d63c20cfc 468 else if ( *q >= 'a' && *q <= 'f' )
wvd_vegt 0:3c1d63c20cfc 469 ucs += mult * (*q - 'a' + 10);
wvd_vegt 0:3c1d63c20cfc 470 else if ( *q >= 'A' && *q <= 'F' )
wvd_vegt 0:3c1d63c20cfc 471 ucs += mult * (*q - 'A' + 10 );
wvd_vegt 0:3c1d63c20cfc 472 else
wvd_vegt 0:3c1d63c20cfc 473 return 0;
wvd_vegt 0:3c1d63c20cfc 474 mult *= 16;
wvd_vegt 0:3c1d63c20cfc 475 --q;
wvd_vegt 0:3c1d63c20cfc 476 }
wvd_vegt 0:3c1d63c20cfc 477 }
wvd_vegt 0:3c1d63c20cfc 478 else
wvd_vegt 0:3c1d63c20cfc 479 {
wvd_vegt 0:3c1d63c20cfc 480 // Decimal.
wvd_vegt 0:3c1d63c20cfc 481 if ( !*(p+2) ) return 0;
wvd_vegt 0:3c1d63c20cfc 482
wvd_vegt 0:3c1d63c20cfc 483 const char* q = p+2;
wvd_vegt 0:3c1d63c20cfc 484 q = strchr( q, ';' );
wvd_vegt 0:3c1d63c20cfc 485
wvd_vegt 0:3c1d63c20cfc 486 if ( !q || !*q ) return 0;
wvd_vegt 0:3c1d63c20cfc 487
wvd_vegt 0:3c1d63c20cfc 488 delta = q-p;
wvd_vegt 0:3c1d63c20cfc 489 --q;
wvd_vegt 0:3c1d63c20cfc 490
wvd_vegt 0:3c1d63c20cfc 491 while ( *q != '#' )
wvd_vegt 0:3c1d63c20cfc 492 {
wvd_vegt 0:3c1d63c20cfc 493 if ( *q >= '0' && *q <= '9' )
wvd_vegt 0:3c1d63c20cfc 494 ucs += mult * (*q - '0');
wvd_vegt 0:3c1d63c20cfc 495 else
wvd_vegt 0:3c1d63c20cfc 496 return 0;
wvd_vegt 0:3c1d63c20cfc 497 mult *= 10;
wvd_vegt 0:3c1d63c20cfc 498 --q;
wvd_vegt 0:3c1d63c20cfc 499 }
wvd_vegt 0:3c1d63c20cfc 500 }
wvd_vegt 0:3c1d63c20cfc 501 if ( encoding == TIXML_ENCODING_UTF8 )
wvd_vegt 0:3c1d63c20cfc 502 {
wvd_vegt 0:3c1d63c20cfc 503 // convert the UCS to UTF-8
wvd_vegt 0:3c1d63c20cfc 504 ConvertUTF32ToUTF8( ucs, value, length );
wvd_vegt 0:3c1d63c20cfc 505 }
wvd_vegt 0:3c1d63c20cfc 506 else
wvd_vegt 0:3c1d63c20cfc 507 {
wvd_vegt 0:3c1d63c20cfc 508 *value = (char)ucs;
wvd_vegt 0:3c1d63c20cfc 509 *length = 1;
wvd_vegt 0:3c1d63c20cfc 510 }
wvd_vegt 0:3c1d63c20cfc 511 return p + delta + 1;
wvd_vegt 0:3c1d63c20cfc 512 }
wvd_vegt 0:3c1d63c20cfc 513
wvd_vegt 0:3c1d63c20cfc 514 // Now try to match it.
wvd_vegt 0:3c1d63c20cfc 515 for( i=0; i<NUM_ENTITY; ++i )
wvd_vegt 0:3c1d63c20cfc 516 {
wvd_vegt 0:3c1d63c20cfc 517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
wvd_vegt 0:3c1d63c20cfc 518 {
wvd_vegt 0:3c1d63c20cfc 519 assert( strlen( entity[i].str ) == entity[i].strLength );
wvd_vegt 0:3c1d63c20cfc 520 *value = entity[i].chr;
wvd_vegt 0:3c1d63c20cfc 521 *length = 1;
wvd_vegt 0:3c1d63c20cfc 522 return ( p + entity[i].strLength );
wvd_vegt 0:3c1d63c20cfc 523 }
wvd_vegt 0:3c1d63c20cfc 524 }
wvd_vegt 0:3c1d63c20cfc 525
wvd_vegt 0:3c1d63c20cfc 526 // So it wasn't an entity, its unrecognized, or something like that.
wvd_vegt 0:3c1d63c20cfc 527 *value = *p; // Don't put back the last one, since we return it!
wvd_vegt 0:3c1d63c20cfc 528 //*length = 1; // Leave unrecognized entities - this doesn't really work.
wvd_vegt 0:3c1d63c20cfc 529 // Just writes strange XML.
wvd_vegt 0:3c1d63c20cfc 530 return p+1;
wvd_vegt 0:3c1d63c20cfc 531 }
wvd_vegt 0:3c1d63c20cfc 532
wvd_vegt 0:3c1d63c20cfc 533
wvd_vegt 0:3c1d63c20cfc 534 bool TiXmlBase::StringEqual( const char* p,
wvd_vegt 0:3c1d63c20cfc 535 const char* tag,
wvd_vegt 0:3c1d63c20cfc 536 bool ignoreCase,
wvd_vegt 0:3c1d63c20cfc 537 TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 538 {
wvd_vegt 0:3c1d63c20cfc 539 assert( p );
wvd_vegt 0:3c1d63c20cfc 540 assert( tag );
wvd_vegt 0:3c1d63c20cfc 541 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 542 {
wvd_vegt 0:3c1d63c20cfc 543 assert( 0 );
wvd_vegt 0:3c1d63c20cfc 544 return false;
wvd_vegt 0:3c1d63c20cfc 545 }
wvd_vegt 0:3c1d63c20cfc 546
wvd_vegt 0:3c1d63c20cfc 547 const char* q = p;
wvd_vegt 0:3c1d63c20cfc 548
wvd_vegt 0:3c1d63c20cfc 549 if ( ignoreCase )
wvd_vegt 0:3c1d63c20cfc 550 {
wvd_vegt 0:3c1d63c20cfc 551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
wvd_vegt 0:3c1d63c20cfc 552 {
wvd_vegt 0:3c1d63c20cfc 553 ++q;
wvd_vegt 0:3c1d63c20cfc 554 ++tag;
wvd_vegt 0:3c1d63c20cfc 555 }
wvd_vegt 0:3c1d63c20cfc 556
wvd_vegt 0:3c1d63c20cfc 557 if ( *tag == 0 )
wvd_vegt 0:3c1d63c20cfc 558 return true;
wvd_vegt 0:3c1d63c20cfc 559 }
wvd_vegt 0:3c1d63c20cfc 560 else
wvd_vegt 0:3c1d63c20cfc 561 {
wvd_vegt 0:3c1d63c20cfc 562 while ( *q && *tag && *q == *tag )
wvd_vegt 0:3c1d63c20cfc 563 {
wvd_vegt 0:3c1d63c20cfc 564 ++q;
wvd_vegt 0:3c1d63c20cfc 565 ++tag;
wvd_vegt 0:3c1d63c20cfc 566 }
wvd_vegt 0:3c1d63c20cfc 567
wvd_vegt 0:3c1d63c20cfc 568 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
wvd_vegt 0:3c1d63c20cfc 569 return true;
wvd_vegt 0:3c1d63c20cfc 570 }
wvd_vegt 0:3c1d63c20cfc 571 return false;
wvd_vegt 0:3c1d63c20cfc 572 }
wvd_vegt 0:3c1d63c20cfc 573
wvd_vegt 0:3c1d63c20cfc 574 const char* TiXmlBase::ReadText( const char* p,
wvd_vegt 0:3c1d63c20cfc 575 TIXML_STRING * text,
wvd_vegt 0:3c1d63c20cfc 576 bool trimWhiteSpace,
wvd_vegt 0:3c1d63c20cfc 577 const char* endTag,
wvd_vegt 0:3c1d63c20cfc 578 bool caseInsensitive,
wvd_vegt 0:3c1d63c20cfc 579 TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 580 {
wvd_vegt 0:3c1d63c20cfc 581 *text = "";
wvd_vegt 0:3c1d63c20cfc 582 if ( !trimWhiteSpace // certain tags always keep whitespace
wvd_vegt 0:3c1d63c20cfc 583 || !condenseWhiteSpace ) // if true, whitespace is always kept
wvd_vegt 0:3c1d63c20cfc 584 {
wvd_vegt 0:3c1d63c20cfc 585 // Keep all the white space.
wvd_vegt 0:3c1d63c20cfc 586 while ( p && *p
wvd_vegt 0:3c1d63c20cfc 587 && !StringEqual( p, endTag, caseInsensitive, encoding )
wvd_vegt 0:3c1d63c20cfc 588 )
wvd_vegt 0:3c1d63c20cfc 589 {
wvd_vegt 0:3c1d63c20cfc 590 int len;
wvd_vegt 0:3c1d63c20cfc 591 char cArr[4] = { 0, 0, 0, 0 };
wvd_vegt 0:3c1d63c20cfc 592 p = GetChar( p, cArr, &len, encoding );
wvd_vegt 0:3c1d63c20cfc 593 text->append( cArr, len );
wvd_vegt 0:3c1d63c20cfc 594 }
wvd_vegt 0:3c1d63c20cfc 595 }
wvd_vegt 0:3c1d63c20cfc 596 else
wvd_vegt 0:3c1d63c20cfc 597 {
wvd_vegt 0:3c1d63c20cfc 598 bool whitespace = false;
wvd_vegt 0:3c1d63c20cfc 599
wvd_vegt 0:3c1d63c20cfc 600 // Remove leading white space:
wvd_vegt 0:3c1d63c20cfc 601 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 602 while ( p && *p
wvd_vegt 0:3c1d63c20cfc 603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
wvd_vegt 0:3c1d63c20cfc 604 {
wvd_vegt 0:3c1d63c20cfc 605 if ( *p == '\r' || *p == '\n' )
wvd_vegt 0:3c1d63c20cfc 606 {
wvd_vegt 0:3c1d63c20cfc 607 whitespace = true;
wvd_vegt 0:3c1d63c20cfc 608 ++p;
wvd_vegt 0:3c1d63c20cfc 609 }
wvd_vegt 0:3c1d63c20cfc 610 else if ( IsWhiteSpace( *p ) )
wvd_vegt 0:3c1d63c20cfc 611 {
wvd_vegt 0:3c1d63c20cfc 612 whitespace = true;
wvd_vegt 0:3c1d63c20cfc 613 ++p;
wvd_vegt 0:3c1d63c20cfc 614 }
wvd_vegt 0:3c1d63c20cfc 615 else
wvd_vegt 0:3c1d63c20cfc 616 {
wvd_vegt 0:3c1d63c20cfc 617 // If we've found whitespace, add it before the
wvd_vegt 0:3c1d63c20cfc 618 // new character. Any whitespace just becomes a space.
wvd_vegt 0:3c1d63c20cfc 619 if ( whitespace )
wvd_vegt 0:3c1d63c20cfc 620 {
wvd_vegt 0:3c1d63c20cfc 621 (*text) += ' ';
wvd_vegt 0:3c1d63c20cfc 622 whitespace = false;
wvd_vegt 0:3c1d63c20cfc 623 }
wvd_vegt 0:3c1d63c20cfc 624 int len;
wvd_vegt 0:3c1d63c20cfc 625 char cArr[4] = { 0, 0, 0, 0 };
wvd_vegt 0:3c1d63c20cfc 626 p = GetChar( p, cArr, &len, encoding );
wvd_vegt 0:3c1d63c20cfc 627 if ( len == 1 )
wvd_vegt 0:3c1d63c20cfc 628 (*text) += cArr[0]; // more efficient
wvd_vegt 0:3c1d63c20cfc 629 else
wvd_vegt 0:3c1d63c20cfc 630 text->append( cArr, len );
wvd_vegt 0:3c1d63c20cfc 631 }
wvd_vegt 0:3c1d63c20cfc 632 }
wvd_vegt 0:3c1d63c20cfc 633 }
wvd_vegt 0:3c1d63c20cfc 634 if ( p && *p )
wvd_vegt 0:3c1d63c20cfc 635 p += strlen( endTag );
wvd_vegt 0:3c1d63c20cfc 636 return p;
wvd_vegt 0:3c1d63c20cfc 637 }
wvd_vegt 0:3c1d63c20cfc 638
wvd_vegt 0:3c1d63c20cfc 639 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 640
wvd_vegt 0:3c1d63c20cfc 641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 642 {
wvd_vegt 0:3c1d63c20cfc 643 // The basic issue with a document is that we don't know what we're
wvd_vegt 0:3c1d63c20cfc 644 // streaming. Read something presumed to be a tag (and hope), then
wvd_vegt 0:3c1d63c20cfc 645 // identify it, and call the appropriate stream method on the tag.
wvd_vegt 0:3c1d63c20cfc 646 //
wvd_vegt 0:3c1d63c20cfc 647 // This "pre-streaming" will never read the closing ">" so the
wvd_vegt 0:3c1d63c20cfc 648 // sub-tag can orient itself.
wvd_vegt 0:3c1d63c20cfc 649
wvd_vegt 0:3c1d63c20cfc 650 if ( !StreamTo( in, '<', tag ) )
wvd_vegt 0:3c1d63c20cfc 651 {
wvd_vegt 0:3c1d63c20cfc 652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 653 return;
wvd_vegt 0:3c1d63c20cfc 654 }
wvd_vegt 0:3c1d63c20cfc 655
wvd_vegt 0:3c1d63c20cfc 656 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 657 {
wvd_vegt 0:3c1d63c20cfc 658 int tagIndex = (int) tag->length();
wvd_vegt 0:3c1d63c20cfc 659 while ( in->good() && in->peek() != '>' )
wvd_vegt 0:3c1d63c20cfc 660 {
wvd_vegt 0:3c1d63c20cfc 661 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 662 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 663 {
wvd_vegt 0:3c1d63c20cfc 664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 665 break;
wvd_vegt 0:3c1d63c20cfc 666 }
wvd_vegt 0:3c1d63c20cfc 667 (*tag) += (char) c;
wvd_vegt 0:3c1d63c20cfc 668 }
wvd_vegt 0:3c1d63c20cfc 669
wvd_vegt 0:3c1d63c20cfc 670 if ( in->good() )
wvd_vegt 0:3c1d63c20cfc 671 {
wvd_vegt 0:3c1d63c20cfc 672 // We now have something we presume to be a node of
wvd_vegt 0:3c1d63c20cfc 673 // some sort. Identify it, and call the node to
wvd_vegt 0:3c1d63c20cfc 674 // continue streaming.
wvd_vegt 0:3c1d63c20cfc 675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
wvd_vegt 0:3c1d63c20cfc 676
wvd_vegt 0:3c1d63c20cfc 677 if ( node )
wvd_vegt 0:3c1d63c20cfc 678 {
wvd_vegt 0:3c1d63c20cfc 679 node->StreamIn( in, tag );
wvd_vegt 0:3c1d63c20cfc 680 bool isElement = node->ToElement() != 0;
wvd_vegt 0:3c1d63c20cfc 681 delete node;
wvd_vegt 0:3c1d63c20cfc 682 node = 0;
wvd_vegt 0:3c1d63c20cfc 683
wvd_vegt 0:3c1d63c20cfc 684 // If this is the root element, we're done. Parsing will be
wvd_vegt 0:3c1d63c20cfc 685 // done by the >> operator.
wvd_vegt 0:3c1d63c20cfc 686 if ( isElement )
wvd_vegt 0:3c1d63c20cfc 687 {
wvd_vegt 0:3c1d63c20cfc 688 return;
wvd_vegt 0:3c1d63c20cfc 689 }
wvd_vegt 0:3c1d63c20cfc 690 }
wvd_vegt 0:3c1d63c20cfc 691 else
wvd_vegt 0:3c1d63c20cfc 692 {
wvd_vegt 0:3c1d63c20cfc 693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 694 return;
wvd_vegt 0:3c1d63c20cfc 695 }
wvd_vegt 0:3c1d63c20cfc 696 }
wvd_vegt 0:3c1d63c20cfc 697 }
wvd_vegt 0:3c1d63c20cfc 698 // We should have returned sooner.
wvd_vegt 0:3c1d63c20cfc 699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 700 }
wvd_vegt 0:3c1d63c20cfc 701
wvd_vegt 0:3c1d63c20cfc 702 #endif
wvd_vegt 0:3c1d63c20cfc 703
wvd_vegt 0:3c1d63c20cfc 704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 705 {
wvd_vegt 0:3c1d63c20cfc 706 ClearError();
wvd_vegt 0:3c1d63c20cfc 707
wvd_vegt 0:3c1d63c20cfc 708 // Parse away, at the document level. Since a document
wvd_vegt 0:3c1d63c20cfc 709 // contains nothing but other tags, most of what happens
wvd_vegt 0:3c1d63c20cfc 710 // here is skipping white space.
wvd_vegt 0:3c1d63c20cfc 711 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 712 {
wvd_vegt 0:3c1d63c20cfc 713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 714 return 0;
wvd_vegt 0:3c1d63c20cfc 715 }
wvd_vegt 0:3c1d63c20cfc 716
wvd_vegt 0:3c1d63c20cfc 717 // Note that, for a document, this needs to come
wvd_vegt 0:3c1d63c20cfc 718 // before the while space skip, so that parsing
wvd_vegt 0:3c1d63c20cfc 719 // starts from the pointer we are given.
wvd_vegt 0:3c1d63c20cfc 720 location.Clear();
wvd_vegt 0:3c1d63c20cfc 721 if ( prevData )
wvd_vegt 0:3c1d63c20cfc 722 {
wvd_vegt 0:3c1d63c20cfc 723 location.row = prevData->cursor.row;
wvd_vegt 0:3c1d63c20cfc 724 location.col = prevData->cursor.col;
wvd_vegt 0:3c1d63c20cfc 725 }
wvd_vegt 0:3c1d63c20cfc 726 else
wvd_vegt 0:3c1d63c20cfc 727 {
wvd_vegt 0:3c1d63c20cfc 728 location.row = 0;
wvd_vegt 0:3c1d63c20cfc 729 location.col = 0;
wvd_vegt 0:3c1d63c20cfc 730 }
wvd_vegt 0:3c1d63c20cfc 731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
wvd_vegt 0:3c1d63c20cfc 732 location = data.Cursor();
wvd_vegt 0:3c1d63c20cfc 733
wvd_vegt 0:3c1d63c20cfc 734 if ( encoding == TIXML_ENCODING_UNKNOWN )
wvd_vegt 0:3c1d63c20cfc 735 {
wvd_vegt 0:3c1d63c20cfc 736 // Check for the Microsoft UTF-8 lead bytes.
wvd_vegt 0:3c1d63c20cfc 737 const unsigned char* pU = (const unsigned char*)p;
wvd_vegt 0:3c1d63c20cfc 738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
wvd_vegt 0:3c1d63c20cfc 739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
wvd_vegt 0:3c1d63c20cfc 740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
wvd_vegt 0:3c1d63c20cfc 741 {
wvd_vegt 0:3c1d63c20cfc 742 encoding = TIXML_ENCODING_UTF8;
wvd_vegt 0:3c1d63c20cfc 743 useMicrosoftBOM = true;
wvd_vegt 0:3c1d63c20cfc 744 }
wvd_vegt 0:3c1d63c20cfc 745 }
wvd_vegt 0:3c1d63c20cfc 746
wvd_vegt 0:3c1d63c20cfc 747 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 748 if ( !p )
wvd_vegt 0:3c1d63c20cfc 749 {
wvd_vegt 0:3c1d63c20cfc 750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 751 return 0;
wvd_vegt 0:3c1d63c20cfc 752 }
wvd_vegt 0:3c1d63c20cfc 753
wvd_vegt 0:3c1d63c20cfc 754 while ( p && *p )
wvd_vegt 0:3c1d63c20cfc 755 {
wvd_vegt 0:3c1d63c20cfc 756 TiXmlNode* node = Identify( p, encoding );
wvd_vegt 0:3c1d63c20cfc 757 if ( node )
wvd_vegt 0:3c1d63c20cfc 758 {
wvd_vegt 0:3c1d63c20cfc 759 p = node->Parse( p, &data, encoding );
wvd_vegt 0:3c1d63c20cfc 760 LinkEndChild( node );
wvd_vegt 0:3c1d63c20cfc 761 }
wvd_vegt 0:3c1d63c20cfc 762 else
wvd_vegt 0:3c1d63c20cfc 763 {
wvd_vegt 0:3c1d63c20cfc 764 break;
wvd_vegt 0:3c1d63c20cfc 765 }
wvd_vegt 0:3c1d63c20cfc 766
wvd_vegt 0:3c1d63c20cfc 767 // Did we get encoding info?
wvd_vegt 0:3c1d63c20cfc 768 if ( encoding == TIXML_ENCODING_UNKNOWN
wvd_vegt 0:3c1d63c20cfc 769 && node->ToDeclaration() )
wvd_vegt 0:3c1d63c20cfc 770 {
wvd_vegt 0:3c1d63c20cfc 771 TiXmlDeclaration* dec = node->ToDeclaration();
wvd_vegt 0:3c1d63c20cfc 772 const char* enc = dec->Encoding();
wvd_vegt 0:3c1d63c20cfc 773 assert( enc );
wvd_vegt 0:3c1d63c20cfc 774
wvd_vegt 0:3c1d63c20cfc 775 if ( *enc == 0 )
wvd_vegt 0:3c1d63c20cfc 776 encoding = TIXML_ENCODING_UTF8;
wvd_vegt 0:3c1d63c20cfc 777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
wvd_vegt 0:3c1d63c20cfc 778 encoding = TIXML_ENCODING_UTF8;
wvd_vegt 0:3c1d63c20cfc 779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
wvd_vegt 0:3c1d63c20cfc 780 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
wvd_vegt 0:3c1d63c20cfc 781 else
wvd_vegt 0:3c1d63c20cfc 782 encoding = TIXML_ENCODING_LEGACY;
wvd_vegt 0:3c1d63c20cfc 783 }
wvd_vegt 0:3c1d63c20cfc 784
wvd_vegt 0:3c1d63c20cfc 785 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 786 }
wvd_vegt 0:3c1d63c20cfc 787
wvd_vegt 0:3c1d63c20cfc 788 // Was this empty?
wvd_vegt 0:3c1d63c20cfc 789 if ( !firstChild ) {
wvd_vegt 0:3c1d63c20cfc 790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
wvd_vegt 0:3c1d63c20cfc 791 return 0;
wvd_vegt 0:3c1d63c20cfc 792 }
wvd_vegt 0:3c1d63c20cfc 793
wvd_vegt 0:3c1d63c20cfc 794 // All is well.
wvd_vegt 0:3c1d63c20cfc 795 return p;
wvd_vegt 0:3c1d63c20cfc 796 }
wvd_vegt 0:3c1d63c20cfc 797
wvd_vegt 0:3c1d63c20cfc 798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 799 {
wvd_vegt 0:3c1d63c20cfc 800 // The first error in a chain is more accurate - don't set again!
wvd_vegt 0:3c1d63c20cfc 801 if ( error )
wvd_vegt 0:3c1d63c20cfc 802 return;
wvd_vegt 0:3c1d63c20cfc 803
wvd_vegt 0:3c1d63c20cfc 804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
wvd_vegt 0:3c1d63c20cfc 805 error = true;
wvd_vegt 0:3c1d63c20cfc 806 errorId = err;
wvd_vegt 0:3c1d63c20cfc 807 errorDesc = errorString[ errorId ];
wvd_vegt 0:3c1d63c20cfc 808
wvd_vegt 0:3c1d63c20cfc 809 errorLocation.Clear();
wvd_vegt 0:3c1d63c20cfc 810 if ( pError && data )
wvd_vegt 0:3c1d63c20cfc 811 {
wvd_vegt 0:3c1d63c20cfc 812 data->Stamp( pError, encoding );
wvd_vegt 0:3c1d63c20cfc 813 errorLocation = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 814 }
wvd_vegt 0:3c1d63c20cfc 815 }
wvd_vegt 0:3c1d63c20cfc 816
wvd_vegt 0:3c1d63c20cfc 817
wvd_vegt 0:3c1d63c20cfc 818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 819 {
wvd_vegt 0:3c1d63c20cfc 820 TiXmlNode* returnNode = 0;
wvd_vegt 0:3c1d63c20cfc 821
wvd_vegt 0:3c1d63c20cfc 822 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 823 if( !p || !*p || *p != '<' )
wvd_vegt 0:3c1d63c20cfc 824 {
wvd_vegt 0:3c1d63c20cfc 825 return 0;
wvd_vegt 0:3c1d63c20cfc 826 }
wvd_vegt 0:3c1d63c20cfc 827
wvd_vegt 0:3c1d63c20cfc 828 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 829
wvd_vegt 0:3c1d63c20cfc 830 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 831 {
wvd_vegt 0:3c1d63c20cfc 832 return 0;
wvd_vegt 0:3c1d63c20cfc 833 }
wvd_vegt 0:3c1d63c20cfc 834
wvd_vegt 0:3c1d63c20cfc 835 // What is this thing?
wvd_vegt 0:3c1d63c20cfc 836 // - Elements start with a letter or underscore, but xml is reserved.
wvd_vegt 0:3c1d63c20cfc 837 // - Comments: <!--
wvd_vegt 0:3c1d63c20cfc 838 // - Decleration: <?xml
wvd_vegt 0:3c1d63c20cfc 839 // - Everthing else is unknown to tinyxml.
wvd_vegt 0:3c1d63c20cfc 840 //
wvd_vegt 0:3c1d63c20cfc 841
wvd_vegt 0:3c1d63c20cfc 842 const char* xmlHeader = { "<?xml" };
wvd_vegt 0:3c1d63c20cfc 843 const char* commentHeader = { "<!--" };
wvd_vegt 0:3c1d63c20cfc 844 const char* dtdHeader = { "<!" };
wvd_vegt 0:3c1d63c20cfc 845 const char* cdataHeader = { "<![CDATA[" };
wvd_vegt 0:3c1d63c20cfc 846
wvd_vegt 0:3c1d63c20cfc 847 if ( StringEqual( p, xmlHeader, true, encoding ) )
wvd_vegt 0:3c1d63c20cfc 848 {
wvd_vegt 0:3c1d63c20cfc 849 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 850 TIXML_LOG( "XML parsing Declaration\n" );
wvd_vegt 0:3c1d63c20cfc 851 #endif
wvd_vegt 0:3c1d63c20cfc 852 returnNode = new TiXmlDeclaration();
wvd_vegt 0:3c1d63c20cfc 853 }
wvd_vegt 0:3c1d63c20cfc 854 else if ( StringEqual( p, commentHeader, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 855 {
wvd_vegt 0:3c1d63c20cfc 856 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 857 TIXML_LOG( "XML parsing Comment\n" );
wvd_vegt 0:3c1d63c20cfc 858 #endif
wvd_vegt 0:3c1d63c20cfc 859 returnNode = new TiXmlComment();
wvd_vegt 0:3c1d63c20cfc 860 }
wvd_vegt 0:3c1d63c20cfc 861 else if ( StringEqual( p, cdataHeader, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 862 {
wvd_vegt 0:3c1d63c20cfc 863 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 864 TIXML_LOG( "XML parsing CDATA\n" );
wvd_vegt 0:3c1d63c20cfc 865 #endif
wvd_vegt 0:3c1d63c20cfc 866 TiXmlText* text = new TiXmlText( "" );
wvd_vegt 0:3c1d63c20cfc 867 text->SetCDATA( true );
wvd_vegt 0:3c1d63c20cfc 868 returnNode = text;
wvd_vegt 0:3c1d63c20cfc 869 }
wvd_vegt 0:3c1d63c20cfc 870 else if ( StringEqual( p, dtdHeader, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 871 {
wvd_vegt 0:3c1d63c20cfc 872 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 873 TIXML_LOG( "XML parsing Unknown(1)\n" );
wvd_vegt 0:3c1d63c20cfc 874 #endif
wvd_vegt 0:3c1d63c20cfc 875 returnNode = new TiXmlUnknown();
wvd_vegt 0:3c1d63c20cfc 876 }
wvd_vegt 0:3c1d63c20cfc 877 else if ( IsAlpha( *(p+1), encoding )
wvd_vegt 0:3c1d63c20cfc 878 || *(p+1) == '_' )
wvd_vegt 0:3c1d63c20cfc 879 {
wvd_vegt 0:3c1d63c20cfc 880 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 881 TIXML_LOG( "XML parsing Element\n" );
wvd_vegt 0:3c1d63c20cfc 882 #endif
wvd_vegt 0:3c1d63c20cfc 883 returnNode = new TiXmlElement( "" );
wvd_vegt 0:3c1d63c20cfc 884 }
wvd_vegt 0:3c1d63c20cfc 885 else
wvd_vegt 0:3c1d63c20cfc 886 {
wvd_vegt 0:3c1d63c20cfc 887 #ifdef DEBUG_PARSER
wvd_vegt 0:3c1d63c20cfc 888 TIXML_LOG( "XML parsing Unknown(2)\n" );
wvd_vegt 0:3c1d63c20cfc 889 #endif
wvd_vegt 0:3c1d63c20cfc 890 returnNode = new TiXmlUnknown();
wvd_vegt 0:3c1d63c20cfc 891 }
wvd_vegt 0:3c1d63c20cfc 892
wvd_vegt 0:3c1d63c20cfc 893 if ( returnNode )
wvd_vegt 0:3c1d63c20cfc 894 {
wvd_vegt 0:3c1d63c20cfc 895 // Set the parent, so it can report errors
wvd_vegt 0:3c1d63c20cfc 896 returnNode->parent = this;
wvd_vegt 0:3c1d63c20cfc 897 }
wvd_vegt 0:3c1d63c20cfc 898 return returnNode;
wvd_vegt 0:3c1d63c20cfc 899 }
wvd_vegt 0:3c1d63c20cfc 900
wvd_vegt 0:3c1d63c20cfc 901 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 902
wvd_vegt 0:3c1d63c20cfc 903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
wvd_vegt 0:3c1d63c20cfc 904 {
wvd_vegt 0:3c1d63c20cfc 905 // We're called with some amount of pre-parsing. That is, some of "this"
wvd_vegt 0:3c1d63c20cfc 906 // element is in "tag". Go ahead and stream to the closing ">"
wvd_vegt 0:3c1d63c20cfc 907 while( in->good() )
wvd_vegt 0:3c1d63c20cfc 908 {
wvd_vegt 0:3c1d63c20cfc 909 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 910 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 911 {
wvd_vegt 0:3c1d63c20cfc 912 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 913 if ( document )
wvd_vegt 0:3c1d63c20cfc 914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 915 return;
wvd_vegt 0:3c1d63c20cfc 916 }
wvd_vegt 0:3c1d63c20cfc 917 (*tag) += (char) c ;
wvd_vegt 0:3c1d63c20cfc 918
wvd_vegt 0:3c1d63c20cfc 919 if ( c == '>' )
wvd_vegt 0:3c1d63c20cfc 920 break;
wvd_vegt 0:3c1d63c20cfc 921 }
wvd_vegt 0:3c1d63c20cfc 922
wvd_vegt 0:3c1d63c20cfc 923 if ( tag->length() < 3 ) return;
wvd_vegt 0:3c1d63c20cfc 924
wvd_vegt 0:3c1d63c20cfc 925 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
wvd_vegt 0:3c1d63c20cfc 926 // If not, identify and stream.
wvd_vegt 0:3c1d63c20cfc 927
wvd_vegt 0:3c1d63c20cfc 928 if ( tag->at( tag->length() - 1 ) == '>'
wvd_vegt 0:3c1d63c20cfc 929 && tag->at( tag->length() - 2 ) == '/' )
wvd_vegt 0:3c1d63c20cfc 930 {
wvd_vegt 0:3c1d63c20cfc 931 // All good!
wvd_vegt 0:3c1d63c20cfc 932 return;
wvd_vegt 0:3c1d63c20cfc 933 }
wvd_vegt 0:3c1d63c20cfc 934 else if ( tag->at( tag->length() - 1 ) == '>' )
wvd_vegt 0:3c1d63c20cfc 935 {
wvd_vegt 0:3c1d63c20cfc 936 // There is more. Could be:
wvd_vegt 0:3c1d63c20cfc 937 // text
wvd_vegt 0:3c1d63c20cfc 938 // cdata text (which looks like another node)
wvd_vegt 0:3c1d63c20cfc 939 // closing tag
wvd_vegt 0:3c1d63c20cfc 940 // another node.
wvd_vegt 0:3c1d63c20cfc 941 for ( ;; )
wvd_vegt 0:3c1d63c20cfc 942 {
wvd_vegt 0:3c1d63c20cfc 943 StreamWhiteSpace( in, tag );
wvd_vegt 0:3c1d63c20cfc 944
wvd_vegt 0:3c1d63c20cfc 945 // Do we have text?
wvd_vegt 0:3c1d63c20cfc 946 if ( in->good() && in->peek() != '<' )
wvd_vegt 0:3c1d63c20cfc 947 {
wvd_vegt 0:3c1d63c20cfc 948 // Yep, text.
wvd_vegt 0:3c1d63c20cfc 949 TiXmlText text( "" );
wvd_vegt 0:3c1d63c20cfc 950 text.StreamIn( in, tag );
wvd_vegt 0:3c1d63c20cfc 951
wvd_vegt 0:3c1d63c20cfc 952 // What follows text is a closing tag or another node.
wvd_vegt 0:3c1d63c20cfc 953 // Go around again and figure it out.
wvd_vegt 0:3c1d63c20cfc 954 continue;
wvd_vegt 0:3c1d63c20cfc 955 }
wvd_vegt 0:3c1d63c20cfc 956
wvd_vegt 0:3c1d63c20cfc 957 // We now have either a closing tag...or another node.
wvd_vegt 0:3c1d63c20cfc 958 // We should be at a "<", regardless.
wvd_vegt 0:3c1d63c20cfc 959 if ( !in->good() ) return;
wvd_vegt 0:3c1d63c20cfc 960 assert( in->peek() == '<' );
wvd_vegt 0:3c1d63c20cfc 961 int tagIndex = (int) tag->length();
wvd_vegt 0:3c1d63c20cfc 962
wvd_vegt 0:3c1d63c20cfc 963 bool closingTag = false;
wvd_vegt 0:3c1d63c20cfc 964 bool firstCharFound = false;
wvd_vegt 0:3c1d63c20cfc 965
wvd_vegt 0:3c1d63c20cfc 966 for( ;; )
wvd_vegt 0:3c1d63c20cfc 967 {
wvd_vegt 0:3c1d63c20cfc 968 if ( !in->good() )
wvd_vegt 0:3c1d63c20cfc 969 return;
wvd_vegt 0:3c1d63c20cfc 970
wvd_vegt 0:3c1d63c20cfc 971 int c = in->peek();
wvd_vegt 0:3c1d63c20cfc 972 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 973 {
wvd_vegt 0:3c1d63c20cfc 974 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 975 if ( document )
wvd_vegt 0:3c1d63c20cfc 976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 977 return;
wvd_vegt 0:3c1d63c20cfc 978 }
wvd_vegt 0:3c1d63c20cfc 979
wvd_vegt 0:3c1d63c20cfc 980 if ( c == '>' )
wvd_vegt 0:3c1d63c20cfc 981 break;
wvd_vegt 0:3c1d63c20cfc 982
wvd_vegt 0:3c1d63c20cfc 983 *tag += (char) c;
wvd_vegt 0:3c1d63c20cfc 984 in->get();
wvd_vegt 0:3c1d63c20cfc 985
wvd_vegt 0:3c1d63c20cfc 986 // Early out if we find the CDATA id.
wvd_vegt 0:3c1d63c20cfc 987 if ( c == '[' && tag->size() >= 9 )
wvd_vegt 0:3c1d63c20cfc 988 {
wvd_vegt 0:3c1d63c20cfc 989 size_t len = tag->size();
wvd_vegt 0:3c1d63c20cfc 990 const char* start = tag->c_str() + len - 9;
wvd_vegt 0:3c1d63c20cfc 991 if ( strcmp( start, "<![CDATA[" ) == 0 ) {
wvd_vegt 0:3c1d63c20cfc 992 assert( !closingTag );
wvd_vegt 0:3c1d63c20cfc 993 break;
wvd_vegt 0:3c1d63c20cfc 994 }
wvd_vegt 0:3c1d63c20cfc 995 }
wvd_vegt 0:3c1d63c20cfc 996
wvd_vegt 0:3c1d63c20cfc 997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
wvd_vegt 0:3c1d63c20cfc 998 {
wvd_vegt 0:3c1d63c20cfc 999 firstCharFound = true;
wvd_vegt 0:3c1d63c20cfc 1000 if ( c == '/' )
wvd_vegt 0:3c1d63c20cfc 1001 closingTag = true;
wvd_vegt 0:3c1d63c20cfc 1002 }
wvd_vegt 0:3c1d63c20cfc 1003 }
wvd_vegt 0:3c1d63c20cfc 1004 // If it was a closing tag, then read in the closing '>' to clean up the input stream.
wvd_vegt 0:3c1d63c20cfc 1005 // If it was not, the streaming will be done by the tag.
wvd_vegt 0:3c1d63c20cfc 1006 if ( closingTag )
wvd_vegt 0:3c1d63c20cfc 1007 {
wvd_vegt 0:3c1d63c20cfc 1008 if ( !in->good() )
wvd_vegt 0:3c1d63c20cfc 1009 return;
wvd_vegt 0:3c1d63c20cfc 1010
wvd_vegt 0:3c1d63c20cfc 1011 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 1012 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 1013 {
wvd_vegt 0:3c1d63c20cfc 1014 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1015 if ( document )
wvd_vegt 0:3c1d63c20cfc 1016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 1017 return;
wvd_vegt 0:3c1d63c20cfc 1018 }
wvd_vegt 0:3c1d63c20cfc 1019 assert( c == '>' );
wvd_vegt 0:3c1d63c20cfc 1020 *tag += (char) c;
wvd_vegt 0:3c1d63c20cfc 1021
wvd_vegt 0:3c1d63c20cfc 1022 // We are done, once we've found our closing tag.
wvd_vegt 0:3c1d63c20cfc 1023 return;
wvd_vegt 0:3c1d63c20cfc 1024 }
wvd_vegt 0:3c1d63c20cfc 1025 else
wvd_vegt 0:3c1d63c20cfc 1026 {
wvd_vegt 0:3c1d63c20cfc 1027 // If not a closing tag, id it, and stream.
wvd_vegt 0:3c1d63c20cfc 1028 const char* tagloc = tag->c_str() + tagIndex;
wvd_vegt 0:3c1d63c20cfc 1029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
wvd_vegt 0:3c1d63c20cfc 1030 if ( !node )
wvd_vegt 0:3c1d63c20cfc 1031 return;
wvd_vegt 0:3c1d63c20cfc 1032 node->StreamIn( in, tag );
wvd_vegt 0:3c1d63c20cfc 1033 delete node;
wvd_vegt 0:3c1d63c20cfc 1034 node = 0;
wvd_vegt 0:3c1d63c20cfc 1035
wvd_vegt 0:3c1d63c20cfc 1036 // No return: go around from the beginning: text, closing tag, or node.
wvd_vegt 0:3c1d63c20cfc 1037 }
wvd_vegt 0:3c1d63c20cfc 1038 }
wvd_vegt 0:3c1d63c20cfc 1039 }
wvd_vegt 0:3c1d63c20cfc 1040 }
wvd_vegt 0:3c1d63c20cfc 1041 #endif
wvd_vegt 0:3c1d63c20cfc 1042
wvd_vegt 0:3c1d63c20cfc 1043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1044 {
wvd_vegt 0:3c1d63c20cfc 1045 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1046 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1047
wvd_vegt 0:3c1d63c20cfc 1048 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1049 {
wvd_vegt 0:3c1d63c20cfc 1050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
wvd_vegt 0:3c1d63c20cfc 1051 return 0;
wvd_vegt 0:3c1d63c20cfc 1052 }
wvd_vegt 0:3c1d63c20cfc 1053
wvd_vegt 0:3c1d63c20cfc 1054 if ( data )
wvd_vegt 0:3c1d63c20cfc 1055 {
wvd_vegt 0:3c1d63c20cfc 1056 data->Stamp( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1057 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1058 }
wvd_vegt 0:3c1d63c20cfc 1059
wvd_vegt 0:3c1d63c20cfc 1060 if ( *p != '<' )
wvd_vegt 0:3c1d63c20cfc 1061 {
wvd_vegt 0:3c1d63c20cfc 1062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1063 return 0;
wvd_vegt 0:3c1d63c20cfc 1064 }
wvd_vegt 0:3c1d63c20cfc 1065
wvd_vegt 0:3c1d63c20cfc 1066 p = SkipWhiteSpace( p+1, encoding );
wvd_vegt 0:3c1d63c20cfc 1067
wvd_vegt 0:3c1d63c20cfc 1068 // Read the name.
wvd_vegt 0:3c1d63c20cfc 1069 const char* pErr = p;
wvd_vegt 0:3c1d63c20cfc 1070
wvd_vegt 0:3c1d63c20cfc 1071 p = ReadName( p, &value, encoding );
wvd_vegt 0:3c1d63c20cfc 1072 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1073 {
wvd_vegt 0:3c1d63c20cfc 1074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1075 return 0;
wvd_vegt 0:3c1d63c20cfc 1076 }
wvd_vegt 0:3c1d63c20cfc 1077
wvd_vegt 0:3c1d63c20cfc 1078 TIXML_STRING endTag ("</");
wvd_vegt 0:3c1d63c20cfc 1079 endTag += value;
wvd_vegt 0:3c1d63c20cfc 1080
wvd_vegt 0:3c1d63c20cfc 1081 // Check for and read attributes. Also look for an empty
wvd_vegt 0:3c1d63c20cfc 1082 // tag or an end tag.
wvd_vegt 0:3c1d63c20cfc 1083 while ( p && *p )
wvd_vegt 0:3c1d63c20cfc 1084 {
wvd_vegt 0:3c1d63c20cfc 1085 pErr = p;
wvd_vegt 0:3c1d63c20cfc 1086 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1087 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1088 {
wvd_vegt 0:3c1d63c20cfc 1089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1090 return 0;
wvd_vegt 0:3c1d63c20cfc 1091 }
wvd_vegt 0:3c1d63c20cfc 1092 if ( *p == '/' )
wvd_vegt 0:3c1d63c20cfc 1093 {
wvd_vegt 0:3c1d63c20cfc 1094 ++p;
wvd_vegt 0:3c1d63c20cfc 1095 // Empty tag.
wvd_vegt 0:3c1d63c20cfc 1096 if ( *p != '>' )
wvd_vegt 0:3c1d63c20cfc 1097 {
wvd_vegt 0:3c1d63c20cfc 1098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1099 return 0;
wvd_vegt 0:3c1d63c20cfc 1100 }
wvd_vegt 0:3c1d63c20cfc 1101 return (p+1);
wvd_vegt 0:3c1d63c20cfc 1102 }
wvd_vegt 0:3c1d63c20cfc 1103 else if ( *p == '>' )
wvd_vegt 0:3c1d63c20cfc 1104 {
wvd_vegt 0:3c1d63c20cfc 1105 // Done with attributes (if there were any.)
wvd_vegt 0:3c1d63c20cfc 1106 // Read the value -- which can include other
wvd_vegt 0:3c1d63c20cfc 1107 // elements -- read the end tag, and return.
wvd_vegt 0:3c1d63c20cfc 1108 ++p;
wvd_vegt 0:3c1d63c20cfc 1109 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
wvd_vegt 0:3c1d63c20cfc 1110 if ( !p || !*p ) {
wvd_vegt 0:3c1d63c20cfc 1111 // We were looking for the end tag, but found nothing.
wvd_vegt 0:3c1d63c20cfc 1112 // Fix for [ 1663758 ] Failure to report error on bad XML
wvd_vegt 0:3c1d63c20cfc 1113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1114 return 0;
wvd_vegt 0:3c1d63c20cfc 1115 }
wvd_vegt 0:3c1d63c20cfc 1116
wvd_vegt 0:3c1d63c20cfc 1117 // We should find the end tag now
wvd_vegt 0:3c1d63c20cfc 1118 // note that:
wvd_vegt 0:3c1d63c20cfc 1119 // </foo > and
wvd_vegt 0:3c1d63c20cfc 1120 // </foo>
wvd_vegt 0:3c1d63c20cfc 1121 // are both valid end tags.
wvd_vegt 0:3c1d63c20cfc 1122 if ( StringEqual( p, endTag.c_str(), false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1123 {
wvd_vegt 0:3c1d63c20cfc 1124 p += endTag.length();
wvd_vegt 0:3c1d63c20cfc 1125 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1126 if ( p && *p && *p == '>' ) {
wvd_vegt 0:3c1d63c20cfc 1127 ++p;
wvd_vegt 0:3c1d63c20cfc 1128 return p;
wvd_vegt 0:3c1d63c20cfc 1129 }
wvd_vegt 0:3c1d63c20cfc 1130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1131 return 0;
wvd_vegt 0:3c1d63c20cfc 1132 }
wvd_vegt 0:3c1d63c20cfc 1133 else
wvd_vegt 0:3c1d63c20cfc 1134 {
wvd_vegt 0:3c1d63c20cfc 1135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1136 return 0;
wvd_vegt 0:3c1d63c20cfc 1137 }
wvd_vegt 0:3c1d63c20cfc 1138 }
wvd_vegt 0:3c1d63c20cfc 1139 else
wvd_vegt 0:3c1d63c20cfc 1140 {
wvd_vegt 0:3c1d63c20cfc 1141 // Try to read an attribute:
wvd_vegt 0:3c1d63c20cfc 1142 TiXmlAttribute* attrib = new TiXmlAttribute();
wvd_vegt 0:3c1d63c20cfc 1143 if ( !attrib )
wvd_vegt 0:3c1d63c20cfc 1144 {
wvd_vegt 0:3c1d63c20cfc 1145 return 0;
wvd_vegt 0:3c1d63c20cfc 1146 }
wvd_vegt 0:3c1d63c20cfc 1147
wvd_vegt 0:3c1d63c20cfc 1148 attrib->SetDocument( document );
wvd_vegt 0:3c1d63c20cfc 1149 pErr = p;
wvd_vegt 0:3c1d63c20cfc 1150 p = attrib->Parse( p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1151
wvd_vegt 0:3c1d63c20cfc 1152 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1153 {
wvd_vegt 0:3c1d63c20cfc 1154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1155 delete attrib;
wvd_vegt 0:3c1d63c20cfc 1156 return 0;
wvd_vegt 0:3c1d63c20cfc 1157 }
wvd_vegt 0:3c1d63c20cfc 1158
wvd_vegt 0:3c1d63c20cfc 1159 // Handle the strange case of double attributes:
wvd_vegt 0:3c1d63c20cfc 1160 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 1161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
wvd_vegt 0:3c1d63c20cfc 1162 #else
wvd_vegt 0:3c1d63c20cfc 1163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
wvd_vegt 0:3c1d63c20cfc 1164 #endif
wvd_vegt 0:3c1d63c20cfc 1165 if ( node )
wvd_vegt 0:3c1d63c20cfc 1166 {
wvd_vegt 0:3c1d63c20cfc 1167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1168 delete attrib;
wvd_vegt 0:3c1d63c20cfc 1169 return 0;
wvd_vegt 0:3c1d63c20cfc 1170 }
wvd_vegt 0:3c1d63c20cfc 1171
wvd_vegt 0:3c1d63c20cfc 1172 attributeSet.Add( attrib );
wvd_vegt 0:3c1d63c20cfc 1173 }
wvd_vegt 0:3c1d63c20cfc 1174 }
wvd_vegt 0:3c1d63c20cfc 1175 return p;
wvd_vegt 0:3c1d63c20cfc 1176 }
wvd_vegt 0:3c1d63c20cfc 1177
wvd_vegt 0:3c1d63c20cfc 1178
wvd_vegt 0:3c1d63c20cfc 1179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1180 {
wvd_vegt 0:3c1d63c20cfc 1181 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1182
wvd_vegt 0:3c1d63c20cfc 1183 // Read in text and elements in any order.
wvd_vegt 0:3c1d63c20cfc 1184 const char* pWithWhiteSpace = p;
wvd_vegt 0:3c1d63c20cfc 1185 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1186
wvd_vegt 0:3c1d63c20cfc 1187 while ( p && *p )
wvd_vegt 0:3c1d63c20cfc 1188 {
wvd_vegt 0:3c1d63c20cfc 1189 if ( *p != '<' )
wvd_vegt 0:3c1d63c20cfc 1190 {
wvd_vegt 0:3c1d63c20cfc 1191 // Take what we have, make a text element.
wvd_vegt 0:3c1d63c20cfc 1192 TiXmlText* textNode = new TiXmlText( "" );
wvd_vegt 0:3c1d63c20cfc 1193
wvd_vegt 0:3c1d63c20cfc 1194 if ( !textNode )
wvd_vegt 0:3c1d63c20cfc 1195 {
wvd_vegt 0:3c1d63c20cfc 1196 return 0;
wvd_vegt 0:3c1d63c20cfc 1197 }
wvd_vegt 0:3c1d63c20cfc 1198
wvd_vegt 0:3c1d63c20cfc 1199 if ( TiXmlBase::IsWhiteSpaceCondensed() )
wvd_vegt 0:3c1d63c20cfc 1200 {
wvd_vegt 0:3c1d63c20cfc 1201 p = textNode->Parse( p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1202 }
wvd_vegt 0:3c1d63c20cfc 1203 else
wvd_vegt 0:3c1d63c20cfc 1204 {
wvd_vegt 0:3c1d63c20cfc 1205 // Special case: we want to keep the white space
wvd_vegt 0:3c1d63c20cfc 1206 // so that leading spaces aren't removed.
wvd_vegt 0:3c1d63c20cfc 1207 p = textNode->Parse( pWithWhiteSpace, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1208 }
wvd_vegt 0:3c1d63c20cfc 1209
wvd_vegt 0:3c1d63c20cfc 1210 if ( !textNode->Blank() )
wvd_vegt 0:3c1d63c20cfc 1211 LinkEndChild( textNode );
wvd_vegt 0:3c1d63c20cfc 1212 else
wvd_vegt 0:3c1d63c20cfc 1213 delete textNode;
wvd_vegt 0:3c1d63c20cfc 1214 }
wvd_vegt 0:3c1d63c20cfc 1215 else
wvd_vegt 0:3c1d63c20cfc 1216 {
wvd_vegt 0:3c1d63c20cfc 1217 // We hit a '<'
wvd_vegt 0:3c1d63c20cfc 1218 // Have we hit a new element or an end tag? This could also be
wvd_vegt 0:3c1d63c20cfc 1219 // a TiXmlText in the "CDATA" style.
wvd_vegt 0:3c1d63c20cfc 1220 if ( StringEqual( p, "</", false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1221 {
wvd_vegt 0:3c1d63c20cfc 1222 return p;
wvd_vegt 0:3c1d63c20cfc 1223 }
wvd_vegt 0:3c1d63c20cfc 1224 else
wvd_vegt 0:3c1d63c20cfc 1225 {
wvd_vegt 0:3c1d63c20cfc 1226 TiXmlNode* node = Identify( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1227 if ( node )
wvd_vegt 0:3c1d63c20cfc 1228 {
wvd_vegt 0:3c1d63c20cfc 1229 p = node->Parse( p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1230 LinkEndChild( node );
wvd_vegt 0:3c1d63c20cfc 1231 }
wvd_vegt 0:3c1d63c20cfc 1232 else
wvd_vegt 0:3c1d63c20cfc 1233 {
wvd_vegt 0:3c1d63c20cfc 1234 return 0;
wvd_vegt 0:3c1d63c20cfc 1235 }
wvd_vegt 0:3c1d63c20cfc 1236 }
wvd_vegt 0:3c1d63c20cfc 1237 }
wvd_vegt 0:3c1d63c20cfc 1238 pWithWhiteSpace = p;
wvd_vegt 0:3c1d63c20cfc 1239 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1240 }
wvd_vegt 0:3c1d63c20cfc 1241
wvd_vegt 0:3c1d63c20cfc 1242 if ( !p )
wvd_vegt 0:3c1d63c20cfc 1243 {
wvd_vegt 0:3c1d63c20cfc 1244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
wvd_vegt 0:3c1d63c20cfc 1245 }
wvd_vegt 0:3c1d63c20cfc 1246 return p;
wvd_vegt 0:3c1d63c20cfc 1247 }
wvd_vegt 0:3c1d63c20cfc 1248
wvd_vegt 0:3c1d63c20cfc 1249
wvd_vegt 0:3c1d63c20cfc 1250 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 1251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 1252 {
wvd_vegt 0:3c1d63c20cfc 1253 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 1254 {
wvd_vegt 0:3c1d63c20cfc 1255 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 1256 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 1257 {
wvd_vegt 0:3c1d63c20cfc 1258 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1259 if ( document )
wvd_vegt 0:3c1d63c20cfc 1260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 1261 return;
wvd_vegt 0:3c1d63c20cfc 1262 }
wvd_vegt 0:3c1d63c20cfc 1263 (*tag) += (char) c;
wvd_vegt 0:3c1d63c20cfc 1264
wvd_vegt 0:3c1d63c20cfc 1265 if ( c == '>' )
wvd_vegt 0:3c1d63c20cfc 1266 {
wvd_vegt 0:3c1d63c20cfc 1267 // All is well.
wvd_vegt 0:3c1d63c20cfc 1268 return;
wvd_vegt 0:3c1d63c20cfc 1269 }
wvd_vegt 0:3c1d63c20cfc 1270 }
wvd_vegt 0:3c1d63c20cfc 1271 }
wvd_vegt 0:3c1d63c20cfc 1272 #endif
wvd_vegt 0:3c1d63c20cfc 1273
wvd_vegt 0:3c1d63c20cfc 1274
wvd_vegt 0:3c1d63c20cfc 1275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1276 {
wvd_vegt 0:3c1d63c20cfc 1277 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1278 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1279
wvd_vegt 0:3c1d63c20cfc 1280 if ( data )
wvd_vegt 0:3c1d63c20cfc 1281 {
wvd_vegt 0:3c1d63c20cfc 1282 data->Stamp( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1283 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1284 }
wvd_vegt 0:3c1d63c20cfc 1285 if ( !p || !*p || *p != '<' )
wvd_vegt 0:3c1d63c20cfc 1286 {
wvd_vegt 0:3c1d63c20cfc 1287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1288 return 0;
wvd_vegt 0:3c1d63c20cfc 1289 }
wvd_vegt 0:3c1d63c20cfc 1290 ++p;
wvd_vegt 0:3c1d63c20cfc 1291 value = "";
wvd_vegt 0:3c1d63c20cfc 1292
wvd_vegt 0:3c1d63c20cfc 1293 while ( p && *p && *p != '>' )
wvd_vegt 0:3c1d63c20cfc 1294 {
wvd_vegt 0:3c1d63c20cfc 1295 value += *p;
wvd_vegt 0:3c1d63c20cfc 1296 ++p;
wvd_vegt 0:3c1d63c20cfc 1297 }
wvd_vegt 0:3c1d63c20cfc 1298
wvd_vegt 0:3c1d63c20cfc 1299 if ( !p )
wvd_vegt 0:3c1d63c20cfc 1300 {
wvd_vegt 0:3c1d63c20cfc 1301 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
wvd_vegt 0:3c1d63c20cfc 1302 }
wvd_vegt 0:3c1d63c20cfc 1303 if ( *p == '>' )
wvd_vegt 0:3c1d63c20cfc 1304 return p+1;
wvd_vegt 0:3c1d63c20cfc 1305 return p;
wvd_vegt 0:3c1d63c20cfc 1306 }
wvd_vegt 0:3c1d63c20cfc 1307
wvd_vegt 0:3c1d63c20cfc 1308 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 1309 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 1310 {
wvd_vegt 0:3c1d63c20cfc 1311 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 1312 {
wvd_vegt 0:3c1d63c20cfc 1313 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 1314 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 1315 {
wvd_vegt 0:3c1d63c20cfc 1316 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1317 if ( document )
wvd_vegt 0:3c1d63c20cfc 1318 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 1319 return;
wvd_vegt 0:3c1d63c20cfc 1320 }
wvd_vegt 0:3c1d63c20cfc 1321
wvd_vegt 0:3c1d63c20cfc 1322 (*tag) += (char) c;
wvd_vegt 0:3c1d63c20cfc 1323
wvd_vegt 0:3c1d63c20cfc 1324 if ( c == '>'
wvd_vegt 0:3c1d63c20cfc 1325 && tag->at( tag->length() - 2 ) == '-'
wvd_vegt 0:3c1d63c20cfc 1326 && tag->at( tag->length() - 3 ) == '-' )
wvd_vegt 0:3c1d63c20cfc 1327 {
wvd_vegt 0:3c1d63c20cfc 1328 // All is well.
wvd_vegt 0:3c1d63c20cfc 1329 return;
wvd_vegt 0:3c1d63c20cfc 1330 }
wvd_vegt 0:3c1d63c20cfc 1331 }
wvd_vegt 0:3c1d63c20cfc 1332 }
wvd_vegt 0:3c1d63c20cfc 1333 #endif
wvd_vegt 0:3c1d63c20cfc 1334
wvd_vegt 0:3c1d63c20cfc 1335
wvd_vegt 0:3c1d63c20cfc 1336 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1337 {
wvd_vegt 0:3c1d63c20cfc 1338 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1339 value = "";
wvd_vegt 0:3c1d63c20cfc 1340
wvd_vegt 0:3c1d63c20cfc 1341 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1342
wvd_vegt 0:3c1d63c20cfc 1343 if ( data )
wvd_vegt 0:3c1d63c20cfc 1344 {
wvd_vegt 0:3c1d63c20cfc 1345 data->Stamp( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1346 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1347 }
wvd_vegt 0:3c1d63c20cfc 1348 const char* startTag = "<!--";
wvd_vegt 0:3c1d63c20cfc 1349 const char* endTag = "-->";
wvd_vegt 0:3c1d63c20cfc 1350
wvd_vegt 0:3c1d63c20cfc 1351 if ( !StringEqual( p, startTag, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1352 {
wvd_vegt 0:3c1d63c20cfc 1353 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1354 return 0;
wvd_vegt 0:3c1d63c20cfc 1355 }
wvd_vegt 0:3c1d63c20cfc 1356 p += strlen( startTag );
wvd_vegt 0:3c1d63c20cfc 1357
wvd_vegt 0:3c1d63c20cfc 1358 // [ 1475201 ] TinyXML parses entities in comments
wvd_vegt 0:3c1d63c20cfc 1359 // Oops - ReadText doesn't work, because we don't want to parse the entities.
wvd_vegt 0:3c1d63c20cfc 1360 // p = ReadText( p, &value, false, endTag, false, encoding );
wvd_vegt 0:3c1d63c20cfc 1361 //
wvd_vegt 0:3c1d63c20cfc 1362 // from the XML spec:
wvd_vegt 0:3c1d63c20cfc 1363 /*
wvd_vegt 0:3c1d63c20cfc 1364 [Definition: Comments may appear anywhere in a document outside other markup; in addition,
wvd_vegt 0:3c1d63c20cfc 1365 they may appear within the document type declaration at places allowed by the grammar.
wvd_vegt 0:3c1d63c20cfc 1366 They are not part of the document's character data; an XML processor MAY, but need not,
wvd_vegt 0:3c1d63c20cfc 1367 make it possible for an application to retrieve the text of comments. For compatibility,
wvd_vegt 0:3c1d63c20cfc 1368 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
wvd_vegt 0:3c1d63c20cfc 1369 references MUST NOT be recognized within comments.
wvd_vegt 0:3c1d63c20cfc 1370
wvd_vegt 0:3c1d63c20cfc 1371 An example of a comment:
wvd_vegt 0:3c1d63c20cfc 1372
wvd_vegt 0:3c1d63c20cfc 1373 <!-- declarations for <head> & <body> -->
wvd_vegt 0:3c1d63c20cfc 1374 */
wvd_vegt 0:3c1d63c20cfc 1375
wvd_vegt 0:3c1d63c20cfc 1376 value = "";
wvd_vegt 0:3c1d63c20cfc 1377 // Keep all the white space.
wvd_vegt 0:3c1d63c20cfc 1378 while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1379 {
wvd_vegt 0:3c1d63c20cfc 1380 value.append( p, 1 );
wvd_vegt 0:3c1d63c20cfc 1381 ++p;
wvd_vegt 0:3c1d63c20cfc 1382 }
wvd_vegt 0:3c1d63c20cfc 1383 if ( p && *p )
wvd_vegt 0:3c1d63c20cfc 1384 p += strlen( endTag );
wvd_vegt 0:3c1d63c20cfc 1385
wvd_vegt 0:3c1d63c20cfc 1386 return p;
wvd_vegt 0:3c1d63c20cfc 1387 }
wvd_vegt 0:3c1d63c20cfc 1388
wvd_vegt 0:3c1d63c20cfc 1389
wvd_vegt 0:3c1d63c20cfc 1390 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1391 {
wvd_vegt 0:3c1d63c20cfc 1392 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1393 if ( !p || !*p ) return 0;
wvd_vegt 0:3c1d63c20cfc 1394
wvd_vegt 0:3c1d63c20cfc 1395 if ( data )
wvd_vegt 0:3c1d63c20cfc 1396 {
wvd_vegt 0:3c1d63c20cfc 1397 data->Stamp( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1398 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1399 }
wvd_vegt 0:3c1d63c20cfc 1400 // Read the name, the '=' and the value.
wvd_vegt 0:3c1d63c20cfc 1401 const char* pErr = p;
wvd_vegt 0:3c1d63c20cfc 1402 p = ReadName( p, &name, encoding );
wvd_vegt 0:3c1d63c20cfc 1403 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1404 {
wvd_vegt 0:3c1d63c20cfc 1405 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1406 return 0;
wvd_vegt 0:3c1d63c20cfc 1407 }
wvd_vegt 0:3c1d63c20cfc 1408 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1409 if ( !p || !*p || *p != '=' )
wvd_vegt 0:3c1d63c20cfc 1410 {
wvd_vegt 0:3c1d63c20cfc 1411 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1412 return 0;
wvd_vegt 0:3c1d63c20cfc 1413 }
wvd_vegt 0:3c1d63c20cfc 1414
wvd_vegt 0:3c1d63c20cfc 1415 ++p; // skip '='
wvd_vegt 0:3c1d63c20cfc 1416 p = SkipWhiteSpace( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1417 if ( !p || !*p )
wvd_vegt 0:3c1d63c20cfc 1418 {
wvd_vegt 0:3c1d63c20cfc 1419 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1420 return 0;
wvd_vegt 0:3c1d63c20cfc 1421 }
wvd_vegt 0:3c1d63c20cfc 1422
wvd_vegt 0:3c1d63c20cfc 1423 const char* end;
wvd_vegt 0:3c1d63c20cfc 1424 const char SINGLE_QUOTE = '\'';
wvd_vegt 0:3c1d63c20cfc 1425 const char DOUBLE_QUOTE = '\"';
wvd_vegt 0:3c1d63c20cfc 1426
wvd_vegt 0:3c1d63c20cfc 1427 if ( *p == SINGLE_QUOTE )
wvd_vegt 0:3c1d63c20cfc 1428 {
wvd_vegt 0:3c1d63c20cfc 1429 ++p;
wvd_vegt 0:3c1d63c20cfc 1430 end = "\'"; // single quote in string
wvd_vegt 0:3c1d63c20cfc 1431 p = ReadText( p, &value, false, end, false, encoding );
wvd_vegt 0:3c1d63c20cfc 1432 }
wvd_vegt 0:3c1d63c20cfc 1433 else if ( *p == DOUBLE_QUOTE )
wvd_vegt 0:3c1d63c20cfc 1434 {
wvd_vegt 0:3c1d63c20cfc 1435 ++p;
wvd_vegt 0:3c1d63c20cfc 1436 end = "\""; // double quote in string
wvd_vegt 0:3c1d63c20cfc 1437 p = ReadText( p, &value, false, end, false, encoding );
wvd_vegt 0:3c1d63c20cfc 1438 }
wvd_vegt 0:3c1d63c20cfc 1439 else
wvd_vegt 0:3c1d63c20cfc 1440 {
wvd_vegt 0:3c1d63c20cfc 1441 // All attribute values should be in single or double quotes.
wvd_vegt 0:3c1d63c20cfc 1442 // But this is such a common error that the parser will try
wvd_vegt 0:3c1d63c20cfc 1443 // its best, even without them.
wvd_vegt 0:3c1d63c20cfc 1444 value = "";
wvd_vegt 0:3c1d63c20cfc 1445 while ( p && *p // existence
wvd_vegt 0:3c1d63c20cfc 1446 && !IsWhiteSpace( *p ) // whitespace
wvd_vegt 0:3c1d63c20cfc 1447 && *p != '/' && *p != '>' ) // tag end
wvd_vegt 0:3c1d63c20cfc 1448 {
wvd_vegt 0:3c1d63c20cfc 1449 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
wvd_vegt 0:3c1d63c20cfc 1450 // [ 1451649 ] Attribute values with trailing quotes not handled correctly
wvd_vegt 0:3c1d63c20cfc 1451 // We did not have an opening quote but seem to have a
wvd_vegt 0:3c1d63c20cfc 1452 // closing one. Give up and throw an error.
wvd_vegt 0:3c1d63c20cfc 1453 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1454 return 0;
wvd_vegt 0:3c1d63c20cfc 1455 }
wvd_vegt 0:3c1d63c20cfc 1456 value += *p;
wvd_vegt 0:3c1d63c20cfc 1457 ++p;
wvd_vegt 0:3c1d63c20cfc 1458 }
wvd_vegt 0:3c1d63c20cfc 1459 }
wvd_vegt 0:3c1d63c20cfc 1460 return p;
wvd_vegt 0:3c1d63c20cfc 1461 }
wvd_vegt 0:3c1d63c20cfc 1462
wvd_vegt 0:3c1d63c20cfc 1463 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 1464 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 1465 {
wvd_vegt 0:3c1d63c20cfc 1466 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 1467 {
wvd_vegt 0:3c1d63c20cfc 1468 int c = in->peek();
wvd_vegt 0:3c1d63c20cfc 1469 if ( !cdata && (c == '<' ) )
wvd_vegt 0:3c1d63c20cfc 1470 {
wvd_vegt 0:3c1d63c20cfc 1471 return;
wvd_vegt 0:3c1d63c20cfc 1472 }
wvd_vegt 0:3c1d63c20cfc 1473 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 1474 {
wvd_vegt 0:3c1d63c20cfc 1475 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1476 if ( document )
wvd_vegt 0:3c1d63c20cfc 1477 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 1478 return;
wvd_vegt 0:3c1d63c20cfc 1479 }
wvd_vegt 0:3c1d63c20cfc 1480
wvd_vegt 0:3c1d63c20cfc 1481 (*tag) += (char) c;
wvd_vegt 0:3c1d63c20cfc 1482 in->get(); // "commits" the peek made above
wvd_vegt 0:3c1d63c20cfc 1483
wvd_vegt 0:3c1d63c20cfc 1484 if ( cdata && c == '>' && tag->size() >= 3 ) {
wvd_vegt 0:3c1d63c20cfc 1485 size_t len = tag->size();
wvd_vegt 0:3c1d63c20cfc 1486 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
wvd_vegt 0:3c1d63c20cfc 1487 // terminator of cdata.
wvd_vegt 0:3c1d63c20cfc 1488 return;
wvd_vegt 0:3c1d63c20cfc 1489 }
wvd_vegt 0:3c1d63c20cfc 1490 }
wvd_vegt 0:3c1d63c20cfc 1491 }
wvd_vegt 0:3c1d63c20cfc 1492 }
wvd_vegt 0:3c1d63c20cfc 1493 #endif
wvd_vegt 0:3c1d63c20cfc 1494
wvd_vegt 0:3c1d63c20cfc 1495 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
wvd_vegt 0:3c1d63c20cfc 1496 {
wvd_vegt 0:3c1d63c20cfc 1497 value = "";
wvd_vegt 0:3c1d63c20cfc 1498 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1499
wvd_vegt 0:3c1d63c20cfc 1500 if ( data )
wvd_vegt 0:3c1d63c20cfc 1501 {
wvd_vegt 0:3c1d63c20cfc 1502 data->Stamp( p, encoding );
wvd_vegt 0:3c1d63c20cfc 1503 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1504 }
wvd_vegt 0:3c1d63c20cfc 1505
wvd_vegt 0:3c1d63c20cfc 1506 const char* const startTag = "<![CDATA[";
wvd_vegt 0:3c1d63c20cfc 1507 const char* const endTag = "]]>";
wvd_vegt 0:3c1d63c20cfc 1508
wvd_vegt 0:3c1d63c20cfc 1509 if ( cdata || StringEqual( p, startTag, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1510 {
wvd_vegt 0:3c1d63c20cfc 1511 cdata = true;
wvd_vegt 0:3c1d63c20cfc 1512
wvd_vegt 0:3c1d63c20cfc 1513 if ( !StringEqual( p, startTag, false, encoding ) )
wvd_vegt 0:3c1d63c20cfc 1514 {
wvd_vegt 0:3c1d63c20cfc 1515 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
wvd_vegt 0:3c1d63c20cfc 1516 return 0;
wvd_vegt 0:3c1d63c20cfc 1517 }
wvd_vegt 0:3c1d63c20cfc 1518 p += strlen( startTag );
wvd_vegt 0:3c1d63c20cfc 1519
wvd_vegt 0:3c1d63c20cfc 1520 // Keep all the white space, ignore the encoding, etc.
wvd_vegt 0:3c1d63c20cfc 1521 while ( p && *p
wvd_vegt 0:3c1d63c20cfc 1522 && !StringEqual( p, endTag, false, encoding )
wvd_vegt 0:3c1d63c20cfc 1523 )
wvd_vegt 0:3c1d63c20cfc 1524 {
wvd_vegt 0:3c1d63c20cfc 1525 value += *p;
wvd_vegt 0:3c1d63c20cfc 1526 ++p;
wvd_vegt 0:3c1d63c20cfc 1527 }
wvd_vegt 0:3c1d63c20cfc 1528
wvd_vegt 0:3c1d63c20cfc 1529 TIXML_STRING dummy;
wvd_vegt 0:3c1d63c20cfc 1530 p = ReadText( p, &dummy, false, endTag, false, encoding );
wvd_vegt 0:3c1d63c20cfc 1531 return p;
wvd_vegt 0:3c1d63c20cfc 1532 }
wvd_vegt 0:3c1d63c20cfc 1533 else
wvd_vegt 0:3c1d63c20cfc 1534 {
wvd_vegt 0:3c1d63c20cfc 1535 bool ignoreWhite = true;
wvd_vegt 0:3c1d63c20cfc 1536
wvd_vegt 0:3c1d63c20cfc 1537 const char* end = "<";
wvd_vegt 0:3c1d63c20cfc 1538 p = ReadText( p, &value, ignoreWhite, end, false, encoding );
wvd_vegt 0:3c1d63c20cfc 1539 if ( p )
wvd_vegt 0:3c1d63c20cfc 1540 return p-1; // don't truncate the '<'
wvd_vegt 0:3c1d63c20cfc 1541 return 0;
wvd_vegt 0:3c1d63c20cfc 1542 }
wvd_vegt 0:3c1d63c20cfc 1543 }
wvd_vegt 0:3c1d63c20cfc 1544
wvd_vegt 0:3c1d63c20cfc 1545 #ifdef TIXML_USE_STL
wvd_vegt 0:3c1d63c20cfc 1546 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
wvd_vegt 0:3c1d63c20cfc 1547 {
wvd_vegt 0:3c1d63c20cfc 1548 while ( in->good() )
wvd_vegt 0:3c1d63c20cfc 1549 {
wvd_vegt 0:3c1d63c20cfc 1550 int c = in->get();
wvd_vegt 0:3c1d63c20cfc 1551 if ( c <= 0 )
wvd_vegt 0:3c1d63c20cfc 1552 {
wvd_vegt 0:3c1d63c20cfc 1553 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1554 if ( document )
wvd_vegt 0:3c1d63c20cfc 1555 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
wvd_vegt 0:3c1d63c20cfc 1556 return;
wvd_vegt 0:3c1d63c20cfc 1557 }
wvd_vegt 0:3c1d63c20cfc 1558 (*tag) += (char) c;
wvd_vegt 0:3c1d63c20cfc 1559
wvd_vegt 0:3c1d63c20cfc 1560 if ( c == '>' )
wvd_vegt 0:3c1d63c20cfc 1561 {
wvd_vegt 0:3c1d63c20cfc 1562 // All is well.
wvd_vegt 0:3c1d63c20cfc 1563 return;
wvd_vegt 0:3c1d63c20cfc 1564 }
wvd_vegt 0:3c1d63c20cfc 1565 }
wvd_vegt 0:3c1d63c20cfc 1566 }
wvd_vegt 0:3c1d63c20cfc 1567 #endif
wvd_vegt 0:3c1d63c20cfc 1568
wvd_vegt 0:3c1d63c20cfc 1569 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
wvd_vegt 0:3c1d63c20cfc 1570 {
wvd_vegt 0:3c1d63c20cfc 1571 p = SkipWhiteSpace( p, _encoding );
wvd_vegt 0:3c1d63c20cfc 1572 // Find the beginning, find the end, and look for
wvd_vegt 0:3c1d63c20cfc 1573 // the stuff in-between.
wvd_vegt 0:3c1d63c20cfc 1574 TiXmlDocument* document = GetDocument();
wvd_vegt 0:3c1d63c20cfc 1575 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
wvd_vegt 0:3c1d63c20cfc 1576 {
wvd_vegt 0:3c1d63c20cfc 1577 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
wvd_vegt 0:3c1d63c20cfc 1578 return 0;
wvd_vegt 0:3c1d63c20cfc 1579 }
wvd_vegt 0:3c1d63c20cfc 1580 if ( data )
wvd_vegt 0:3c1d63c20cfc 1581 {
wvd_vegt 0:3c1d63c20cfc 1582 data->Stamp( p, _encoding );
wvd_vegt 0:3c1d63c20cfc 1583 location = data->Cursor();
wvd_vegt 0:3c1d63c20cfc 1584 }
wvd_vegt 0:3c1d63c20cfc 1585 p += 5;
wvd_vegt 0:3c1d63c20cfc 1586
wvd_vegt 0:3c1d63c20cfc 1587 version = "";
wvd_vegt 0:3c1d63c20cfc 1588 encoding = "";
wvd_vegt 0:3c1d63c20cfc 1589 standalone = "";
wvd_vegt 0:3c1d63c20cfc 1590
wvd_vegt 0:3c1d63c20cfc 1591 while ( p && *p )
wvd_vegt 0:3c1d63c20cfc 1592 {
wvd_vegt 0:3c1d63c20cfc 1593 if ( *p == '>' )
wvd_vegt 0:3c1d63c20cfc 1594 {
wvd_vegt 0:3c1d63c20cfc 1595 ++p;
wvd_vegt 0:3c1d63c20cfc 1596 return p;
wvd_vegt 0:3c1d63c20cfc 1597 }
wvd_vegt 0:3c1d63c20cfc 1598
wvd_vegt 0:3c1d63c20cfc 1599 p = SkipWhiteSpace( p, _encoding );
wvd_vegt 0:3c1d63c20cfc 1600 if ( StringEqual( p, "version", true, _encoding ) )
wvd_vegt 0:3c1d63c20cfc 1601 {
wvd_vegt 0:3c1d63c20cfc 1602 TiXmlAttribute attrib;
wvd_vegt 0:3c1d63c20cfc 1603 p = attrib.Parse( p, data, _encoding );
wvd_vegt 0:3c1d63c20cfc 1604 version = attrib.Value();
wvd_vegt 0:3c1d63c20cfc 1605 }
wvd_vegt 0:3c1d63c20cfc 1606 else if ( StringEqual( p, "encoding", true, _encoding ) )
wvd_vegt 0:3c1d63c20cfc 1607 {
wvd_vegt 0:3c1d63c20cfc 1608 TiXmlAttribute attrib;
wvd_vegt 0:3c1d63c20cfc 1609 p = attrib.Parse( p, data, _encoding );
wvd_vegt 0:3c1d63c20cfc 1610 encoding = attrib.Value();
wvd_vegt 0:3c1d63c20cfc 1611 }
wvd_vegt 0:3c1d63c20cfc 1612 else if ( StringEqual( p, "standalone", true, _encoding ) )
wvd_vegt 0:3c1d63c20cfc 1613 {
wvd_vegt 0:3c1d63c20cfc 1614 TiXmlAttribute attrib;
wvd_vegt 0:3c1d63c20cfc 1615 p = attrib.Parse( p, data, _encoding );
wvd_vegt 0:3c1d63c20cfc 1616 standalone = attrib.Value();
wvd_vegt 0:3c1d63c20cfc 1617 }
wvd_vegt 0:3c1d63c20cfc 1618 else
wvd_vegt 0:3c1d63c20cfc 1619 {
wvd_vegt 0:3c1d63c20cfc 1620 // Read over whatever it is.
wvd_vegt 0:3c1d63c20cfc 1621 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
wvd_vegt 0:3c1d63c20cfc 1622 ++p;
wvd_vegt 0:3c1d63c20cfc 1623 }
wvd_vegt 0:3c1d63c20cfc 1624 }
wvd_vegt 0:3c1d63c20cfc 1625 return 0;
wvd_vegt 0:3c1d63c20cfc 1626 }
wvd_vegt 0:3c1d63c20cfc 1627
wvd_vegt 0:3c1d63c20cfc 1628 bool TiXmlText::Blank() const
wvd_vegt 0:3c1d63c20cfc 1629 {
wvd_vegt 0:3c1d63c20cfc 1630 for ( unsigned i=0; i<value.length(); i++ )
wvd_vegt 0:3c1d63c20cfc 1631 if ( !IsWhiteSpace( value[i] ) )
wvd_vegt 0:3c1d63c20cfc 1632 return false;
wvd_vegt 0:3c1d63c20cfc 1633 return true;
wvd_vegt 0:3c1d63c20cfc 1634 }