SAX based XML parser

Dependents:   giken9_HTMLServer_Temp_Sample

Committer:
andrewbonney
Date:
Thu May 26 10:03:14 2011 +0000
Revision:
1:e96b2af301dd
Parent:
0:07919e3d6c56
Update to reduce buffer sizes

Who changed what in which revision?

User | Revision | Line number | New contents of line
andrewbonney 0:07919e3d6c56 1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
andrewbonney 0:07919e3d6c56 2 See the file COPYING for copying permission.
andrewbonney 0:07919e3d6c56 3 */
andrewbonney 0:07919e3d6c56 4
andrewbonney 0:07919e3d6c56 5 /* This file is included! */
andrewbonney 0:07919e3d6c56 6 #pragma diag_suppress 111
andrewbonney 0:07919e3d6c56 7
andrewbonney 0:07919e3d6c56 8 #ifdef XML_TOK_IMPL_C
andrewbonney 0:07919e3d6c56 9
andrewbonney 0:07919e3d6c56 10 #ifndef IS_INVALID_CHAR
andrewbonney 0:07919e3d6c56 11 #define IS_INVALID_CHAR(enc, ptr, n) (0)
andrewbonney 0:07919e3d6c56 12 #endif
andrewbonney 0:07919e3d6c56 13
/* Expands to one "case BT_LEADn:" arm for an n-byte character at ptr.
   - Fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR.
   - IS_INVALID_CHAR(enc, ptr, n) is true: stores ptr in *nextTokPtr and
     returns XML_TOK_INVALID.
   - Otherwise: advances ptr by n and breaks out of the switch.
   The names "end" and "enc" are not macro parameters; they are taken from
   the scope where the macro is expanded. */
andrewbonney 0:07919e3d6c56 14 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
andrewbonney 0:07919e3d6c56 15 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 16 if (end - ptr < n) \
andrewbonney 0:07919e3d6c56 17 return XML_TOK_PARTIAL_CHAR; \
andrewbonney 0:07919e3d6c56 18 if (IS_INVALID_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 19 *(nextTokPtr) = (ptr); \
andrewbonney 0:07919e3d6c56 20 return XML_TOK_INVALID; \
andrewbonney 0:07919e3d6c56 21 } \
andrewbonney 0:07919e3d6c56 22 ptr += n; \
andrewbonney 0:07919e3d6c56 23 break;
andrewbonney 0:07919e3d6c56 24
/* Expands to the switch arms shared by scanners that accept arbitrary
   character data: the 2-, 3- and 4-byte lead arms from INVALID_LEAD_CASE,
   followed by arms for BT_NONXML, BT_MALFORM and BT_TRAIL, which store ptr
   in *nextTokPtr and return XML_TOK_INVALID. */
andrewbonney 0:07919e3d6c56 25 #define INVALID_CASES(ptr, nextTokPtr) \
andrewbonney 0:07919e3d6c56 26 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
andrewbonney 0:07919e3d6c56 27 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
andrewbonney 0:07919e3d6c56 28 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
andrewbonney 0:07919e3d6c56 29 case BT_NONXML: \
andrewbonney 0:07919e3d6c56 30 case BT_MALFORM: \
andrewbonney 0:07919e3d6c56 31 case BT_TRAIL: \
andrewbonney 0:07919e3d6c56 32 *(nextTokPtr) = (ptr); \
andrewbonney 0:07919e3d6c56 33 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 34
/* Expands to one "case BT_LEADn:" arm that accepts an n-byte character only
   if IS_NAME_CHAR(enc, ptr, n) holds.
   - Fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR.
   - Not a name character: stores ptr in *nextTokPtr, returns XML_TOK_INVALID.
   - Otherwise: advances ptr by n and breaks out of the switch. */
andrewbonney 0:07919e3d6c56 35 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 36 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 37 if (end - ptr < n) \
andrewbonney 0:07919e3d6c56 38 return XML_TOK_PARTIAL_CHAR; \
andrewbonney 0:07919e3d6c56 39 if (!IS_NAME_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 40 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 41 return XML_TOK_INVALID; \
andrewbonney 0:07919e3d6c56 42 } \
andrewbonney 0:07919e3d6c56 43 ptr += n; \
andrewbonney 0:07919e3d6c56 44 break;
andrewbonney 0:07919e3d6c56 45
/* Expands to the switch arms that consume one name character at ptr.
   - BT_NONASCII: rejected with XML_TOK_INVALID unless
     IS_NAME_CHAR_MINBPC(enc, ptr) holds; when it holds, control deliberately
     falls through into the single-unit arm below.
   - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: advance ptr by
     MINBPC(enc) and break.
   - BT_LEAD2/3/4: handled by CHECK_NAME_CASE. */
andrewbonney 0:07919e3d6c56 46 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 47 case BT_NONASCII: \
andrewbonney 0:07919e3d6c56 48 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
andrewbonney 0:07919e3d6c56 49 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 50 return XML_TOK_INVALID; \
andrewbonney 0:07919e3d6c56 51 } \
andrewbonney 0:07919e3d6c56 52 case BT_NMSTRT: \
andrewbonney 0:07919e3d6c56 53 case BT_HEX: \
andrewbonney 0:07919e3d6c56 54 case BT_DIGIT: \
andrewbonney 0:07919e3d6c56 55 case BT_NAME: \
andrewbonney 0:07919e3d6c56 56 case BT_MINUS: \
andrewbonney 0:07919e3d6c56 57 ptr += MINBPC(enc); \
andrewbonney 0:07919e3d6c56 58 break; \
andrewbonney 0:07919e3d6c56 59 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 60 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 61 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 62
/* Expands to one "case BT_LEADn:" arm that accepts an n-byte character only
   if IS_NMSTRT_CHAR(enc, ptr, n) holds (i.e. it may start a name).
   - Fewer than n bytes left before end: returns XML_TOK_PARTIAL_CHAR.
   - Not a name-start character: stores ptr in *nextTokPtr and returns
     XML_TOK_INVALID.
   - Otherwise: advances ptr by n and breaks out of the switch. */
andrewbonney 0:07919e3d6c56 63 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 64 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 65 if (end - ptr < n) \
andrewbonney 0:07919e3d6c56 66 return XML_TOK_PARTIAL_CHAR; \
andrewbonney 0:07919e3d6c56 67 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 68 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 69 return XML_TOK_INVALID; \
andrewbonney 0:07919e3d6c56 70 } \
andrewbonney 0:07919e3d6c56 71 ptr += n; \
andrewbonney 0:07919e3d6c56 72 break;
andrewbonney 0:07919e3d6c56 73
/* Expands to the switch arms that consume one name-start character at ptr.
   - BT_NONASCII: rejected with XML_TOK_INVALID unless
     IS_NMSTRT_CHAR_MINBPC(enc, ptr) holds; when it holds, control
     deliberately falls through into the single-unit arm below.
   - BT_NMSTRT, BT_HEX: advance ptr by MINBPC(enc) and break.
   - BT_LEAD2/3/4: handled by CHECK_NMSTRT_CASE. */
andrewbonney 0:07919e3d6c56 74 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 75 case BT_NONASCII: \
andrewbonney 0:07919e3d6c56 76 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
andrewbonney 0:07919e3d6c56 77 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 78 return XML_TOK_INVALID; \
andrewbonney 0:07919e3d6c56 79 } \
andrewbonney 0:07919e3d6c56 80 case BT_NMSTRT: \
andrewbonney 0:07919e3d6c56 81 case BT_HEX: \
andrewbonney 0:07919e3d6c56 82 ptr += MINBPC(enc); \
andrewbonney 0:07919e3d6c56 83 break; \
andrewbonney 0:07919e3d6c56 84 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 85 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
andrewbonney 0:07919e3d6c56 86 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 87
andrewbonney 0:07919e3d6c56 88 #ifndef PREFIX
andrewbonney 0:07919e3d6c56 89 #define PREFIX(ident) ident
andrewbonney 0:07919e3d6c56 90 #endif
andrewbonney 0:07919e3d6c56 91
andrewbonney 0:07919e3d6c56 92 /* ptr points to character following "<!-" */
andrewbonney 0:07919e3d6c56 93
/* Scans the remainder of a comment, starting at the second "-" of "<!--".
   enc        - encoding used to classify and compare characters
   ptr        - current scan position
   end        - one past the last available byte
   nextTokPtr - out: position of the offending character on
                XML_TOK_INVALID, or just past the closing ">" on
                XML_TOK_COMMENT; not written on the partial returns
   Returns:
     XML_TOK_COMMENT      - a "--" followed by ">" was found
     XML_TOK_INVALID      - the character at ptr is not "-", a "--" is not
                            followed by ">", or INVALID_CASES rejected a
                            character
     XML_TOK_PARTIAL_CHAR - input ends inside a multi-byte character
     XML_TOK_PARTIAL      - input ends before the comment is closed */
andrewbonney 0:07919e3d6c56 94 static int PTRCALL
andrewbonney 0:07919e3d6c56 95 PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 96 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 97 {
andrewbonney 0:07919e3d6c56 98 if (ptr != end) {
andrewbonney 0:07919e3d6c56 99 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { /* second "-" of the opener is mandatory */
andrewbonney 0:07919e3d6c56 100 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 101 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 102 }
andrewbonney 0:07919e3d6c56 103 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 104 while (ptr != end) {
andrewbonney 0:07919e3d6c56 105 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 106 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 107 case BT_MINUS:
andrewbonney 0:07919e3d6c56 108 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 109 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 110 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { /* saw "--": only ">" may follow */
andrewbonney 0:07919e3d6c56 111 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 112 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 113 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 114 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 115 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 116 }
andrewbonney 0:07919e3d6c56 117 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 118 return XML_TOK_COMMENT;
andrewbonney 0:07919e3d6c56 119 }
andrewbonney 0:07919e3d6c56 120 break; /* single "-": ptr already sits on the next character, which is re-examined */
andrewbonney 0:07919e3d6c56 121 default:
andrewbonney 0:07919e3d6c56 122 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 123 break;
andrewbonney 0:07919e3d6c56 124 }
andrewbonney 0:07919e3d6c56 125 }
andrewbonney 0:07919e3d6c56 126 }
andrewbonney 0:07919e3d6c56 127 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 128 }
andrewbonney 0:07919e3d6c56 129
andrewbonney 0:07919e3d6c56 130 /* ptr points to character following "<!" */
andrewbonney 0:07919e3d6c56 131
/* Scans the start of a markup declaration, beginning right after "<!".
   Dispatch on the first character:
     "-"                 - hands over to scanComment
     "["                 - returns XML_TOK_COND_SECT_OPEN, *nextTokPtr past "["
     BT_NMSTRT / BT_HEX  - start of a declaration keyword, scanned below
     anything else       - XML_TOK_INVALID at that character
   The keyword is accepted while characters are BT_NMSTRT or BT_HEX. It ends
   at whitespace or at "%": the function returns XML_TOK_DECL_OPEN with
   *nextTokPtr pointing AT that delimiter (it is not consumed). A "%" that
   is itself followed by whitespace or another "%" is rejected.
   Returns XML_TOK_PARTIAL when input ends first; *nextTokPtr is not written
   in that case. */
andrewbonney 0:07919e3d6c56 132 static int PTRCALL
andrewbonney 0:07919e3d6c56 133 PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 134 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 135 {
andrewbonney 0:07919e3d6c56 136 if (ptr == end)
andrewbonney 0:07919e3d6c56 137 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 138 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 139 case BT_MINUS:
andrewbonney 0:07919e3d6c56 140 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 141 case BT_LSQB:
andrewbonney 0:07919e3d6c56 142 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 143 return XML_TOK_COND_SECT_OPEN;
andrewbonney 0:07919e3d6c56 144 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 145 case BT_HEX:
andrewbonney 0:07919e3d6c56 146 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 147 break;
andrewbonney 0:07919e3d6c56 148 default:
andrewbonney 0:07919e3d6c56 149 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 150 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 151 }
andrewbonney 0:07919e3d6c56 152 while (ptr != end) {
andrewbonney 0:07919e3d6c56 153 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 154 case BT_PERCNT:
andrewbonney 0:07919e3d6c56 155 if (ptr + MINBPC(enc) == end) /* need one character of lookahead after "%" */
andrewbonney 0:07919e3d6c56 156 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 157 /* don't allow <!ENTITY% foo "whatever"> */
andrewbonney 0:07919e3d6c56 158 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
andrewbonney 0:07919e3d6c56 159 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
andrewbonney 0:07919e3d6c56 160 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 161 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 162 }
andrewbonney 0:07919e3d6c56 163 /* fall through */
andrewbonney 0:07919e3d6c56 164 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 165 *nextTokPtr = ptr; /* token stops before the delimiter */
andrewbonney 0:07919e3d6c56 166 return XML_TOK_DECL_OPEN;
andrewbonney 0:07919e3d6c56 167 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 168 case BT_HEX:
andrewbonney 0:07919e3d6c56 169 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 170 break;
andrewbonney 0:07919e3d6c56 171 default:
andrewbonney 0:07919e3d6c56 172 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 173 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 174 }
andrewbonney 0:07919e3d6c56 175 }
andrewbonney 0:07919e3d6c56 176 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 177 }
andrewbonney 0:07919e3d6c56 178
/* Classifies a processing-instruction target spanning [ptr, end).
   tokPtr - out: set to XML_TOK_PI first, and upgraded to XML_TOK_XML_DECL
            only when the target is exactly lowercase "xml"
   Returns 1 when the target is acceptable, 0 when it spells "xml" with at
   least one uppercase letter (scanPi turns a 0 result into
   XML_TOK_INVALID).
   Any target that is not exactly three characters long, or that differs
   from "xml" ignoring case, is an ordinary PI target: returns 1 with
   *tokPtr left at XML_TOK_PI. */
andrewbonney 0:07919e3d6c56 179 static int PTRCALL
andrewbonney 0:07919e3d6c56 180 PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 181 const char *end, int *tokPtr)
andrewbonney 0:07919e3d6c56 182 {
andrewbonney 0:07919e3d6c56 183 int upper = 0; /* set once any of the three letters is uppercase */
andrewbonney 0:07919e3d6c56 184 *tokPtr = XML_TOK_PI;
andrewbonney 0:07919e3d6c56 185 if (end - ptr != MINBPC(enc)*3)
andrewbonney 0:07919e3d6c56 186 return 1;
andrewbonney 0:07919e3d6c56 187 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 188 case ASCII_x:
andrewbonney 0:07919e3d6c56 189 break;
andrewbonney 0:07919e3d6c56 190 case ASCII_X:
andrewbonney 0:07919e3d6c56 191 upper = 1;
andrewbonney 0:07919e3d6c56 192 break;
andrewbonney 0:07919e3d6c56 193 default:
andrewbonney 0:07919e3d6c56 194 return 1;
andrewbonney 0:07919e3d6c56 195 }
andrewbonney 0:07919e3d6c56 196 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 197 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 198 case ASCII_m:
andrewbonney 0:07919e3d6c56 199 break;
andrewbonney 0:07919e3d6c56 200 case ASCII_M:
andrewbonney 0:07919e3d6c56 201 upper = 1;
andrewbonney 0:07919e3d6c56 202 break;
andrewbonney 0:07919e3d6c56 203 default:
andrewbonney 0:07919e3d6c56 204 return 1;
andrewbonney 0:07919e3d6c56 205 }
andrewbonney 0:07919e3d6c56 206 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 207 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 208 case ASCII_l:
andrewbonney 0:07919e3d6c56 209 break;
andrewbonney 0:07919e3d6c56 210 case ASCII_L:
andrewbonney 0:07919e3d6c56 211 upper = 1;
andrewbonney 0:07919e3d6c56 212 break;
andrewbonney 0:07919e3d6c56 213 default:
andrewbonney 0:07919e3d6c56 214 return 1;
andrewbonney 0:07919e3d6c56 215 }
andrewbonney 0:07919e3d6c56 216 if (upper)
andrewbonney 0:07919e3d6c56 217 return 0;
andrewbonney 0:07919e3d6c56 218 *tokPtr = XML_TOK_XML_DECL;
andrewbonney 0:07919e3d6c56 219 return 1;
andrewbonney 0:07919e3d6c56 220 }
andrewbonney 0:07919e3d6c56 221
andrewbonney 0:07919e3d6c56 222 /* ptr points to character following "<?" */
andrewbonney 0:07919e3d6c56 223
/* Scans a processing instruction, starting right after "<?".
   The target must begin with a name-start character and continue with name
   characters. It ends either at whitespace or at "?":
     - whitespace: the target is checked with checkPiTarget, then everything
       up to the first "?>" is skipped;
     - "?": the target is checked, and ">" must follow immediately.
   Returns:
     the token chosen by checkPiTarget (XML_TOK_PI or XML_TOK_XML_DECL) with
                            *nextTokPtr just past ">"
     XML_TOK_INVALID      - bad target character, target rejected by
                            checkPiTarget, a character rejected by
                            INVALID_CASES, or "?" after the target not
                            followed by ">"; *nextTokPtr marks the position
     XML_TOK_PARTIAL_CHAR - input ends inside a multi-byte character
     XML_TOK_PARTIAL      - input ends before "?>" */
andrewbonney 0:07919e3d6c56 224 static int PTRCALL
andrewbonney 0:07919e3d6c56 225 PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 226 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 227 {
andrewbonney 0:07919e3d6c56 228 int tok; /* assigned by checkPiTarget before any path that returns it */
andrewbonney 0:07919e3d6c56 229 const char *target = ptr; /* start of the PI target name */
andrewbonney 0:07919e3d6c56 230 if (ptr == end)
andrewbonney 0:07919e3d6c56 231 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 232 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 233 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 234 default:
andrewbonney 0:07919e3d6c56 235 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 236 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 237 }
andrewbonney 0:07919e3d6c56 238 while (ptr != end) {
andrewbonney 0:07919e3d6c56 239 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 240 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 241 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 242 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
andrewbonney 0:07919e3d6c56 243 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 244 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 245 }
andrewbonney 0:07919e3d6c56 246 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 247 while (ptr != end) { /* skip the PI body until "?>" */
andrewbonney 0:07919e3d6c56 248 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 249 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 250 case BT_QUEST:
andrewbonney 0:07919e3d6c56 251 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 252 if (ptr == end)
andrewbonney 0:07919e3d6c56 253 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 254 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 255 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 256 return tok;
andrewbonney 0:07919e3d6c56 257 }
andrewbonney 0:07919e3d6c56 258 break; /* lone "?": the following character is re-examined */
andrewbonney 0:07919e3d6c56 259 default:
andrewbonney 0:07919e3d6c56 260 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 261 break;
andrewbonney 0:07919e3d6c56 262 }
andrewbonney 0:07919e3d6c56 263 }
andrewbonney 0:07919e3d6c56 264 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 265 case BT_QUEST:
andrewbonney 0:07919e3d6c56 266 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
andrewbonney 0:07919e3d6c56 267 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 268 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 269 }
andrewbonney 0:07919e3d6c56 270 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 271 if (ptr == end)
andrewbonney 0:07919e3d6c56 272 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 273 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 274 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 275 return tok;
andrewbonney 0:07919e3d6c56 276 }
andrewbonney 0:07919e3d6c56 277 /* fall through */
andrewbonney 0:07919e3d6c56 278 default:
andrewbonney 0:07919e3d6c56 279 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 280 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 281 }
andrewbonney 0:07919e3d6c56 282 }
andrewbonney 0:07919e3d6c56 283 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 284 }
andrewbonney 0:07919e3d6c56 285
/* Matches the six characters "CDATA[" starting at ptr.
   Returns:
     XML_TOK_CDATA_SECT_OPEN - all six matched; *nextTokPtr is just past "["
     XML_TOK_INVALID         - a character differs; *nextTokPtr points at
                               the first mismatch
     XML_TOK_PARTIAL         - fewer than six characters are available;
                               *nextTokPtr is not written */
andrewbonney 0:07919e3d6c56 286 static int PTRCALL
andrewbonney 0:07919e3d6c56 287 PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 288 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 289 {
andrewbonney 0:07919e3d6c56 290 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
andrewbonney 0:07919e3d6c56 291 ASCII_T, ASCII_A, ASCII_LSQB };
andrewbonney 0:07919e3d6c56 292 int i;
andrewbonney 0:07919e3d6c56 293 /* CDATA[ */
andrewbonney 0:07919e3d6c56 294 if (end - ptr < 6 * MINBPC(enc))
andrewbonney 0:07919e3d6c56 295 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 296 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 297 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
andrewbonney 0:07919e3d6c56 298 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 299 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 300 }
andrewbonney 0:07919e3d6c56 301 }
andrewbonney 0:07919e3d6c56 302 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 303 return XML_TOK_CDATA_SECT_OPEN;
andrewbonney 0:07919e3d6c56 304 }
andrewbonney 0:07919e3d6c56 305
/* Returns the next token inside a CDATA section, scanning [ptr, end).
   When MINBPC(enc) > 1, end is first rounded down so that only whole code
   units are examined.
   NOTE(review): the masking with (MINBPC(enc) - 1) presumes MINBPC is a
   power of two - confirm against the encodings this file is included for.
   Returns:
     XML_TOK_NONE             - ptr == end on entry
     XML_TOK_CDATA_SECT_CLOSE - input starts with "]]>"; *nextTokPtr past it
     XML_TOK_DATA_NEWLINE     - input starts with CR, CR LF or LF;
                                *nextTokPtr past the line break
     XML_TOK_DATA_CHARS       - a run of ordinary characters; *nextTokPtr
                                points at the first character that needs
                                separate handling (line break, "]", invalid
                                or incomplete character) or at end
     XML_TOK_INVALID          - the first character is rejected by
                                INVALID_CASES
     XML_TOK_PARTIAL_CHAR     - the first character is an incomplete
                                multi-byte sequence
     XML_TOK_PARTIAL          - more input is needed to classify the first
                                token (after "]", "]]", CR, or when less
                                than one whole code unit is available) */
andrewbonney 0:07919e3d6c56 306 static int PTRCALL
andrewbonney 0:07919e3d6c56 307 PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 308 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 309 {
andrewbonney 0:07919e3d6c56 310 if (ptr == end)
andrewbonney 0:07919e3d6c56 311 return XML_TOK_NONE;
andrewbonney 0:07919e3d6c56 312 if (MINBPC(enc) > 1) {
andrewbonney 0:07919e3d6c56 313 size_t n = end - ptr;
andrewbonney 0:07919e3d6c56 314 if (n & (MINBPC(enc) - 1)) { /* trailing bytes do not form a whole code unit */
andrewbonney 0:07919e3d6c56 315 n &= ~(MINBPC(enc) - 1);
andrewbonney 0:07919e3d6c56 316 if (n == 0)
andrewbonney 0:07919e3d6c56 317 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 318 end = ptr + n;
andrewbonney 0:07919e3d6c56 319 }
andrewbonney 0:07919e3d6c56 320 }
andrewbonney 0:07919e3d6c56 321 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 322 case BT_RSQB:
andrewbonney 0:07919e3d6c56 323 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 324 if (ptr == end)
andrewbonney 0:07919e3d6c56 325 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 326 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
andrewbonney 0:07919e3d6c56 327 break;
andrewbonney 0:07919e3d6c56 328 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 329 if (ptr == end)
andrewbonney 0:07919e3d6c56 330 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 331 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 332 ptr -= MINBPC(enc); /* step back onto the second "]" so it is rescanned as a possible terminator start */
andrewbonney 0:07919e3d6c56 333 break;
andrewbonney 0:07919e3d6c56 334 }
andrewbonney 0:07919e3d6c56 335 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 336 return XML_TOK_CDATA_SECT_CLOSE;
andrewbonney 0:07919e3d6c56 337 case BT_CR:
andrewbonney 0:07919e3d6c56 338 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 339 if (ptr == end) /* cannot yet tell CR from CR LF */
andrewbonney 0:07919e3d6c56 340 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 341 if (BYTE_TYPE(enc, ptr) == BT_LF)
andrewbonney 0:07919e3d6c56 342 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 343 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 344 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 345 case BT_LF:
andrewbonney 0:07919e3d6c56 346 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 347 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 348 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 349 default:
andrewbonney 0:07919e3d6c56 350 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 351 break;
andrewbonney 0:07919e3d6c56 352 }
andrewbonney 0:07919e3d6c56 353 while (ptr != end) { /* extend the data run; problems found here end the run instead of failing */
andrewbonney 0:07919e3d6c56 354 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 355 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 356 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 357 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 358 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 359 return XML_TOK_DATA_CHARS; \
andrewbonney 0:07919e3d6c56 360 } \
andrewbonney 0:07919e3d6c56 361 ptr += n; \
andrewbonney 0:07919e3d6c56 362 break;
andrewbonney 0:07919e3d6c56 363 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 364 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 365 case BT_NONXML:
andrewbonney 0:07919e3d6c56 366 case BT_MALFORM:
andrewbonney 0:07919e3d6c56 367 case BT_TRAIL:
andrewbonney 0:07919e3d6c56 368 case BT_CR:
andrewbonney 0:07919e3d6c56 369 case BT_LF:
andrewbonney 0:07919e3d6c56 370 case BT_RSQB:
andrewbonney 0:07919e3d6c56 371 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 372 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 373 default:
andrewbonney 0:07919e3d6c56 374 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 375 break;
andrewbonney 0:07919e3d6c56 376 }
andrewbonney 0:07919e3d6c56 377 }
andrewbonney 0:07919e3d6c56 378 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 379 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 380 }
andrewbonney 0:07919e3d6c56 381
andrewbonney 0:07919e3d6c56 382 /* ptr points to character following "</" */
andrewbonney 0:07919e3d6c56 383
/* Scans an end tag, starting right after "</".
   The name must begin with a name-start character and continue with name
   characters (plus ":" when XML_NS is defined). Optional whitespace may
   follow the name, and then ">" is required.
   Returns:
     XML_TOK_END_TAG      - ">" found; *nextTokPtr is just past it
     XML_TOK_INVALID      - unexpected character; *nextTokPtr points at it
     XML_TOK_PARTIAL_CHAR - input ends inside a multi-byte character
     XML_TOK_PARTIAL      - input ends before ">" */
andrewbonney 0:07919e3d6c56 384 static int PTRCALL
andrewbonney 0:07919e3d6c56 385 PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 386 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 387 {
andrewbonney 0:07919e3d6c56 388 if (ptr == end)
andrewbonney 0:07919e3d6c56 389 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 390 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 391 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 392 default:
andrewbonney 0:07919e3d6c56 393 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 394 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 395 }
andrewbonney 0:07919e3d6c56 396 while (ptr != end) {
andrewbonney 0:07919e3d6c56 397 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 398 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 399 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 400 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { /* after the name only whitespace and ">" are allowed */
andrewbonney 0:07919e3d6c56 401 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 402 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 403 break;
andrewbonney 0:07919e3d6c56 404 case BT_GT:
andrewbonney 0:07919e3d6c56 405 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 406 return XML_TOK_END_TAG;
andrewbonney 0:07919e3d6c56 407 default:
andrewbonney 0:07919e3d6c56 408 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 409 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 410 }
andrewbonney 0:07919e3d6c56 411 }
andrewbonney 0:07919e3d6c56 412 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 413 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 414 case BT_COLON:
andrewbonney 0:07919e3d6c56 415 /* no need to check qname syntax here,
andrewbonney 0:07919e3d6c56 416 since end-tag must match exactly */
andrewbonney 0:07919e3d6c56 417 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 418 break;
andrewbonney 0:07919e3d6c56 419 #endif
andrewbonney 0:07919e3d6c56 420 case BT_GT:
andrewbonney 0:07919e3d6c56 421 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 422 return XML_TOK_END_TAG;
andrewbonney 0:07919e3d6c56 423 default:
andrewbonney 0:07919e3d6c56 424 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 425 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 426 }
andrewbonney 0:07919e3d6c56 427 }
andrewbonney 0:07919e3d6c56 428 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 429 }
andrewbonney 0:07919e3d6c56 430
andrewbonney 0:07919e3d6c56 431 /* ptr points to character following "&#x" */
andrewbonney 0:07919e3d6c56 432
/* Scans the digits of a hexadecimal character reference.
   At least one character of type BT_DIGIT or BT_HEX is required; further
   such characters are accepted until ";" terminates the reference.
   Returns:
     XML_TOK_CHAR_REF - ";" found; *nextTokPtr is just past it
     XML_TOK_INVALID  - any other character; *nextTokPtr points at it
     XML_TOK_PARTIAL  - input ends before ";" (including empty input);
                        *nextTokPtr is not written */
andrewbonney 0:07919e3d6c56 433 static int PTRCALL
andrewbonney 0:07919e3d6c56 434 PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 435 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 436 {
andrewbonney 0:07919e3d6c56 437 if (ptr != end) {
andrewbonney 0:07919e3d6c56 438 switch (BYTE_TYPE(enc, ptr)) { /* first digit is mandatory */
andrewbonney 0:07919e3d6c56 439 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 440 case BT_HEX:
andrewbonney 0:07919e3d6c56 441 break;
andrewbonney 0:07919e3d6c56 442 default:
andrewbonney 0:07919e3d6c56 443 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 444 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 445 }
andrewbonney 0:07919e3d6c56 446 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 447 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 448 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 449 case BT_HEX:
andrewbonney 0:07919e3d6c56 450 break;
andrewbonney 0:07919e3d6c56 451 case BT_SEMI:
andrewbonney 0:07919e3d6c56 452 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 453 return XML_TOK_CHAR_REF;
andrewbonney 0:07919e3d6c56 454 default:
andrewbonney 0:07919e3d6c56 455 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 456 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 457 }
andrewbonney 0:07919e3d6c56 458 }
andrewbonney 0:07919e3d6c56 459 }
andrewbonney 0:07919e3d6c56 460 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 461 }
andrewbonney 0:07919e3d6c56 462
andrewbonney 0:07919e3d6c56 463 /* ptr points to character following "&#" */
andrewbonney 0:07919e3d6c56 464
/* Scans a numeric character reference, starting right after "&#".
   If the next character matches ASCII_x the rest is delegated to
   scanHexCharRef. Otherwise at least one BT_DIGIT character is required,
   and further digits are accepted until ";" terminates the reference.
   Returns:
     XML_TOK_CHAR_REF - ";" found; *nextTokPtr is just past it
     XML_TOK_INVALID  - any other character; *nextTokPtr points at it
     XML_TOK_PARTIAL  - input ends before ";" (including empty input);
                        *nextTokPtr is not written */
andrewbonney 0:07919e3d6c56 465 static int PTRCALL
andrewbonney 0:07919e3d6c56 466 PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 467 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 468 {
andrewbonney 0:07919e3d6c56 469 if (ptr != end) {
andrewbonney 0:07919e3d6c56 470 if (CHAR_MATCHES(enc, ptr, ASCII_x))
andrewbonney 0:07919e3d6c56 471 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 472 switch (BYTE_TYPE(enc, ptr)) { /* first digit is mandatory */
andrewbonney 0:07919e3d6c56 473 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 474 break;
andrewbonney 0:07919e3d6c56 475 default:
andrewbonney 0:07919e3d6c56 476 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 477 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 478 }
andrewbonney 0:07919e3d6c56 479 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 480 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 481 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 482 break;
andrewbonney 0:07919e3d6c56 483 case BT_SEMI:
andrewbonney 0:07919e3d6c56 484 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 485 return XML_TOK_CHAR_REF;
andrewbonney 0:07919e3d6c56 486 default:
andrewbonney 0:07919e3d6c56 487 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 488 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 489 }
andrewbonney 0:07919e3d6c56 490 }
andrewbonney 0:07919e3d6c56 491 }
andrewbonney 0:07919e3d6c56 492 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 493 }
andrewbonney 0:07919e3d6c56 494
andrewbonney 0:07919e3d6c56 495 /* ptr points to character following "&" */
andrewbonney 0:07919e3d6c56 496
/* Scans a reference, starting right after "&".
   A BT_NUM character hands over to scanCharRef. Otherwise the reference
   must be a name (name-start character followed by name characters)
   terminated by ";".
   Returns:
     XML_TOK_ENTITY_REF   - named reference closed by ";"; *nextTokPtr is
                            just past it
     the scanCharRef result for numeric references
     XML_TOK_INVALID      - unexpected character; *nextTokPtr points at it
     XML_TOK_PARTIAL_CHAR - input ends inside a multi-byte character
     XML_TOK_PARTIAL      - input ends before ";" */
andrewbonney 0:07919e3d6c56 497 static int PTRCALL
andrewbonney 0:07919e3d6c56 498 PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 499 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 500 {
andrewbonney 0:07919e3d6c56 501 if (ptr == end)
andrewbonney 0:07919e3d6c56 502 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 503 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 504 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 505 case BT_NUM:
andrewbonney 0:07919e3d6c56 506 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 507 default:
andrewbonney 0:07919e3d6c56 508 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 509 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 510 }
andrewbonney 0:07919e3d6c56 511 while (ptr != end) {
andrewbonney 0:07919e3d6c56 512 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 513 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 514 case BT_SEMI:
andrewbonney 0:07919e3d6c56 515 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 516 return XML_TOK_ENTITY_REF;
andrewbonney 0:07919e3d6c56 517 default:
andrewbonney 0:07919e3d6c56 518 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 519 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 520 }
andrewbonney 0:07919e3d6c56 521 }
andrewbonney 0:07919e3d6c56 522 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 523 }
andrewbonney 0:07919e3d6c56 524
andrewbonney 0:07919e3d6c56 525 /* ptr points to character following first character of attribute name */
andrewbonney 0:07919e3d6c56 526
andrewbonney 0:07919e3d6c56 527 static int PTRCALL
andrewbonney 0:07919e3d6c56 528 PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 529 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 530 {
andrewbonney 0:07919e3d6c56 531 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 532 int hadColon = 0;
andrewbonney 0:07919e3d6c56 533 #endif
andrewbonney 0:07919e3d6c56 534 while (ptr != end) {
andrewbonney 0:07919e3d6c56 535 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 536 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 537 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 538 case BT_COLON:
andrewbonney 0:07919e3d6c56 539 if (hadColon) {
andrewbonney 0:07919e3d6c56 540 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 541 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 542 }
andrewbonney 0:07919e3d6c56 543 hadColon = 1;
andrewbonney 0:07919e3d6c56 544 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 545 if (ptr == end)
andrewbonney 0:07919e3d6c56 546 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 547 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 548 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 549 default:
andrewbonney 0:07919e3d6c56 550 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 551 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 552 }
andrewbonney 0:07919e3d6c56 553 break;
andrewbonney 0:07919e3d6c56 554 #endif
andrewbonney 0:07919e3d6c56 555 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 556 for (;;) {
andrewbonney 0:07919e3d6c56 557 int t;
andrewbonney 0:07919e3d6c56 558
andrewbonney 0:07919e3d6c56 559 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 560 if (ptr == end)
andrewbonney 0:07919e3d6c56 561 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 562 t = BYTE_TYPE(enc, ptr);
andrewbonney 0:07919e3d6c56 563 if (t == BT_EQUALS)
andrewbonney 0:07919e3d6c56 564 break;
andrewbonney 0:07919e3d6c56 565 switch (t) {
andrewbonney 0:07919e3d6c56 566 case BT_S:
andrewbonney 0:07919e3d6c56 567 case BT_LF:
andrewbonney 0:07919e3d6c56 568 case BT_CR:
andrewbonney 0:07919e3d6c56 569 break;
andrewbonney 0:07919e3d6c56 570 default:
andrewbonney 0:07919e3d6c56 571 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 572 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 573 }
andrewbonney 0:07919e3d6c56 574 }
andrewbonney 0:07919e3d6c56 575 /* fall through */
andrewbonney 0:07919e3d6c56 576 case BT_EQUALS:
andrewbonney 0:07919e3d6c56 577 {
andrewbonney 0:07919e3d6c56 578 int open;
andrewbonney 0:07919e3d6c56 579 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 580 hadColon = 0;
andrewbonney 0:07919e3d6c56 581 #endif
andrewbonney 0:07919e3d6c56 582 for (;;) {
andrewbonney 0:07919e3d6c56 583 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 584 if (ptr == end)
andrewbonney 0:07919e3d6c56 585 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 586 open = BYTE_TYPE(enc, ptr);
andrewbonney 0:07919e3d6c56 587 if (open == BT_QUOT || open == BT_APOS)
andrewbonney 0:07919e3d6c56 588 break;
andrewbonney 0:07919e3d6c56 589 switch (open) {
andrewbonney 0:07919e3d6c56 590 case BT_S:
andrewbonney 0:07919e3d6c56 591 case BT_LF:
andrewbonney 0:07919e3d6c56 592 case BT_CR:
andrewbonney 0:07919e3d6c56 593 break;
andrewbonney 0:07919e3d6c56 594 default:
andrewbonney 0:07919e3d6c56 595 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 596 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 597 }
andrewbonney 0:07919e3d6c56 598 }
andrewbonney 0:07919e3d6c56 599 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 600 /* in attribute value */
andrewbonney 0:07919e3d6c56 601 for (;;) {
andrewbonney 0:07919e3d6c56 602 int t;
andrewbonney 0:07919e3d6c56 603 if (ptr == end)
andrewbonney 0:07919e3d6c56 604 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 605 t = BYTE_TYPE(enc, ptr);
andrewbonney 0:07919e3d6c56 606 if (t == open)
andrewbonney 0:07919e3d6c56 607 break;
andrewbonney 0:07919e3d6c56 608 switch (t) {
andrewbonney 0:07919e3d6c56 609 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 610 case BT_AMP:
andrewbonney 0:07919e3d6c56 611 {
andrewbonney 0:07919e3d6c56 612 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
andrewbonney 0:07919e3d6c56 613 if (tok <= 0) {
andrewbonney 0:07919e3d6c56 614 if (tok == XML_TOK_INVALID)
andrewbonney 0:07919e3d6c56 615 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 616 return tok;
andrewbonney 0:07919e3d6c56 617 }
andrewbonney 0:07919e3d6c56 618 break;
andrewbonney 0:07919e3d6c56 619 }
andrewbonney 0:07919e3d6c56 620 case BT_LT:
andrewbonney 0:07919e3d6c56 621 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 622 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 623 default:
andrewbonney 0:07919e3d6c56 624 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 625 break;
andrewbonney 0:07919e3d6c56 626 }
andrewbonney 0:07919e3d6c56 627 }
andrewbonney 0:07919e3d6c56 628 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 629 if (ptr == end)
andrewbonney 0:07919e3d6c56 630 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 631 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 632 case BT_S:
andrewbonney 0:07919e3d6c56 633 case BT_CR:
andrewbonney 0:07919e3d6c56 634 case BT_LF:
andrewbonney 0:07919e3d6c56 635 break;
andrewbonney 0:07919e3d6c56 636 case BT_SOL:
andrewbonney 0:07919e3d6c56 637 goto sol;
andrewbonney 0:07919e3d6c56 638 case BT_GT:
andrewbonney 0:07919e3d6c56 639 goto gt;
andrewbonney 0:07919e3d6c56 640 default:
andrewbonney 0:07919e3d6c56 641 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 642 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 643 }
andrewbonney 0:07919e3d6c56 644 /* ptr points to closing quote */
andrewbonney 0:07919e3d6c56 645 for (;;) {
andrewbonney 0:07919e3d6c56 646 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 647 if (ptr == end)
andrewbonney 0:07919e3d6c56 648 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 649 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 650 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 651 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 652 continue;
andrewbonney 0:07919e3d6c56 653 case BT_GT:
andrewbonney 0:07919e3d6c56 654 gt:
andrewbonney 0:07919e3d6c56 655 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 656 return XML_TOK_START_TAG_WITH_ATTS;
andrewbonney 0:07919e3d6c56 657 case BT_SOL:
andrewbonney 0:07919e3d6c56 658 sol:
andrewbonney 0:07919e3d6c56 659 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 660 if (ptr == end)
andrewbonney 0:07919e3d6c56 661 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 662 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 663 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 664 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 665 }
andrewbonney 0:07919e3d6c56 666 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 667 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
andrewbonney 0:07919e3d6c56 668 default:
andrewbonney 0:07919e3d6c56 669 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 670 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 671 }
andrewbonney 0:07919e3d6c56 672 break;
andrewbonney 0:07919e3d6c56 673 }
andrewbonney 0:07919e3d6c56 674 break;
andrewbonney 0:07919e3d6c56 675 }
andrewbonney 0:07919e3d6c56 676 default:
andrewbonney 0:07919e3d6c56 677 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 678 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 679 }
andrewbonney 0:07919e3d6c56 680 }
andrewbonney 0:07919e3d6c56 681 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 682 }
andrewbonney 0:07919e3d6c56 683
andrewbonney 0:07919e3d6c56 684 /* ptr points to character following "<" */
andrewbonney 0:07919e3d6c56 685
/* Scans markup that begins with "<"; ptr points at the byte after the "<".
 *
 * Dispatch on the first byte:
 *   - BT_EXCL followed by BT_MINUS -> delegated to scanComment
 *   - BT_EXCL followed by BT_LSQB  -> delegated to scanCdataSection
 *   - BT_EXCL followed by anything else -> XML_TOK_INVALID
 *   - BT_QUEST -> delegated to scanPi
 *   - BT_SOL   -> delegated to scanEndTag
 *   - a byte accepted by CHECK_NMSTRT_CASES -> start-tag, scanned below
 *   - anything else -> XML_TOK_INVALID
 *
 * For a start-tag the function returns XML_TOK_START_TAG_NO_ATTS or
 * XML_TOK_EMPTY_ELEMENT_NO_ATTS with *nextTokPtr just past the closing
 * byte, or hands over to scanAtts once a name-start character is seen
 * after whitespace.  XML_TOK_PARTIAL is returned whenever the input ends
 * before the token is complete.  On XML_TOK_INVALID, *nextTokPtr is the
 * offending byte.
 *
 * NOTE(review): CHECK_NMSTRT_CASES / CHECK_NAME_CASES are defined outside
 * this view; they presumably validate and consume one (name-start / name)
 * character and then break out of the enclosing switch - confirm.
 */
andrewbonney 0:07919e3d6c56 686 static int PTRCALL
andrewbonney 0:07919e3d6c56 687 PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 688 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 689 {
andrewbonney 0:07919e3d6c56 690 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 691 int hadColon;
andrewbonney 0:07919e3d6c56 692 #endif
andrewbonney 0:07919e3d6c56 693 if (ptr == end)
andrewbonney 0:07919e3d6c56 694 return XML_TOK_PARTIAL;
/* Classify the byte right after "<". */
andrewbonney 0:07919e3d6c56 695 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 696 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 697 case BT_EXCL:
andrewbonney 0:07919e3d6c56 698 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 699 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 700 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 701 case BT_MINUS:
andrewbonney 0:07919e3d6c56 702 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 703 case BT_LSQB:
andrewbonney 0:07919e3d6c56 704 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
andrewbonney 0:07919e3d6c56 705 end, nextTokPtr);
andrewbonney 0:07919e3d6c56 706 }
/* BT_EXCL followed by neither BT_MINUS nor BT_LSQB is rejected here. */
andrewbonney 0:07919e3d6c56 707 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 708 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 709 case BT_QUEST:
andrewbonney 0:07919e3d6c56 710 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 711 case BT_SOL:
andrewbonney 0:07919e3d6c56 712 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 713 default:
andrewbonney 0:07919e3d6c56 714 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 715 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 716 }
andrewbonney 0:07919e3d6c56 717 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 718 hadColon = 0;
andrewbonney 0:07919e3d6c56 719 #endif
andrewbonney 0:07919e3d6c56 720 /* we have a start-tag */
andrewbonney 0:07919e3d6c56 721 while (ptr != end) {
andrewbonney 0:07919e3d6c56 722 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 723 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 724 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 725 case BT_COLON:
/* Only one colon is accepted in the element name, and the byte after it
   must pass CHECK_NMSTRT_CASES. */
andrewbonney 0:07919e3d6c56 726 if (hadColon) {
andrewbonney 0:07919e3d6c56 727 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 728 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 729 }
andrewbonney 0:07919e3d6c56 730 hadColon = 1;
andrewbonney 0:07919e3d6c56 731 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 732 if (ptr == end)
andrewbonney 0:07919e3d6c56 733 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 734 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 735 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 736 default:
andrewbonney 0:07919e3d6c56 737 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 738 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 739 }
andrewbonney 0:07919e3d6c56 740 break;
andrewbonney 0:07919e3d6c56 741 #endif
andrewbonney 0:07919e3d6c56 742 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 743 {
/* Whitespace ends the element name.  Skip the rest of the whitespace and
   decide between end of tag (gt / sol) and the start of an attribute. */
andrewbonney 0:07919e3d6c56 744 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 745 while (ptr != end) {
andrewbonney 0:07919e3d6c56 746 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 747 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 748 case BT_GT:
andrewbonney 0:07919e3d6c56 749 goto gt;
andrewbonney 0:07919e3d6c56 750 case BT_SOL:
andrewbonney 0:07919e3d6c56 751 goto sol;
andrewbonney 0:07919e3d6c56 752 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 753 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 754 continue;
andrewbonney 0:07919e3d6c56 755 default:
andrewbonney 0:07919e3d6c56 756 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 757 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 758 }
/* Reached only when the switch above was left via the name-start cases:
   the attribute scanner takes over from the current position. */
andrewbonney 0:07919e3d6c56 759 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
andrewbonney 0:07919e3d6c56 760 }
andrewbonney 0:07919e3d6c56 761 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 762 }
andrewbonney 0:07919e3d6c56 763 case BT_GT:
andrewbonney 0:07919e3d6c56 764 gt:
andrewbonney 0:07919e3d6c56 765 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 766 return XML_TOK_START_TAG_NO_ATTS;
andrewbonney 0:07919e3d6c56 767 case BT_SOL:
andrewbonney 0:07919e3d6c56 768 sol:
/* After BT_SOL the very next character must match ASCII_GT. */
andrewbonney 0:07919e3d6c56 769 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 770 if (ptr == end)
andrewbonney 0:07919e3d6c56 771 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 772 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 773 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 774 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 775 }
andrewbonney 0:07919e3d6c56 776 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 777 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
andrewbonney 0:07919e3d6c56 778 default:
andrewbonney 0:07919e3d6c56 779 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 780 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 781 }
andrewbonney 0:07919e3d6c56 782 }
/* Input ended inside the element name. */
andrewbonney 0:07919e3d6c56 783 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 784 }
andrewbonney 0:07919e3d6c56 785
/* Returns the next token of element content in [ptr, end).
 *
 * Returns XML_TOK_NONE for empty input.  When MINBPC(enc) > 1, `end` is
 * first rounded down so that the scanned length is a multiple of
 * MINBPC(enc) (the bit-mask arithmetic relies on MINBPC(enc) being a power
 * of two); if no whole unit remains the result is XML_TOK_PARTIAL.
 *
 * The first byte selects the token:
 *   - BT_LT / BT_AMP: delegated to scanLt / scanRef
 *   - BT_CR, BT_LF: XML_TOK_DATA_NEWLINE (a BT_LF directly after BT_CR is
 *     consumed with it); a BT_CR that is the last byte yields
 *     XML_TOK_TRAILING_CR
 *   - BT_RSQB: XML_TOK_TRAILING_RSQB if the input ends within the next two
 *     characters; XML_TOK_INVALID if the sequence RSQB RSQB GT is found
 *   - INVALID_CASES: multi-byte lead bytes are length- and validity-checked,
 *     other malformed byte types are rejected
 *   - anything else starts a run of character data
 *
 * A data run extends until a delimiter byte, an incomplete or invalid
 * multi-byte sequence, or the end of input, and is reported as
 * XML_TOK_DATA_CHARS with *nextTokPtr at the stopping position.
 */
andrewbonney 0:07919e3d6c56 786 static int PTRCALL
andrewbonney 0:07919e3d6c56 787 PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 788 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 789 {
andrewbonney 0:07919e3d6c56 790 if (ptr == end)
andrewbonney 0:07919e3d6c56 791 return XML_TOK_NONE;
andrewbonney 0:07919e3d6c56 792 if (MINBPC(enc) > 1) {
/* Drop a trailing fragment shorter than one code unit. */
andrewbonney 0:07919e3d6c56 793 size_t n = end - ptr;
andrewbonney 0:07919e3d6c56 794 if (n & (MINBPC(enc) - 1)) {
andrewbonney 0:07919e3d6c56 795 n &= ~(MINBPC(enc) - 1);
andrewbonney 0:07919e3d6c56 796 if (n == 0)
andrewbonney 0:07919e3d6c56 797 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 798 end = ptr + n;
andrewbonney 0:07919e3d6c56 799 }
andrewbonney 0:07919e3d6c56 800 }
andrewbonney 0:07919e3d6c56 801 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 802 case BT_LT:
andrewbonney 0:07919e3d6c56 803 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 804 case BT_AMP:
andrewbonney 0:07919e3d6c56 805 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 806 case BT_CR:
andrewbonney 0:07919e3d6c56 807 ptr += MINBPC(enc);
/* A CR at the very end cannot yet be told apart from the first half of a
   CR/LF pair, hence the dedicated return code. */
andrewbonney 0:07919e3d6c56 808 if (ptr == end)
andrewbonney 0:07919e3d6c56 809 return XML_TOK_TRAILING_CR;
andrewbonney 0:07919e3d6c56 810 if (BYTE_TYPE(enc, ptr) == BT_LF)
andrewbonney 0:07919e3d6c56 811 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 812 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 813 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 814 case BT_LF:
andrewbonney 0:07919e3d6c56 815 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 816 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 817 case BT_RSQB:
/* Look ahead for RSQB RSQB GT.  On a mismatch, break into the data loop
   with ptr positioned so that a second RSQB is examined again there. */
andrewbonney 0:07919e3d6c56 818 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 819 if (ptr == end)
andrewbonney 0:07919e3d6c56 820 return XML_TOK_TRAILING_RSQB;
andrewbonney 0:07919e3d6c56 821 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
andrewbonney 0:07919e3d6c56 822 break;
andrewbonney 0:07919e3d6c56 823 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 824 if (ptr == end)
andrewbonney 0:07919e3d6c56 825 return XML_TOK_TRAILING_RSQB;
andrewbonney 0:07919e3d6c56 826 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 827 ptr -= MINBPC(enc);
andrewbonney 0:07919e3d6c56 828 break;
andrewbonney 0:07919e3d6c56 829 }
andrewbonney 0:07919e3d6c56 830 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 831 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 832 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 833 default:
andrewbonney 0:07919e3d6c56 834 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 835 break;
andrewbonney 0:07919e3d6c56 836 }
/* Extend the run of character data as far as possible. */
andrewbonney 0:07919e3d6c56 837 while (ptr != end) {
andrewbonney 0:07919e3d6c56 838 switch (BYTE_TYPE(enc, ptr)) {
/* A truncated or invalid multi-byte character ends the data run here; it
   is not reported as an error by this loop. */
andrewbonney 0:07919e3d6c56 839 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 840 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 841 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 842 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 843 return XML_TOK_DATA_CHARS; \
andrewbonney 0:07919e3d6c56 844 } \
andrewbonney 0:07919e3d6c56 845 ptr += n; \
andrewbonney 0:07919e3d6c56 846 break;
andrewbonney 0:07919e3d6c56 847 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 848 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 849 case BT_RSQB:
/* Same RSQB RSQB GT look-ahead as above.  When too few bytes remain to
   decide, fall through and end the data run in front of this RSQB. */
andrewbonney 0:07919e3d6c56 850 if (ptr + MINBPC(enc) != end) {
andrewbonney 0:07919e3d6c56 851 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
andrewbonney 0:07919e3d6c56 852 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 853 break;
andrewbonney 0:07919e3d6c56 854 }
andrewbonney 0:07919e3d6c56 855 if (ptr + 2*MINBPC(enc) != end) {
andrewbonney 0:07919e3d6c56 856 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
andrewbonney 0:07919e3d6c56 857 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 858 break;
andrewbonney 0:07919e3d6c56 859 }
andrewbonney 0:07919e3d6c56 860 *nextTokPtr = ptr + 2*MINBPC(enc);
andrewbonney 0:07919e3d6c56 861 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 862 }
andrewbonney 0:07919e3d6c56 863 }
andrewbonney 0:07919e3d6c56 864 /* fall through */
andrewbonney 0:07919e3d6c56 865 case BT_AMP:
andrewbonney 0:07919e3d6c56 866 case BT_LT:
andrewbonney 0:07919e3d6c56 867 case BT_NONXML:
andrewbonney 0:07919e3d6c56 868 case BT_MALFORM:
andrewbonney 0:07919e3d6c56 869 case BT_TRAIL:
andrewbonney 0:07919e3d6c56 870 case BT_CR:
andrewbonney 0:07919e3d6c56 871 case BT_LF:
/* Delimiters: stop in front of them; the next call handles them. */
andrewbonney 0:07919e3d6c56 872 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 873 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 874 default:
andrewbonney 0:07919e3d6c56 875 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 876 break;
andrewbonney 0:07919e3d6c56 877 }
andrewbonney 0:07919e3d6c56 878 }
andrewbonney 0:07919e3d6c56 879 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 880 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 881 }
andrewbonney 0:07919e3d6c56 882
andrewbonney 0:07919e3d6c56 883 /* ptr points to character following "%" */
andrewbonney 0:07919e3d6c56 884
andrewbonney 0:07919e3d6c56 885 static int PTRCALL
andrewbonney 0:07919e3d6c56 886 PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 887 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 888 {
andrewbonney 0:07919e3d6c56 889 if (ptr == end)
andrewbonney 0:07919e3d6c56 890 return -XML_TOK_PERCENT;
andrewbonney 0:07919e3d6c56 891 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 892 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 893 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
andrewbonney 0:07919e3d6c56 894 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 895 return XML_TOK_PERCENT;
andrewbonney 0:07919e3d6c56 896 default:
andrewbonney 0:07919e3d6c56 897 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 898 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 899 }
andrewbonney 0:07919e3d6c56 900 while (ptr != end) {
andrewbonney 0:07919e3d6c56 901 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 902 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 903 case BT_SEMI:
andrewbonney 0:07919e3d6c56 904 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 905 return XML_TOK_PARAM_ENTITY_REF;
andrewbonney 0:07919e3d6c56 906 default:
andrewbonney 0:07919e3d6c56 907 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 908 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 909 }
andrewbonney 0:07919e3d6c56 910 }
andrewbonney 0:07919e3d6c56 911 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 912 }
andrewbonney 0:07919e3d6c56 913
andrewbonney 0:07919e3d6c56 914 static int PTRCALL
andrewbonney 0:07919e3d6c56 915 PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 916 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 917 {
andrewbonney 0:07919e3d6c56 918 if (ptr == end)
andrewbonney 0:07919e3d6c56 919 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 920 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 921 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 922 default:
andrewbonney 0:07919e3d6c56 923 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 924 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 925 }
andrewbonney 0:07919e3d6c56 926 while (ptr != end) {
andrewbonney 0:07919e3d6c56 927 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 928 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 929 case BT_CR: case BT_LF: case BT_S:
andrewbonney 0:07919e3d6c56 930 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
andrewbonney 0:07919e3d6c56 931 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 932 return XML_TOK_POUND_NAME;
andrewbonney 0:07919e3d6c56 933 default:
andrewbonney 0:07919e3d6c56 934 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 935 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 936 }
andrewbonney 0:07919e3d6c56 937 }
andrewbonney 0:07919e3d6c56 938 return -XML_TOK_POUND_NAME;
andrewbonney 0:07919e3d6c56 939 }
andrewbonney 0:07919e3d6c56 940
andrewbonney 0:07919e3d6c56 941 static int PTRCALL
andrewbonney 0:07919e3d6c56 942 PREFIX(scanLit)(int open, const ENCODING *enc,
andrewbonney 0:07919e3d6c56 943 const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 944 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 945 {
andrewbonney 0:07919e3d6c56 946 while (ptr != end) {
andrewbonney 0:07919e3d6c56 947 int t = BYTE_TYPE(enc, ptr);
andrewbonney 0:07919e3d6c56 948 switch (t) {
andrewbonney 0:07919e3d6c56 949 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 950 case BT_QUOT:
andrewbonney 0:07919e3d6c56 951 case BT_APOS:
andrewbonney 0:07919e3d6c56 952 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 953 if (t != open)
andrewbonney 0:07919e3d6c56 954 break;
andrewbonney 0:07919e3d6c56 955 if (ptr == end)
andrewbonney 0:07919e3d6c56 956 return -XML_TOK_LITERAL;
andrewbonney 0:07919e3d6c56 957 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 958 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 959 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 960 case BT_GT: case BT_PERCNT: case BT_LSQB:
andrewbonney 0:07919e3d6c56 961 return XML_TOK_LITERAL;
andrewbonney 0:07919e3d6c56 962 default:
andrewbonney 0:07919e3d6c56 963 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 964 }
andrewbonney 0:07919e3d6c56 965 default:
andrewbonney 0:07919e3d6c56 966 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 967 break;
andrewbonney 0:07919e3d6c56 968 }
andrewbonney 0:07919e3d6c56 969 }
andrewbonney 0:07919e3d6c56 970 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 971 }
andrewbonney 0:07919e3d6c56 972
/* Returns the next token of the prolog in [ptr, end).
 *
 * Returns XML_TOK_NONE for empty input.  When MINBPC(enc) > 1, `end` is
 * first rounded down so that the scanned length is a multiple of
 * MINBPC(enc) (the bit-mask arithmetic relies on MINBPC(enc) being a power
 * of two); if no whole unit remains the result is XML_TOK_PARTIAL.
 *
 * The first byte selects a single-character token, a delegated scanner
 * (scanLit, scanDecl, scanPi, scanPercent, scanPoundName) or the start of
 * a name / name token, which is then extended by the loop at the bottom.
 *
 * Negative return values (-XML_TOK_PROLOG_S, -XML_TOK_CLOSE_BRACKET,
 * -XML_TOK_CLOSE_PAREN, -tok) are produced when the input ends at a point
 * where the token seen so far could still be extended by more input.
 * On XML_TOK_INVALID, *nextTokPtr is the offending byte.
 *
 * NOTE(review): CHECK_NAME_CASES is defined outside this view; it
 * presumably validates and consumes one name character and then breaks
 * out of the enclosing switch - confirm.
 */
andrewbonney 0:07919e3d6c56 973 static int PTRCALL
andrewbonney 0:07919e3d6c56 974 PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 975 const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 976 {
andrewbonney 0:07919e3d6c56 977 int tok;
andrewbonney 0:07919e3d6c56 978 if (ptr == end)
andrewbonney 0:07919e3d6c56 979 return XML_TOK_NONE;
andrewbonney 0:07919e3d6c56 980 if (MINBPC(enc) > 1) {
/* Drop a trailing fragment shorter than one code unit. */
andrewbonney 0:07919e3d6c56 981 size_t n = end - ptr;
andrewbonney 0:07919e3d6c56 982 if (n & (MINBPC(enc) - 1)) {
andrewbonney 0:07919e3d6c56 983 n &= ~(MINBPC(enc) - 1);
andrewbonney 0:07919e3d6c56 984 if (n == 0)
andrewbonney 0:07919e3d6c56 985 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 986 end = ptr + n;
andrewbonney 0:07919e3d6c56 987 }
andrewbonney 0:07919e3d6c56 988 }
andrewbonney 0:07919e3d6c56 989 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 990 case BT_QUOT:
andrewbonney 0:07919e3d6c56 991 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 992 case BT_APOS:
andrewbonney 0:07919e3d6c56 993 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 994 case BT_LT:
andrewbonney 0:07919e3d6c56 995 {
andrewbonney 0:07919e3d6c56 996 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 997 if (ptr == end)
andrewbonney 0:07919e3d6c56 998 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 999 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1000 case BT_EXCL:
andrewbonney 0:07919e3d6c56 1001 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1002 case BT_QUEST:
andrewbonney 0:07919e3d6c56 1003 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1004 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1005 case BT_HEX:
andrewbonney 0:07919e3d6c56 1006 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1007 case BT_LEAD2:
andrewbonney 0:07919e3d6c56 1008 case BT_LEAD3:
andrewbonney 0:07919e3d6c56 1009 case BT_LEAD4:
/* BT_LT followed by a possible name start: report XML_TOK_INSTANCE_START
   and hand back the position of the BT_LT byte itself, unconsumed. */
andrewbonney 0:07919e3d6c56 1010 *nextTokPtr = ptr - MINBPC(enc);
andrewbonney 0:07919e3d6c56 1011 return XML_TOK_INSTANCE_START;
andrewbonney 0:07919e3d6c56 1012 }
andrewbonney 0:07919e3d6c56 1013 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1014 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1015 }
andrewbonney 0:07919e3d6c56 1016 case BT_CR:
andrewbonney 0:07919e3d6c56 1017 if (ptr + MINBPC(enc) == end) {
andrewbonney 0:07919e3d6c56 1018 *nextTokPtr = end;
andrewbonney 0:07919e3d6c56 1019 /* indicate that this might be part of a CR/LF pair */
andrewbonney 0:07919e3d6c56 1020 return -XML_TOK_PROLOG_S;
andrewbonney 0:07919e3d6c56 1021 }
andrewbonney 0:07919e3d6c56 1022 /* fall through */
andrewbonney 0:07919e3d6c56 1023 case BT_S: case BT_LF:
/* Consume a run of whitespace.  A BT_CR that is the last available byte
   is left unconsumed: the run ends in front of it. */
andrewbonney 0:07919e3d6c56 1024 for (;;) {
andrewbonney 0:07919e3d6c56 1025 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1026 if (ptr == end)
andrewbonney 0:07919e3d6c56 1027 break;
andrewbonney 0:07919e3d6c56 1028 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1029 case BT_S: case BT_LF:
andrewbonney 0:07919e3d6c56 1030 break;
andrewbonney 0:07919e3d6c56 1031 case BT_CR:
andrewbonney 0:07919e3d6c56 1032 /* don't split CR/LF pair */
andrewbonney 0:07919e3d6c56 1033 if (ptr + MINBPC(enc) != end)
andrewbonney 0:07919e3d6c56 1034 break;
andrewbonney 0:07919e3d6c56 1035 /* fall through */
andrewbonney 0:07919e3d6c56 1036 default:
andrewbonney 0:07919e3d6c56 1037 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1038 return XML_TOK_PROLOG_S;
andrewbonney 0:07919e3d6c56 1039 }
andrewbonney 0:07919e3d6c56 1040 }
andrewbonney 0:07919e3d6c56 1041 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1042 return XML_TOK_PROLOG_S;
andrewbonney 0:07919e3d6c56 1043 case BT_PERCNT:
andrewbonney 0:07919e3d6c56 1044 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1045 case BT_COMMA:
andrewbonney 0:07919e3d6c56 1046 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1047 return XML_TOK_COMMA;
andrewbonney 0:07919e3d6c56 1048 case BT_LSQB:
andrewbonney 0:07919e3d6c56 1049 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1050 return XML_TOK_OPEN_BRACKET;
andrewbonney 0:07919e3d6c56 1051 case BT_RSQB:
/* RSQB RSQB GT is XML_TOK_COND_SECT_CLOSE; otherwise a single
   XML_TOK_CLOSE_BRACKET is returned with *nextTokPtr after the first RSQB. */
andrewbonney 0:07919e3d6c56 1052 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1053 if (ptr == end)
andrewbonney 0:07919e3d6c56 1054 return -XML_TOK_CLOSE_BRACKET;
andrewbonney 0:07919e3d6c56 1055 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
andrewbonney 0:07919e3d6c56 1056 if (ptr + MINBPC(enc) == end)
andrewbonney 0:07919e3d6c56 1057 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1058 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
andrewbonney 0:07919e3d6c56 1059 *nextTokPtr = ptr + 2*MINBPC(enc);
andrewbonney 0:07919e3d6c56 1060 return XML_TOK_COND_SECT_CLOSE;
andrewbonney 0:07919e3d6c56 1061 }
andrewbonney 0:07919e3d6c56 1062 }
andrewbonney 0:07919e3d6c56 1063 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1064 return XML_TOK_CLOSE_BRACKET;
andrewbonney 0:07919e3d6c56 1065 case BT_LPAR:
andrewbonney 0:07919e3d6c56 1066 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1067 return XML_TOK_OPEN_PAREN;
andrewbonney 0:07919e3d6c56 1068 case BT_RPAR:
/* BT_RPAR may carry a BT_AST / BT_QUEST / BT_PLUS suffix, which is folded
   into the token; otherwise only the listed follower bytes are accepted. */
andrewbonney 0:07919e3d6c56 1069 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1070 if (ptr == end)
andrewbonney 0:07919e3d6c56 1071 return -XML_TOK_CLOSE_PAREN;
andrewbonney 0:07919e3d6c56 1072 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1073 case BT_AST:
andrewbonney 0:07919e3d6c56 1074 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1075 return XML_TOK_CLOSE_PAREN_ASTERISK;
andrewbonney 0:07919e3d6c56 1076 case BT_QUEST:
andrewbonney 0:07919e3d6c56 1077 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1078 return XML_TOK_CLOSE_PAREN_QUESTION;
andrewbonney 0:07919e3d6c56 1079 case BT_PLUS:
andrewbonney 0:07919e3d6c56 1080 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1081 return XML_TOK_CLOSE_PAREN_PLUS;
andrewbonney 0:07919e3d6c56 1082 case BT_CR: case BT_LF: case BT_S:
andrewbonney 0:07919e3d6c56 1083 case BT_GT: case BT_COMMA: case BT_VERBAR:
andrewbonney 0:07919e3d6c56 1084 case BT_RPAR:
andrewbonney 0:07919e3d6c56 1085 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1086 return XML_TOK_CLOSE_PAREN;
andrewbonney 0:07919e3d6c56 1087 }
andrewbonney 0:07919e3d6c56 1088 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1089 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1090 case BT_VERBAR:
andrewbonney 0:07919e3d6c56 1091 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1092 return XML_TOK_OR;
andrewbonney 0:07919e3d6c56 1093 case BT_GT:
andrewbonney 0:07919e3d6c56 1094 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1095 return XML_TOK_DECL_CLOSE;
andrewbonney 0:07919e3d6c56 1096 case BT_NUM:
andrewbonney 0:07919e3d6c56 1097 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* Multi-byte lead bytes: a name-start character begins an XML_TOK_NAME,
   any other name character begins an XML_TOK_NMTOKEN. */
andrewbonney 0:07919e3d6c56 1098 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1099 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 1100 if (end - ptr < n) \
andrewbonney 0:07919e3d6c56 1101 return XML_TOK_PARTIAL_CHAR; \
andrewbonney 0:07919e3d6c56 1102 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 1103 ptr += n; \
andrewbonney 0:07919e3d6c56 1104 tok = XML_TOK_NAME; \
andrewbonney 0:07919e3d6c56 1105 break; \
andrewbonney 0:07919e3d6c56 1106 } \
andrewbonney 0:07919e3d6c56 1107 if (IS_NAME_CHAR(enc, ptr, n)) { \
andrewbonney 0:07919e3d6c56 1108 ptr += n; \
andrewbonney 0:07919e3d6c56 1109 tok = XML_TOK_NMTOKEN; \
andrewbonney 0:07919e3d6c56 1110 break; \
andrewbonney 0:07919e3d6c56 1111 } \
andrewbonney 0:07919e3d6c56 1112 *nextTokPtr = ptr; \
andrewbonney 0:07919e3d6c56 1113 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1114 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1115 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1116 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1117 case BT_HEX:
andrewbonney 0:07919e3d6c56 1118 tok = XML_TOK_NAME;
andrewbonney 0:07919e3d6c56 1119 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1120 break;
andrewbonney 0:07919e3d6c56 1121 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 1122 case BT_NAME:
andrewbonney 0:07919e3d6c56 1123 case BT_MINUS:
andrewbonney 0:07919e3d6c56 1124 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1125 case BT_COLON:
andrewbonney 0:07919e3d6c56 1126 #endif
andrewbonney 0:07919e3d6c56 1127 tok = XML_TOK_NMTOKEN;
andrewbonney 0:07919e3d6c56 1128 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1129 break;
andrewbonney 0:07919e3d6c56 1130 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1131 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1132 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1133 tok = XML_TOK_NAME;
andrewbonney 0:07919e3d6c56 1134 break;
andrewbonney 0:07919e3d6c56 1135 }
andrewbonney 0:07919e3d6c56 1136 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1137 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1138 tok = XML_TOK_NMTOKEN;
andrewbonney 0:07919e3d6c56 1139 break;
andrewbonney 0:07919e3d6c56 1140 }
andrewbonney 0:07919e3d6c56 1141 /* fall through */
andrewbonney 0:07919e3d6c56 1142 default:
andrewbonney 0:07919e3d6c56 1143 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1144 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1145 }
/* Only the name / name-token cases reach this point; tok records which.
   Extend the token until a delimiter or the end of input. */
andrewbonney 0:07919e3d6c56 1146 while (ptr != end) {
andrewbonney 0:07919e3d6c56 1147 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1148 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 1149 case BT_GT: case BT_RPAR: case BT_COMMA:
andrewbonney 0:07919e3d6c56 1150 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
andrewbonney 0:07919e3d6c56 1151 case BT_S: case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 1152 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1153 return tok;
andrewbonney 0:07919e3d6c56 1154 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1155 case BT_COLON:
/* First colon in an XML_TOK_NAME: becomes XML_TOK_PREFIXED_NAME if a name
   character follows, otherwise XML_TOK_NMTOKEN.  A second colon downgrades
   XML_TOK_PREFIXED_NAME to XML_TOK_NMTOKEN. */
andrewbonney 0:07919e3d6c56 1156 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1157 switch (tok) {
andrewbonney 0:07919e3d6c56 1158 case XML_TOK_NAME:
andrewbonney 0:07919e3d6c56 1159 if (ptr == end)
andrewbonney 0:07919e3d6c56 1160 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1161 tok = XML_TOK_PREFIXED_NAME;
andrewbonney 0:07919e3d6c56 1162 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1163 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
andrewbonney 0:07919e3d6c56 1164 default:
andrewbonney 0:07919e3d6c56 1165 tok = XML_TOK_NMTOKEN;
andrewbonney 0:07919e3d6c56 1166 break;
andrewbonney 0:07919e3d6c56 1167 }
andrewbonney 0:07919e3d6c56 1168 break;
andrewbonney 0:07919e3d6c56 1169 case XML_TOK_PREFIXED_NAME:
andrewbonney 0:07919e3d6c56 1170 tok = XML_TOK_NMTOKEN;
andrewbonney 0:07919e3d6c56 1171 break;
andrewbonney 0:07919e3d6c56 1172 }
andrewbonney 0:07919e3d6c56 1173 break;
andrewbonney 0:07919e3d6c56 1174 #endif
/* Suffix bytes BT_PLUS / BT_AST / BT_QUEST are folded into the token; they
   are rejected after an XML_TOK_NMTOKEN. */
andrewbonney 0:07919e3d6c56 1175 case BT_PLUS:
andrewbonney 0:07919e3d6c56 1176 if (tok == XML_TOK_NMTOKEN) {
andrewbonney 0:07919e3d6c56 1177 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1178 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1179 }
andrewbonney 0:07919e3d6c56 1180 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1181 return XML_TOK_NAME_PLUS;
andrewbonney 0:07919e3d6c56 1182 case BT_AST:
andrewbonney 0:07919e3d6c56 1183 if (tok == XML_TOK_NMTOKEN) {
andrewbonney 0:07919e3d6c56 1184 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1185 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1186 }
andrewbonney 0:07919e3d6c56 1187 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1188 return XML_TOK_NAME_ASTERISK;
andrewbonney 0:07919e3d6c56 1189 case BT_QUEST:
andrewbonney 0:07919e3d6c56 1190 if (tok == XML_TOK_NMTOKEN) {
andrewbonney 0:07919e3d6c56 1191 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1192 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1193 }
andrewbonney 0:07919e3d6c56 1194 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1195 return XML_TOK_NAME_QUESTION;
andrewbonney 0:07919e3d6c56 1196 default:
andrewbonney 0:07919e3d6c56 1197 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1198 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1199 }
andrewbonney 0:07919e3d6c56 1200 }
/* Input ended inside the name: the negated token type tells the caller
   which token was in progress. */
andrewbonney 0:07919e3d6c56 1201 return -tok;
andrewbonney 0:07919e3d6c56 1202 }
andrewbonney 0:07919e3d6c56 1203
andrewbonney 0:07919e3d6c56 1204 static int PTRCALL
andrewbonney 0:07919e3d6c56 1205 PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 1206 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 1207 {
andrewbonney 0:07919e3d6c56 1208 const char *start;
andrewbonney 0:07919e3d6c56 1209 if (ptr == end)
andrewbonney 0:07919e3d6c56 1210 return XML_TOK_NONE;
andrewbonney 0:07919e3d6c56 1211 start = ptr;
andrewbonney 0:07919e3d6c56 1212 while (ptr != end) {
andrewbonney 0:07919e3d6c56 1213 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1214 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1215 case BT_LEAD ## n: ptr += n; break;
andrewbonney 0:07919e3d6c56 1216 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1217 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1218 case BT_AMP:
andrewbonney 0:07919e3d6c56 1219 if (ptr == start)
andrewbonney 0:07919e3d6c56 1220 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1221 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1222 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1223 case BT_LT:
andrewbonney 0:07919e3d6c56 1224 /* this is for inside entity references */
andrewbonney 0:07919e3d6c56 1225 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1226 return XML_TOK_INVALID;
andrewbonney 0:07919e3d6c56 1227 case BT_LF:
andrewbonney 0:07919e3d6c56 1228 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1229 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1230 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 1231 }
andrewbonney 0:07919e3d6c56 1232 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1233 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1234 case BT_CR:
andrewbonney 0:07919e3d6c56 1235 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1236 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1237 if (ptr == end)
andrewbonney 0:07919e3d6c56 1238 return XML_TOK_TRAILING_CR;
andrewbonney 0:07919e3d6c56 1239 if (BYTE_TYPE(enc, ptr) == BT_LF)
andrewbonney 0:07919e3d6c56 1240 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1241 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1242 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 1243 }
andrewbonney 0:07919e3d6c56 1244 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1245 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1246 case BT_S:
andrewbonney 0:07919e3d6c56 1247 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1248 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1249 return XML_TOK_ATTRIBUTE_VALUE_S;
andrewbonney 0:07919e3d6c56 1250 }
andrewbonney 0:07919e3d6c56 1251 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1252 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1253 default:
andrewbonney 0:07919e3d6c56 1254 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1255 break;
andrewbonney 0:07919e3d6c56 1256 }
andrewbonney 0:07919e3d6c56 1257 }
andrewbonney 0:07919e3d6c56 1258 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1259 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1260 }
andrewbonney 0:07919e3d6c56 1261
andrewbonney 0:07919e3d6c56 1262 static int PTRCALL
andrewbonney 0:07919e3d6c56 1263 PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 1264 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 1265 {
andrewbonney 0:07919e3d6c56 1266 const char *start;
andrewbonney 0:07919e3d6c56 1267 if (ptr == end)
andrewbonney 0:07919e3d6c56 1268 return XML_TOK_NONE;
andrewbonney 0:07919e3d6c56 1269 start = ptr;
andrewbonney 0:07919e3d6c56 1270 while (ptr != end) {
andrewbonney 0:07919e3d6c56 1271 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1272 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1273 case BT_LEAD ## n: ptr += n; break;
andrewbonney 0:07919e3d6c56 1274 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1275 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1276 case BT_AMP:
andrewbonney 0:07919e3d6c56 1277 if (ptr == start)
andrewbonney 0:07919e3d6c56 1278 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1279 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1280 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1281 case BT_PERCNT:
andrewbonney 0:07919e3d6c56 1282 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1283 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
andrewbonney 0:07919e3d6c56 1284 end, nextTokPtr);
andrewbonney 0:07919e3d6c56 1285 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
andrewbonney 0:07919e3d6c56 1286 }
andrewbonney 0:07919e3d6c56 1287 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1288 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1289 case BT_LF:
andrewbonney 0:07919e3d6c56 1290 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1291 *nextTokPtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1292 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 1293 }
andrewbonney 0:07919e3d6c56 1294 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1295 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1296 case BT_CR:
andrewbonney 0:07919e3d6c56 1297 if (ptr == start) {
andrewbonney 0:07919e3d6c56 1298 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1299 if (ptr == end)
andrewbonney 0:07919e3d6c56 1300 return XML_TOK_TRAILING_CR;
andrewbonney 0:07919e3d6c56 1301 if (BYTE_TYPE(enc, ptr) == BT_LF)
andrewbonney 0:07919e3d6c56 1302 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1303 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1304 return XML_TOK_DATA_NEWLINE;
andrewbonney 0:07919e3d6c56 1305 }
andrewbonney 0:07919e3d6c56 1306 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1307 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1308 default:
andrewbonney 0:07919e3d6c56 1309 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1310 break;
andrewbonney 0:07919e3d6c56 1311 }
andrewbonney 0:07919e3d6c56 1312 }
andrewbonney 0:07919e3d6c56 1313 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1314 return XML_TOK_DATA_CHARS;
andrewbonney 0:07919e3d6c56 1315 }
andrewbonney 0:07919e3d6c56 1316
andrewbonney 0:07919e3d6c56 1317 #ifdef XML_DTD
andrewbonney 0:07919e3d6c56 1318
andrewbonney 0:07919e3d6c56 1319 static int PTRCALL
andrewbonney 0:07919e3d6c56 1320 PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 1321 const char *end, const char **nextTokPtr)
andrewbonney 0:07919e3d6c56 1322 {
andrewbonney 0:07919e3d6c56 1323 int level = 0;
andrewbonney 0:07919e3d6c56 1324 if (MINBPC(enc) > 1) {
andrewbonney 0:07919e3d6c56 1325 size_t n = end - ptr;
andrewbonney 0:07919e3d6c56 1326 if (n & (MINBPC(enc) - 1)) {
andrewbonney 0:07919e3d6c56 1327 n &= ~(MINBPC(enc) - 1);
andrewbonney 0:07919e3d6c56 1328 end = ptr + n;
andrewbonney 0:07919e3d6c56 1329 }
andrewbonney 0:07919e3d6c56 1330 }
andrewbonney 0:07919e3d6c56 1331 while (ptr != end) {
andrewbonney 0:07919e3d6c56 1332 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1333 INVALID_CASES(ptr, nextTokPtr)
andrewbonney 0:07919e3d6c56 1334 case BT_LT:
andrewbonney 0:07919e3d6c56 1335 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 1336 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1337 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
andrewbonney 0:07919e3d6c56 1338 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 1339 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1340 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
andrewbonney 0:07919e3d6c56 1341 ++level;
andrewbonney 0:07919e3d6c56 1342 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1343 }
andrewbonney 0:07919e3d6c56 1344 }
andrewbonney 0:07919e3d6c56 1345 break;
andrewbonney 0:07919e3d6c56 1346 case BT_RSQB:
andrewbonney 0:07919e3d6c56 1347 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 1348 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1349 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
andrewbonney 0:07919e3d6c56 1350 if ((ptr += MINBPC(enc)) == end)
andrewbonney 0:07919e3d6c56 1351 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1352 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
andrewbonney 0:07919e3d6c56 1353 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1354 if (level == 0) {
andrewbonney 0:07919e3d6c56 1355 *nextTokPtr = ptr;
andrewbonney 0:07919e3d6c56 1356 return XML_TOK_IGNORE_SECT;
andrewbonney 0:07919e3d6c56 1357 }
andrewbonney 0:07919e3d6c56 1358 --level;
andrewbonney 0:07919e3d6c56 1359 }
andrewbonney 0:07919e3d6c56 1360 }
andrewbonney 0:07919e3d6c56 1361 break;
andrewbonney 0:07919e3d6c56 1362 default:
andrewbonney 0:07919e3d6c56 1363 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1364 break;
andrewbonney 0:07919e3d6c56 1365 }
andrewbonney 0:07919e3d6c56 1366 }
andrewbonney 0:07919e3d6c56 1367 return XML_TOK_PARTIAL;
andrewbonney 0:07919e3d6c56 1368 }
andrewbonney 0:07919e3d6c56 1369
andrewbonney 0:07919e3d6c56 1370 #endif /* XML_DTD */
andrewbonney 0:07919e3d6c56 1371
andrewbonney 0:07919e3d6c56 1372 static int PTRCALL
andrewbonney 0:07919e3d6c56 1373 PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
andrewbonney 0:07919e3d6c56 1374 const char **badPtr)
andrewbonney 0:07919e3d6c56 1375 {
andrewbonney 0:07919e3d6c56 1376 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1377 end -= MINBPC(enc);
andrewbonney 0:07919e3d6c56 1378 for (; ptr != end; ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 1379 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1380 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 1381 case BT_HEX:
andrewbonney 0:07919e3d6c56 1382 case BT_MINUS:
andrewbonney 0:07919e3d6c56 1383 case BT_APOS:
andrewbonney 0:07919e3d6c56 1384 case BT_LPAR:
andrewbonney 0:07919e3d6c56 1385 case BT_RPAR:
andrewbonney 0:07919e3d6c56 1386 case BT_PLUS:
andrewbonney 0:07919e3d6c56 1387 case BT_COMMA:
andrewbonney 0:07919e3d6c56 1388 case BT_SOL:
andrewbonney 0:07919e3d6c56 1389 case BT_EQUALS:
andrewbonney 0:07919e3d6c56 1390 case BT_QUEST:
andrewbonney 0:07919e3d6c56 1391 case BT_CR:
andrewbonney 0:07919e3d6c56 1392 case BT_LF:
andrewbonney 0:07919e3d6c56 1393 case BT_SEMI:
andrewbonney 0:07919e3d6c56 1394 case BT_EXCL:
andrewbonney 0:07919e3d6c56 1395 case BT_AST:
andrewbonney 0:07919e3d6c56 1396 case BT_PERCNT:
andrewbonney 0:07919e3d6c56 1397 case BT_NUM:
andrewbonney 0:07919e3d6c56 1398 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1399 case BT_COLON:
andrewbonney 0:07919e3d6c56 1400 #endif
andrewbonney 0:07919e3d6c56 1401 break;
andrewbonney 0:07919e3d6c56 1402 case BT_S:
andrewbonney 0:07919e3d6c56 1403 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
andrewbonney 0:07919e3d6c56 1404 *badPtr = ptr;
andrewbonney 0:07919e3d6c56 1405 return 0;
andrewbonney 0:07919e3d6c56 1406 }
andrewbonney 0:07919e3d6c56 1407 break;
andrewbonney 0:07919e3d6c56 1408 case BT_NAME:
andrewbonney 0:07919e3d6c56 1409 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1410 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
andrewbonney 0:07919e3d6c56 1411 break;
andrewbonney 0:07919e3d6c56 1412 default:
andrewbonney 0:07919e3d6c56 1413 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1414 case 0x24: /* $ */
andrewbonney 0:07919e3d6c56 1415 case 0x40: /* @ */
andrewbonney 0:07919e3d6c56 1416 break;
andrewbonney 0:07919e3d6c56 1417 default:
andrewbonney 0:07919e3d6c56 1418 *badPtr = ptr;
andrewbonney 0:07919e3d6c56 1419 return 0;
andrewbonney 0:07919e3d6c56 1420 }
andrewbonney 0:07919e3d6c56 1421 break;
andrewbonney 0:07919e3d6c56 1422 }
andrewbonney 0:07919e3d6c56 1423 }
andrewbonney 0:07919e3d6c56 1424 return 1;
andrewbonney 0:07919e3d6c56 1425 }
andrewbonney 0:07919e3d6c56 1426
andrewbonney 0:07919e3d6c56 1427 /* This must only be called for a well-formed start-tag or empty
andrewbonney 0:07919e3d6c56 1428 element tag. Returns the number of attributes. Pointers to the
andrewbonney 0:07919e3d6c56 1429 first attsMax attributes are stored in atts.
andrewbonney 0:07919e3d6c56 1430 */
andrewbonney 0:07919e3d6c56 1431
andrewbonney 0:07919e3d6c56 1432 static int PTRCALL
andrewbonney 0:07919e3d6c56 1433 PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 1434 int attsMax, ATTRIBUTE *atts)
andrewbonney 0:07919e3d6c56 1435 {
andrewbonney 0:07919e3d6c56 1436 enum { other, inName, inValue } state = inName;
andrewbonney 0:07919e3d6c56 1437 int nAtts = 0;
andrewbonney 0:07919e3d6c56 1438 int open = 0; /* defined when state == inValue;
andrewbonney 0:07919e3d6c56 1439 initialization just to shut up compilers */
andrewbonney 0:07919e3d6c56 1440
andrewbonney 0:07919e3d6c56 1441 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 1442 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1443 #define START_NAME \
andrewbonney 0:07919e3d6c56 1444 if (state == other) { \
andrewbonney 0:07919e3d6c56 1445 if (nAtts < attsMax) { \
andrewbonney 0:07919e3d6c56 1446 atts[nAtts].name = ptr; \
andrewbonney 0:07919e3d6c56 1447 atts[nAtts].normalized = 1; \
andrewbonney 0:07919e3d6c56 1448 } \
andrewbonney 0:07919e3d6c56 1449 state = inName; \
andrewbonney 0:07919e3d6c56 1450 }
andrewbonney 0:07919e3d6c56 1451 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1452 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
andrewbonney 0:07919e3d6c56 1453 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1454 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1455 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1456 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1457 case BT_HEX:
andrewbonney 0:07919e3d6c56 1458 START_NAME
andrewbonney 0:07919e3d6c56 1459 break;
andrewbonney 0:07919e3d6c56 1460 #undef START_NAME
andrewbonney 0:07919e3d6c56 1461 case BT_QUOT:
andrewbonney 0:07919e3d6c56 1462 if (state != inValue) {
andrewbonney 0:07919e3d6c56 1463 if (nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1464 atts[nAtts].valuePtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1465 state = inValue;
andrewbonney 0:07919e3d6c56 1466 open = BT_QUOT;
andrewbonney 0:07919e3d6c56 1467 }
andrewbonney 0:07919e3d6c56 1468 else if (open == BT_QUOT) {
andrewbonney 0:07919e3d6c56 1469 state = other;
andrewbonney 0:07919e3d6c56 1470 if (nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1471 atts[nAtts].valueEnd = ptr;
andrewbonney 0:07919e3d6c56 1472 nAtts++;
andrewbonney 0:07919e3d6c56 1473 }
andrewbonney 0:07919e3d6c56 1474 break;
andrewbonney 0:07919e3d6c56 1475 case BT_APOS:
andrewbonney 0:07919e3d6c56 1476 if (state != inValue) {
andrewbonney 0:07919e3d6c56 1477 if (nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1478 atts[nAtts].valuePtr = ptr + MINBPC(enc);
andrewbonney 0:07919e3d6c56 1479 state = inValue;
andrewbonney 0:07919e3d6c56 1480 open = BT_APOS;
andrewbonney 0:07919e3d6c56 1481 }
andrewbonney 0:07919e3d6c56 1482 else if (open == BT_APOS) {
andrewbonney 0:07919e3d6c56 1483 state = other;
andrewbonney 0:07919e3d6c56 1484 if (nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1485 atts[nAtts].valueEnd = ptr;
andrewbonney 0:07919e3d6c56 1486 nAtts++;
andrewbonney 0:07919e3d6c56 1487 }
andrewbonney 0:07919e3d6c56 1488 break;
andrewbonney 0:07919e3d6c56 1489 case BT_AMP:
andrewbonney 0:07919e3d6c56 1490 if (nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1491 atts[nAtts].normalized = 0;
andrewbonney 0:07919e3d6c56 1492 break;
andrewbonney 0:07919e3d6c56 1493 case BT_S:
andrewbonney 0:07919e3d6c56 1494 if (state == inName)
andrewbonney 0:07919e3d6c56 1495 state = other;
andrewbonney 0:07919e3d6c56 1496 else if (state == inValue
andrewbonney 0:07919e3d6c56 1497 && nAtts < attsMax
andrewbonney 0:07919e3d6c56 1498 && atts[nAtts].normalized
andrewbonney 0:07919e3d6c56 1499 && (ptr == atts[nAtts].valuePtr
andrewbonney 0:07919e3d6c56 1500 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
andrewbonney 0:07919e3d6c56 1501 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
andrewbonney 0:07919e3d6c56 1502 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
andrewbonney 0:07919e3d6c56 1503 atts[nAtts].normalized = 0;
andrewbonney 0:07919e3d6c56 1504 break;
andrewbonney 0:07919e3d6c56 1505 case BT_CR: case BT_LF:
andrewbonney 0:07919e3d6c56 1506 /* This case ensures that the first attribute name is counted
andrewbonney 0:07919e3d6c56 1507 Apart from that we could just change state on the quote. */
andrewbonney 0:07919e3d6c56 1508 if (state == inName)
andrewbonney 0:07919e3d6c56 1509 state = other;
andrewbonney 0:07919e3d6c56 1510 else if (state == inValue && nAtts < attsMax)
andrewbonney 0:07919e3d6c56 1511 atts[nAtts].normalized = 0;
andrewbonney 0:07919e3d6c56 1512 break;
andrewbonney 0:07919e3d6c56 1513 case BT_GT:
andrewbonney 0:07919e3d6c56 1514 case BT_SOL:
andrewbonney 0:07919e3d6c56 1515 if (state != inValue)
andrewbonney 0:07919e3d6c56 1516 return nAtts;
andrewbonney 0:07919e3d6c56 1517 break;
andrewbonney 0:07919e3d6c56 1518 default:
andrewbonney 0:07919e3d6c56 1519 break;
andrewbonney 0:07919e3d6c56 1520 }
andrewbonney 0:07919e3d6c56 1521 }
andrewbonney 0:07919e3d6c56 1522 /* not reached */
andrewbonney 0:07919e3d6c56 1523 }
andrewbonney 0:07919e3d6c56 1524
andrewbonney 0:07919e3d6c56 1525 static int PTRFASTCALL
andrewbonney 0:07919e3d6c56 1526 PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
andrewbonney 0:07919e3d6c56 1527 {
andrewbonney 0:07919e3d6c56 1528 int result = 0;
andrewbonney 0:07919e3d6c56 1529 /* skip &# */
andrewbonney 0:07919e3d6c56 1530 ptr += 2*MINBPC(enc);
andrewbonney 0:07919e3d6c56 1531 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
andrewbonney 0:07919e3d6c56 1532 for (ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1533 !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
andrewbonney 0:07919e3d6c56 1534 ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 1535 int c = BYTE_TO_ASCII(enc, ptr);
andrewbonney 0:07919e3d6c56 1536 switch (c) {
andrewbonney 0:07919e3d6c56 1537 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
andrewbonney 0:07919e3d6c56 1538 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
andrewbonney 0:07919e3d6c56 1539 result <<= 4;
andrewbonney 0:07919e3d6c56 1540 result |= (c - ASCII_0);
andrewbonney 0:07919e3d6c56 1541 break;
andrewbonney 0:07919e3d6c56 1542 case ASCII_A: case ASCII_B: case ASCII_C:
andrewbonney 0:07919e3d6c56 1543 case ASCII_D: case ASCII_E: case ASCII_F:
andrewbonney 0:07919e3d6c56 1544 result <<= 4;
andrewbonney 0:07919e3d6c56 1545 result += 10 + (c - ASCII_A);
andrewbonney 0:07919e3d6c56 1546 break;
andrewbonney 0:07919e3d6c56 1547 case ASCII_a: case ASCII_b: case ASCII_c:
andrewbonney 0:07919e3d6c56 1548 case ASCII_d: case ASCII_e: case ASCII_f:
andrewbonney 0:07919e3d6c56 1549 result <<= 4;
andrewbonney 0:07919e3d6c56 1550 result += 10 + (c - ASCII_a);
andrewbonney 0:07919e3d6c56 1551 break;
andrewbonney 0:07919e3d6c56 1552 }
andrewbonney 0:07919e3d6c56 1553 if (result >= 0x110000)
andrewbonney 0:07919e3d6c56 1554 return -1;
andrewbonney 0:07919e3d6c56 1555 }
andrewbonney 0:07919e3d6c56 1556 }
andrewbonney 0:07919e3d6c56 1557 else {
andrewbonney 0:07919e3d6c56 1558 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 1559 int c = BYTE_TO_ASCII(enc, ptr);
andrewbonney 0:07919e3d6c56 1560 result *= 10;
andrewbonney 0:07919e3d6c56 1561 result += (c - ASCII_0);
andrewbonney 0:07919e3d6c56 1562 if (result >= 0x110000)
andrewbonney 0:07919e3d6c56 1563 return -1;
andrewbonney 0:07919e3d6c56 1564 }
andrewbonney 0:07919e3d6c56 1565 }
andrewbonney 0:07919e3d6c56 1566 return checkCharRefNumber(result);
andrewbonney 0:07919e3d6c56 1567 }
andrewbonney 0:07919e3d6c56 1568
andrewbonney 0:07919e3d6c56 1569 static int PTRCALL
andrewbonney 0:07919e3d6c56 1570 PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
andrewbonney 0:07919e3d6c56 1571 const char *end)
andrewbonney 0:07919e3d6c56 1572 {
andrewbonney 0:07919e3d6c56 1573 switch ((end - ptr)/MINBPC(enc)) {
andrewbonney 0:07919e3d6c56 1574 case 2:
andrewbonney 0:07919e3d6c56 1575 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
andrewbonney 0:07919e3d6c56 1576 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1577 case ASCII_l:
andrewbonney 0:07919e3d6c56 1578 return ASCII_LT;
andrewbonney 0:07919e3d6c56 1579 case ASCII_g:
andrewbonney 0:07919e3d6c56 1580 return ASCII_GT;
andrewbonney 0:07919e3d6c56 1581 }
andrewbonney 0:07919e3d6c56 1582 }
andrewbonney 0:07919e3d6c56 1583 break;
andrewbonney 0:07919e3d6c56 1584 case 3:
andrewbonney 0:07919e3d6c56 1585 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
andrewbonney 0:07919e3d6c56 1586 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1587 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
andrewbonney 0:07919e3d6c56 1588 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1589 if (CHAR_MATCHES(enc, ptr, ASCII_p))
andrewbonney 0:07919e3d6c56 1590 return ASCII_AMP;
andrewbonney 0:07919e3d6c56 1591 }
andrewbonney 0:07919e3d6c56 1592 }
andrewbonney 0:07919e3d6c56 1593 break;
andrewbonney 0:07919e3d6c56 1594 case 4:
andrewbonney 0:07919e3d6c56 1595 switch (BYTE_TO_ASCII(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1596 case ASCII_q:
andrewbonney 0:07919e3d6c56 1597 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1598 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
andrewbonney 0:07919e3d6c56 1599 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1600 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
andrewbonney 0:07919e3d6c56 1601 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1602 if (CHAR_MATCHES(enc, ptr, ASCII_t))
andrewbonney 0:07919e3d6c56 1603 return ASCII_QUOT;
andrewbonney 0:07919e3d6c56 1604 }
andrewbonney 0:07919e3d6c56 1605 }
andrewbonney 0:07919e3d6c56 1606 break;
andrewbonney 0:07919e3d6c56 1607 case ASCII_a:
andrewbonney 0:07919e3d6c56 1608 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1609 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
andrewbonney 0:07919e3d6c56 1610 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1611 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
andrewbonney 0:07919e3d6c56 1612 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1613 if (CHAR_MATCHES(enc, ptr, ASCII_s))
andrewbonney 0:07919e3d6c56 1614 return ASCII_APOS;
andrewbonney 0:07919e3d6c56 1615 }
andrewbonney 0:07919e3d6c56 1616 }
andrewbonney 0:07919e3d6c56 1617 break;
andrewbonney 0:07919e3d6c56 1618 }
andrewbonney 0:07919e3d6c56 1619 }
andrewbonney 0:07919e3d6c56 1620 return 0;
andrewbonney 0:07919e3d6c56 1621 }
andrewbonney 0:07919e3d6c56 1622
andrewbonney 0:07919e3d6c56 1623 static int PTRCALL
andrewbonney 0:07919e3d6c56 1624 PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
andrewbonney 0:07919e3d6c56 1625 {
andrewbonney 0:07919e3d6c56 1626 for (;;) {
andrewbonney 0:07919e3d6c56 1627 switch (BYTE_TYPE(enc, ptr1)) {
andrewbonney 0:07919e3d6c56 1628 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1629 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 1630 if (*ptr1++ != *ptr2++) \
andrewbonney 0:07919e3d6c56 1631 return 0;
andrewbonney 0:07919e3d6c56 1632 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
andrewbonney 0:07919e3d6c56 1633 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1634 /* fall through */
andrewbonney 0:07919e3d6c56 1635 if (*ptr1++ != *ptr2++)
andrewbonney 0:07919e3d6c56 1636 return 0;
andrewbonney 0:07919e3d6c56 1637 break;
andrewbonney 0:07919e3d6c56 1638 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1639 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1640 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1641 case BT_COLON:
andrewbonney 0:07919e3d6c56 1642 #endif
andrewbonney 0:07919e3d6c56 1643 case BT_HEX:
andrewbonney 0:07919e3d6c56 1644 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 1645 case BT_NAME:
andrewbonney 0:07919e3d6c56 1646 case BT_MINUS:
andrewbonney 0:07919e3d6c56 1647 if (*ptr2++ != *ptr1++)
andrewbonney 0:07919e3d6c56 1648 return 0;
andrewbonney 0:07919e3d6c56 1649 if (MINBPC(enc) > 1) {
andrewbonney 0:07919e3d6c56 1650 if (*ptr2++ != *ptr1++)
andrewbonney 0:07919e3d6c56 1651 return 0;
andrewbonney 0:07919e3d6c56 1652 if (MINBPC(enc) > 2) {
andrewbonney 0:07919e3d6c56 1653 if (*ptr2++ != *ptr1++)
andrewbonney 0:07919e3d6c56 1654 return 0;
andrewbonney 0:07919e3d6c56 1655 if (MINBPC(enc) > 3) {
andrewbonney 0:07919e3d6c56 1656 if (*ptr2++ != *ptr1++)
andrewbonney 0:07919e3d6c56 1657 return 0;
andrewbonney 0:07919e3d6c56 1658 }
andrewbonney 0:07919e3d6c56 1659 }
andrewbonney 0:07919e3d6c56 1660 }
andrewbonney 0:07919e3d6c56 1661 break;
andrewbonney 0:07919e3d6c56 1662 default:
andrewbonney 0:07919e3d6c56 1663 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
andrewbonney 0:07919e3d6c56 1664 return 1;
andrewbonney 0:07919e3d6c56 1665 switch (BYTE_TYPE(enc, ptr2)) {
andrewbonney 0:07919e3d6c56 1666 case BT_LEAD2:
andrewbonney 0:07919e3d6c56 1667 case BT_LEAD3:
andrewbonney 0:07919e3d6c56 1668 case BT_LEAD4:
andrewbonney 0:07919e3d6c56 1669 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1670 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1671 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1672 case BT_COLON:
andrewbonney 0:07919e3d6c56 1673 #endif
andrewbonney 0:07919e3d6c56 1674 case BT_HEX:
andrewbonney 0:07919e3d6c56 1675 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 1676 case BT_NAME:
andrewbonney 0:07919e3d6c56 1677 case BT_MINUS:
andrewbonney 0:07919e3d6c56 1678 return 0;
andrewbonney 0:07919e3d6c56 1679 default:
andrewbonney 0:07919e3d6c56 1680 return 1;
andrewbonney 0:07919e3d6c56 1681 }
andrewbonney 0:07919e3d6c56 1682 }
andrewbonney 0:07919e3d6c56 1683 }
andrewbonney 0:07919e3d6c56 1684 /* not reached */
andrewbonney 0:07919e3d6c56 1685 }
andrewbonney 0:07919e3d6c56 1686
andrewbonney 0:07919e3d6c56 1687 static int PTRCALL
andrewbonney 0:07919e3d6c56 1688 PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
andrewbonney 0:07919e3d6c56 1689 const char *end1, const char *ptr2)
andrewbonney 0:07919e3d6c56 1690 {
andrewbonney 0:07919e3d6c56 1691 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
andrewbonney 0:07919e3d6c56 1692 if (ptr1 == end1)
andrewbonney 0:07919e3d6c56 1693 return 0;
andrewbonney 0:07919e3d6c56 1694 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
andrewbonney 0:07919e3d6c56 1695 return 0;
andrewbonney 0:07919e3d6c56 1696 }
andrewbonney 0:07919e3d6c56 1697 return ptr1 == end1;
andrewbonney 0:07919e3d6c56 1698 }
andrewbonney 0:07919e3d6c56 1699
andrewbonney 0:07919e3d6c56 1700 static int PTRFASTCALL
andrewbonney 0:07919e3d6c56 1701 PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
andrewbonney 0:07919e3d6c56 1702 {
andrewbonney 0:07919e3d6c56 1703 const char *start = ptr;
andrewbonney 0:07919e3d6c56 1704 for (;;) {
andrewbonney 0:07919e3d6c56 1705 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1706 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1707 case BT_LEAD ## n: ptr += n; break;
andrewbonney 0:07919e3d6c56 1708 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1709 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1710 case BT_NONASCII:
andrewbonney 0:07919e3d6c56 1711 case BT_NMSTRT:
andrewbonney 0:07919e3d6c56 1712 #ifdef XML_NS
andrewbonney 0:07919e3d6c56 1713 case BT_COLON:
andrewbonney 0:07919e3d6c56 1714 #endif
andrewbonney 0:07919e3d6c56 1715 case BT_HEX:
andrewbonney 0:07919e3d6c56 1716 case BT_DIGIT:
andrewbonney 0:07919e3d6c56 1717 case BT_NAME:
andrewbonney 0:07919e3d6c56 1718 case BT_MINUS:
andrewbonney 0:07919e3d6c56 1719 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1720 break;
andrewbonney 0:07919e3d6c56 1721 default:
andrewbonney 0:07919e3d6c56 1722 return (int)(ptr - start);
andrewbonney 0:07919e3d6c56 1723 }
andrewbonney 0:07919e3d6c56 1724 }
andrewbonney 0:07919e3d6c56 1725 }
andrewbonney 0:07919e3d6c56 1726
andrewbonney 0:07919e3d6c56 1727 static const char * PTRFASTCALL
andrewbonney 0:07919e3d6c56 1728 PREFIX(skipS)(const ENCODING *enc, const char *ptr)
andrewbonney 0:07919e3d6c56 1729 {
andrewbonney 0:07919e3d6c56 1730 for (;;) {
andrewbonney 0:07919e3d6c56 1731 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1732 case BT_LF:
andrewbonney 0:07919e3d6c56 1733 case BT_CR:
andrewbonney 0:07919e3d6c56 1734 case BT_S:
andrewbonney 0:07919e3d6c56 1735 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1736 break;
andrewbonney 0:07919e3d6c56 1737 default:
andrewbonney 0:07919e3d6c56 1738 return ptr;
andrewbonney 0:07919e3d6c56 1739 }
andrewbonney 0:07919e3d6c56 1740 }
andrewbonney 0:07919e3d6c56 1741 }
andrewbonney 0:07919e3d6c56 1742
andrewbonney 0:07919e3d6c56 1743 static void PTRCALL
andrewbonney 0:07919e3d6c56 1744 PREFIX(updatePosition)(const ENCODING *enc,
andrewbonney 0:07919e3d6c56 1745 const char *ptr,
andrewbonney 0:07919e3d6c56 1746 const char *end,
andrewbonney 0:07919e3d6c56 1747 POSITION *pos)
andrewbonney 0:07919e3d6c56 1748 {
andrewbonney 0:07919e3d6c56 1749 while (ptr != end) {
andrewbonney 0:07919e3d6c56 1750 switch (BYTE_TYPE(enc, ptr)) {
andrewbonney 0:07919e3d6c56 1751 #define LEAD_CASE(n) \
andrewbonney 0:07919e3d6c56 1752 case BT_LEAD ## n: \
andrewbonney 0:07919e3d6c56 1753 ptr += n; \
andrewbonney 0:07919e3d6c56 1754 break;
andrewbonney 0:07919e3d6c56 1755 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
andrewbonney 0:07919e3d6c56 1756 #undef LEAD_CASE
andrewbonney 0:07919e3d6c56 1757 case BT_LF:
andrewbonney 0:07919e3d6c56 1758 pos->columnNumber = (XML_Size)-1;
andrewbonney 0:07919e3d6c56 1759 pos->lineNumber++;
andrewbonney 0:07919e3d6c56 1760 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1761 break;
andrewbonney 0:07919e3d6c56 1762 case BT_CR:
andrewbonney 0:07919e3d6c56 1763 pos->lineNumber++;
andrewbonney 0:07919e3d6c56 1764 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1765 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
andrewbonney 0:07919e3d6c56 1766 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1767 pos->columnNumber = (XML_Size)-1;
andrewbonney 0:07919e3d6c56 1768 break;
andrewbonney 0:07919e3d6c56 1769 default:
andrewbonney 0:07919e3d6c56 1770 ptr += MINBPC(enc);
andrewbonney 0:07919e3d6c56 1771 break;
andrewbonney 0:07919e3d6c56 1772 }
andrewbonney 0:07919e3d6c56 1773 pos->columnNumber++;
andrewbonney 0:07919e3d6c56 1774 }
andrewbonney 0:07919e3d6c56 1775 }
andrewbonney 0:07919e3d6c56 1776
andrewbonney 0:07919e3d6c56 1777 #undef DO_LEAD_CASE
andrewbonney 0:07919e3d6c56 1778 #undef MULTIBYTE_CASES
andrewbonney 0:07919e3d6c56 1779 #undef INVALID_CASES
andrewbonney 0:07919e3d6c56 1780 #undef CHECK_NAME_CASE
andrewbonney 0:07919e3d6c56 1781 #undef CHECK_NAME_CASES
andrewbonney 0:07919e3d6c56 1782 #undef CHECK_NMSTRT_CASE
andrewbonney 0:07919e3d6c56 1783 #undef CHECK_NMSTRT_CASES
andrewbonney 0:07919e3d6c56 1784
andrewbonney 0:07919e3d6c56 1785 #endif /* XML_TOK_IMPL_C */