Mbed port of the Simple Plain Xml parser. See http://code.google.com/p/spxml/ for more details. This library uses less memory and is much better suited to streaming data than TinyXML (it doesn't use as many C++ features and, in particular, works without streams). See http://mbed.org/users/hlipka/notebook/xml-parsing/ for usage examples.

Dependents:   spxmltest_weather VFD_fontx2_weather weather_LCD_display News_LCD_display ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers spxmlreader.cpp Source File

spxmlreader.cpp

00001 /*
00002  * Copyright 2007 Stephen Liu
00003  * LGPL, see http://code.google.com/p/spxml/
00004  * For license terms, see the file COPYING along with this library.
00005  */
00006 
00007 #include <string.h>
00008 #include <ctype.h>
00009 #include <stdlib.h>
00010 #include <typeinfo>
00011 
00012 #include "spxmlparser.hpp"
00013 #include "spxmlreader.hpp"
00014 #include "spxmlutils.hpp"
00015 #include "spxmlstag.hpp"
00016 #include "spxmlevent.hpp"
00017 #include "spxmlcodec.hpp"
00018 
00019 //=========================================================
00020 
00021 SP_XmlReader :: SP_XmlReader()
00022 {
00023     mBuffer = new SP_XmlStringBuffer();
00024 }
00025 
00026 SP_XmlReader :: ~SP_XmlReader()
00027 {
00028     delete mBuffer;
00029 }
00030 
00031 void SP_XmlReader :: changeReader(
00032         SP_XmlPullParser * parser, SP_XmlReader * reader )
00033 {
00034     parser->changeReader( reader );
00035 }
00036 
00037 SP_XmlReader * SP_XmlReader :: getReader( SP_XmlPullParser * parser, int type )
00038 {
00039     return parser->getReader( type );
00040 }
00041 
00042 void SP_XmlReader :: setError( SP_XmlPullParser * parser, const char * error )
00043 {
00044     parser->setError( error );
00045 }
00046 
00047 void SP_XmlReader :: reset()
00048 {
00049     mBuffer->clean();
00050 }
00051 
00052 //=========================================================
00053 
00054 SP_XmlPIReader :: SP_XmlPIReader()
00055 {
00056 }
00057 
00058 SP_XmlPIReader :: ~SP_XmlPIReader()
00059 {
00060 }
00061 
00062 void SP_XmlPIReader :: read( SP_XmlPullParser * parser, char c )
00063 {
00064     if( '>' == c ) {
00065         changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00066     } else {
00067         mBuffer->append( c );
00068     }
00069 }
00070 
00071 SP_XmlPullEvent * SP_XmlPIReader :: getEvent( SP_XmlPullParser * parser )
00072 {
00073     SP_XmlPullEvent * retEvent = NULL;
00074 
00075     if( mBuffer->getSize() > 0 ) {
00076         char * begin = (char*)mBuffer->getBuffer();
00077         for( ; isspace( *begin ); ) begin++;
00078 
00079         char * end = begin;
00080         for( ; '\0' != *end && '?' != *end && ( ! isspace( *end ) ); ) end++;
00081 
00082         char savedChar = *end;
00083         *end = '\0';
00084 
00085         if( 0 == strcasecmp( begin, "xml" ) ) {
00086             *end = savedChar;
00087 
00088             retEvent = parseDocDeclEvent( parser, mBuffer );
00089         } else {
00090             SP_XmlPIEvent * piEvent = new SP_XmlPIEvent();
00091             piEvent->setTarget( begin );
00092 
00093             *end = savedChar;
00094 
00095             begin = end;
00096             for( ; isspace( *begin ); ) begin++;
00097 
00098             end = begin;
00099             for( ; '\0' != *end && '?' != *end; ) end++;
00100 
00101             piEvent->setData( begin, end - begin );
00102 
00103             retEvent = piEvent;
00104         }
00105     }
00106 
00107     return retEvent;
00108 }
00109 
00110 SP_XmlPullEvent * SP_XmlPIReader :: parseDocDeclEvent( SP_XmlPullParser * parser,
00111         SP_XmlStringBuffer * buffer )
00112 {
00113     SP_XmlDocDeclEvent * retEvent = NULL;
00114 
00115     SP_XmlSTagParser tagParser( parser->getEncoding() );
00116 
00117     tagParser.append( buffer->getBuffer(), buffer->getSize() );
00118     tagParser.append( " ", 1 );
00119 
00120     if( NULL == tagParser.getError() ) {
00121         SP_XmlStartTagEvent * event = tagParser.takeEvent();
00122 
00123         const char * version = event->getAttrValue( "version" );
00124         const char * encoding = event->getAttrValue( "encoding" );
00125         const char * standalone = event->getAttrValue( "standalone" );
00126 
00127         retEvent = new SP_XmlDocDeclEvent();
00128         retEvent->setVersion( NULL == version ? "" : version );
00129         retEvent->setEncoding( NULL == encoding ? "" : encoding );
00130         if( NULL != standalone ) {
00131             if( 0 == strcasecmp( "no", standalone ) ) {
00132                 retEvent->setStandalone( 0 );
00133             } else {
00134                 retEvent->setStandalone( 1 );
00135             }
00136         }
00137 
00138         delete event;
00139     } else {
00140         setError( parser, tagParser.getError() );
00141     }
00142 
00143     return retEvent;
00144 }
00145 
00146 //=========================================================
00147 
00148 SP_XmlStartTagReader :: SP_XmlStartTagReader()
00149 {
00150     mIsQuot = 0;
00151 }
00152 
00153 SP_XmlStartTagReader :: ~SP_XmlStartTagReader()
00154 {
00155 }
00156 
00157 void SP_XmlStartTagReader :: read( SP_XmlPullParser * parser, char c )
00158 {
00159     if( '>' == c && 0 == mIsQuot ) {
00160         changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00161     } else if( '/' == c && 0 == mIsQuot ) {
00162         SP_XmlReader * reader = getReader( parser, SP_XmlReader::eETag );
00163         const char * pos = mBuffer->getBuffer();
00164         for( ; isspace( *pos ); ) pos++;
00165         for( ; 0 == isspace( *pos ) && '\0' != *pos; pos++ ) {
00166             reader->read( parser, *pos );
00167         }
00168         changeReader( parser, reader );
00169     } else if( '<' == c && 0 == mIsQuot ) {
00170         setError( parser, "illegal char" );
00171     } else {
00172         mBuffer->append( c );
00173 
00174         if( 0 == mIsQuot ) {
00175             if( '\'' == c ) mIsQuot = 1;
00176             if( '"' == c ) mIsQuot = 2;
00177         } else {
00178             if( 1 == mIsQuot && '\'' == c ) mIsQuot = 0;
00179             if( 2 == mIsQuot && '"' == c ) mIsQuot = 0;
00180         }
00181     }
00182 }
00183 
00184 SP_XmlPullEvent * SP_XmlStartTagReader :: getEvent( SP_XmlPullParser * parser )
00185 {
00186     SP_XmlStartTagEvent * retEvent = NULL;
00187 
00188     SP_XmlSTagParser tagParser( parser->getEncoding() );
00189     tagParser.append( mBuffer->getBuffer(), mBuffer->getSize() );
00190     tagParser.append( " ", 1 );
00191 
00192     if( NULL == tagParser.getError() ) {
00193         retEvent = tagParser.takeEvent();
00194     } else {
00195         setError( parser, tagParser.getError() );
00196     }
00197 
00198     return retEvent;
00199 }
00200 
00201 void SP_XmlStartTagReader :: reset()
00202 {
00203     SP_XmlReader::reset();
00204     mIsQuot = 0;
00205 }
00206 
00207 //=========================================================
00208 
00209 SP_XmlEndTagReader :: SP_XmlEndTagReader()
00210 {
00211 }
00212 
00213 SP_XmlEndTagReader :: ~SP_XmlEndTagReader()
00214 {
00215 }
00216 
00217 void SP_XmlEndTagReader :: read( SP_XmlPullParser * parser,    char c )
00218 {
00219     if( '>' == c ) {
00220         changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00221     } else if( '/' == c ) {
00222         setError( parser, "illegal name char" );
00223     } else {
00224         mBuffer->append( c );
00225     }
00226 }
00227 
00228 SP_XmlPullEvent * SP_XmlEndTagReader :: getEvent( SP_XmlPullParser * parser )
00229 {
00230     const char * end = mBuffer->getBuffer() + mBuffer->getSize() - 1;
00231 
00232     for( ; end > mBuffer->getBuffer() && isspace( *end ); ) end--;
00233 
00234     SP_XmlEndTagEvent * retEvent = new SP_XmlEndTagEvent();
00235     retEvent->setText( mBuffer->getBuffer(), end - mBuffer->getBuffer() + 1 );
00236 
00237     return retEvent;
00238 }
00239 
00240 //=========================================================
00241 
00242 SP_XmlPCDataReader :: SP_XmlPCDataReader()
00243 {
00244 }
00245 
00246 SP_XmlPCDataReader :: ~SP_XmlPCDataReader()
00247 {
00248 }
00249 
00250 void SP_XmlPCDataReader :: read( SP_XmlPullParser * parser, char c )
00251 {
00252     if( '<' == c ) {
00253         SP_XmlReader * reader = getReader( parser, SP_XmlReader::eLBracket );
00254         reader->read( parser, c );
00255         changeReader( parser, reader );
00256     } else {
00257         mBuffer->append( c );
00258     }
00259 }
00260 
00261 SP_XmlPullEvent * SP_XmlPCDataReader :: getEvent( SP_XmlPullParser * parser )
00262 {
00263     SP_XmlCDataEvent * retEvent = NULL;
00264 
00265     int ignore = 0;
00266 
00267     if( 0 != parser->getIgnoreWhitespace() ) {
00268         ignore = 1;
00269         for( const char * pos = mBuffer->getBuffer(); '\0' != *pos; pos++ ) {
00270             if( !isspace( *pos ) ) {
00271                 ignore = 0;
00272                 break;
00273             }
00274         }
00275     }
00276 
00277     if( 0 == ignore && mBuffer->getSize() > 0 ) {
00278         retEvent = new SP_XmlCDataEvent();
00279         SP_XmlStringBuffer buffer;
00280         SP_XmlStringCodec::decode( parser->getEncoding(), mBuffer->getBuffer(), &buffer );
00281         retEvent->setText( buffer.getBuffer(), buffer.getSize() );
00282     }
00283 
00284     return retEvent;
00285 }
00286 
00287 //=========================================================
00288 
00289 SP_XmlCDataSectionReader :: SP_XmlCDataSectionReader()
00290 {
00291 }
00292 
00293 SP_XmlCDataSectionReader :: ~SP_XmlCDataSectionReader()
00294 {
00295 }
00296 
00297 void SP_XmlCDataSectionReader :: read( SP_XmlPullParser * parser, char c )
00298 {
00299     if( '>' == c && mBuffer->getSize() > 2 ) {
00300         char last1 = mBuffer->getBuffer()[ mBuffer->getSize() - 1 ];
00301         char last2 = mBuffer->getBuffer()[ mBuffer->getSize() - 2 ];
00302 
00303         if( ']' == last1 && ']' == last2 ) {
00304             changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00305         } else {
00306             mBuffer->append( c );
00307         }
00308     } else {
00309         mBuffer->append( c );
00310     }
00311 }
00312 
00313 SP_XmlPullEvent * SP_XmlCDataSectionReader :: getEvent( SP_XmlPullParser * parser )
00314 {
00315     SP_XmlCDataEvent * retEvent = NULL;
00316 
00317     int len = mBuffer->getSize();
00318     const char * data = mBuffer->getBuffer();
00319     if( 0 == strncmp( data, "CDATA[", strlen( "CDATA[" ) ) ) {
00320         data += strlen( "CDATA[" );
00321         len -= strlen( "CDATA[" );
00322     }
00323 
00324     int ignore = 0;
00325     if( 0 != parser->getIgnoreWhitespace() ) {
00326         ignore = 1;
00327         for( int i = 0; i < len - 2; i++ ) {
00328             if( !isspace( data[i] ) ) {
00329                 ignore = 0;
00330                 break;
00331             }
00332         }
00333     }
00334 
00335     if( 0 == ignore && len > 2 ) {
00336         retEvent = new SP_XmlCDataEvent();
00337         retEvent->setText( data, len - 2 );
00338     }
00339 
00340     return retEvent;
00341 }
00342 
00343 //=========================================================
00344 
00345 SP_XmlCommentReader :: SP_XmlCommentReader()
00346 {
00347 }
00348 
00349 SP_XmlCommentReader :: ~SP_XmlCommentReader()
00350 {
00351 }
00352 
00353 void SP_XmlCommentReader :: read( SP_XmlPullParser * parser, char c )
00354 {
00355     if( '>' == c && mBuffer->getSize() >= 2 ) {
00356         int size = mBuffer->getSize();
00357         if( '-' == mBuffer->getBuffer()[ size - 1 ]
00358                 && '-' == mBuffer->getBuffer()[ size - 2 ] ) {
00359             changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00360         } else {
00361             mBuffer->append( c );
00362         }
00363     } else {
00364         mBuffer->append( c );
00365     }
00366 }
00367 
00368 SP_XmlPullEvent * SP_XmlCommentReader :: getEvent( SP_XmlPullParser * parser )
00369 {
00370     SP_XmlCommentEvent * retEvent = new SP_XmlCommentEvent();
00371 
00372     retEvent->setText( mBuffer->getBuffer(), mBuffer->getSize() - 2 );
00373 
00374     return retEvent;
00375 }
00376 
00377 //=========================================================
00378 
00379 SP_XmlDocTypeReader :: SP_XmlDocTypeReader()
00380 {
00381 }
00382 
00383 SP_XmlDocTypeReader :: ~SP_XmlDocTypeReader()
00384 {
00385 }
00386 
00387 void SP_XmlDocTypeReader :: read( SP_XmlPullParser * parser, char c )
00388 {
00389     if( '>' == c ) {
00390         if( NULL != strchr( mBuffer->getBuffer(), '[' ) ) {
00391             char last = mBuffer->getBuffer()[ mBuffer->getSize() - 1 ];
00392             if( ']' == last ) {
00393                 changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00394             } else {
00395                 mBuffer->append( c );
00396             }
00397         } else {
00398             changeReader( parser, getReader( parser, SP_XmlReader::ePCData ) );
00399         }
00400     } else {
00401         mBuffer->append( c );
00402     }
00403 }
00404 
00405 SP_XmlPullEvent * SP_XmlDocTypeReader :: getEvent( SP_XmlPullParser * parser )
00406 {
00407     SP_XmlDocTypeEvent * retEvent = NULL;
00408 
00409     SP_XmlSTagParser tagParser( parser->getEncoding() );
00410 
00411     tagParser.append( "DOCTYPE ", strlen( "DOCTYPE " ) );
00412     tagParser.append( mBuffer->getBuffer(), mBuffer->getSize() );
00413     tagParser.append( " ", 1 );
00414     if( NULL == tagParser.getError() ) {
00415         SP_XmlStartTagEvent * event = tagParser.takeEvent();
00416 
00417         retEvent = new SP_XmlDocTypeEvent();
00418 
00419         for( int i = 0; i < event->getAttrCount(); i += 2 ) {
00420             const char * name = event->getAttr( i, NULL );
00421             if( 0 == strcmp( name, "DOCTYPE" ) ) {
00422                 name = event->getAttr( i + 1, NULL );
00423                 retEvent->setName( NULL == name ? "" : name );    
00424             } else if( 0 == strcmp( name, "PUBLIC" ) ) {
00425                 name = event->getAttr( i + 1, NULL );
00426                 retEvent->setPublicID( NULL == name ? "" : name );
00427             } else if( 0 == strcmp( name, "SYSTEM" ) ) {
00428                 name = event->getAttr( i + 1, NULL );
00429                 retEvent->setSystemID( NULL == name ? "" : name );
00430             } else if( NULL != strstr( name, ".dtd" ) ) {
00431                 retEvent->setDTD( name );
00432             }
00433         }
00434 
00435         delete event;
00436     } else {
00437         //setError( parser, tagParser.getError() );
00438     }
00439 
00440     return retEvent;
00441 }
00442 
00443 //=========================================================
00444 
00445 SP_XmlLeftBracketReader :: SP_XmlLeftBracketReader()
00446 {
00447     mHasReadBracket = 0;
00448 }
00449 
00450 SP_XmlLeftBracketReader :: ~SP_XmlLeftBracketReader()
00451 {
00452 }
00453 
00454 void SP_XmlLeftBracketReader :: read( SP_XmlPullParser * parser, char c )
00455 {
00456     if( 0 == mHasReadBracket ) {
00457         if( isspace( c ) ) {
00458             //skip
00459         } else if( '<' == c ) {
00460             mHasReadBracket = 1;
00461         }
00462     } else {
00463         if( '?' == c ) {
00464             changeReader( parser, getReader( parser, SP_XmlReader::ePI ) );
00465         } else if( '/' == c ) {
00466             changeReader( parser, getReader( parser, SP_XmlReader::eETag ) );
00467         } else if( '!' == c ) {
00468             changeReader( parser, getReader( parser, SP_XmlReader::eSign ) );
00469         } else if( SP_XmlStringCodec::isNameChar( parser->getEncoding(), c ) ) {
00470             SP_XmlReader * reader = getReader( parser, SP_XmlReader::eSTag );
00471             reader->read( parser, c );
00472             changeReader( parser, reader );
00473         } else {
00474             setError( parser, "not well-formed" );
00475         }
00476     }
00477 }
00478 
00479 SP_XmlPullEvent * SP_XmlLeftBracketReader :: getEvent( SP_XmlPullParser * parser )
00480 {
00481     return NULL;
00482 }
00483 
00484 void SP_XmlLeftBracketReader :: reset()
00485 {
00486     SP_XmlReader::reset();
00487     mHasReadBracket = 0;
00488 }
00489 
00490 //=========================================================
00491 
00492 SP_XmlSignReader :: SP_XmlSignReader()
00493 {
00494 }
00495 
00496 SP_XmlSignReader :: ~SP_XmlSignReader()
00497 {
00498 }
00499 
00500 void SP_XmlSignReader :: read( SP_XmlPullParser * parser, char c )
00501 {
00502     if( '[' == c ) {
00503         changeReader( parser, getReader( parser, SP_XmlReader::eCDataSection ) );
00504     } else if( '-' == c ) {
00505         changeReader( parser, getReader( parser, SP_XmlReader::eComment ) );
00506     } else if( isupper( c ) ) {
00507         SP_XmlReader * reader = getReader( parser, SP_XmlReader::eDocType );
00508         reader->read( parser, c );
00509         changeReader( parser, reader );
00510     } else {
00511         setError( parser, "not well-formed" );
00512     }
00513 }
00514 
00515 SP_XmlPullEvent * SP_XmlSignReader :: getEvent( SP_XmlPullParser * parser )
00516 {
00517     return NULL;
00518 }
00519 
00520 //=========================================================
00521 
00522 SP_XmlReaderPool :: SP_XmlReaderPool()
00523 {
00524     mReaderList = (SP_XmlReader**)malloc( sizeof( void * ) * SP_XmlReader::MAX_READER );
00525     memset( mReaderList, 0, sizeof( void * ) * SP_XmlReader::MAX_READER );
00526 }
00527 
00528 SP_XmlReaderPool :: ~SP_XmlReaderPool()
00529 {
00530     for( int i = 0; i < SP_XmlReader::MAX_READER; i++ ) {
00531         if( NULL != mReaderList[i] ) {
00532             delete mReaderList[i];
00533         }
00534     }
00535     free( mReaderList );
00536 }
00537 
00538 SP_XmlReader * SP_XmlReaderPool :: borrow( int type )
00539 {
00540     SP_XmlReader * reader = NULL;
00541 
00542     if( type >= 0 && type < SP_XmlReader::MAX_READER ) {
00543         reader = mReaderList[ type ];
00544         if( NULL == reader ) {
00545             switch( type ) {
00546             case SP_XmlReader::ePI: reader = new SP_XmlPIReader(); break;
00547             case SP_XmlReader::eSTag: reader = new SP_XmlStartTagReader(); break;
00548             case SP_XmlReader::eETag: reader = new SP_XmlEndTagReader(); break;
00549             case SP_XmlReader::ePCData: reader = new SP_XmlPCDataReader(); break;
00550             case SP_XmlReader::eCDataSection: reader = new SP_XmlCDataSectionReader(); break;
00551             case SP_XmlReader::eComment: reader = new SP_XmlCommentReader(); break;
00552             case SP_XmlReader::eDocType: reader = new SP_XmlDocTypeReader(); break;
00553             case SP_XmlReader::eLBracket: reader = new SP_XmlLeftBracketReader(); break;
00554             case SP_XmlReader::eSign: reader = new SP_XmlSignReader(); break;
00555             }
00556             mReaderList[ type ] = reader;
00557         }
00558     }
00559 
00560     //printf( "\nborrow change: %s\n", typeid( *reader ).name() );
00561 
00562     return reader;
00563 }
00564 
00565 void SP_XmlReaderPool :: save( SP_XmlReader * reader )
00566 {
00567     //printf( "\nreturn change: %s\n", typeid( *reader ).name() );
00568     reader->reset();
00569 }
00570 
00571 //=========================================================
00572