Downloads the NHK English news podcast automatically. The XML parser "spxml" is used. This application requires the mpod mother board. See also http://mbed.org/users/geodenx/notebook/mpod/

Dependencies:   BlinkLed HTTPClient EthernetInterface FatFileSystemCpp MSCFileSystem spxml mbed-rtos mbed

Fork of mpod_nhk_english by Satoshi Togawa

Downloads the NHK English news podcast automatically.
The XML parser "spxml" is used.
This application requires the mpod mother board.
See also http://mbed.org/users/geodenx/notebook/mpod/

Committer:
togayan
Date:
Mon Aug 20 13:27:17 2012 +0000
Revision:
4:7dae52cf560f
1st revision

Who changed what in which revision?

User | Revision | Line number | New contents of line
togayan 4:7dae52cf560f 1 /*
togayan 4:7dae52cf560f 2 * Copyright 2007 Stephen Liu
togayan 4:7dae52cf560f 3 * For license terms, see the file COPYING along with this library.
togayan 4:7dae52cf560f 4 */
togayan 4:7dae52cf560f 5
togayan 4:7dae52cf560f 6 #include <string.h>
togayan 4:7dae52cf560f 7 #include <stdlib.h>
togayan 4:7dae52cf560f 8 #include <ctype.h>
togayan 4:7dae52cf560f 9
togayan 4:7dae52cf560f 10 #include "spxmlcodec.hpp"
togayan 4:7dae52cf560f 11 #include "spxmlutils.hpp"
togayan 4:7dae52cf560f 12
togayan 4:7dae52cf560f 13 const char * SP_XmlStringCodec :: DEFAULT_ENCODING = "utf-8";
togayan 4:7dae52cf560f 14
togayan 4:7dae52cf560f 15 const char SP_XmlStringCodec :: XML_CHARS [] =
togayan 4:7dae52cf560f 16 { '<', '>', '&', '\'', '"' };
togayan 4:7dae52cf560f 17 const char * SP_XmlStringCodec :: ESC_CHARS [] =
togayan 4:7dae52cf560f 18 { "&lt;", "&gt;", "&amp;", "&apos;", "&quot;" };
togayan 4:7dae52cf560f 19
togayan 4:7dae52cf560f 20 int SP_XmlStringCodec :: decode( const char * encoding, const char * encodeValue,
togayan 4:7dae52cf560f 21 SP_XmlStringBuffer * outBuffer )
togayan 4:7dae52cf560f 22 {
togayan 4:7dae52cf560f 23 int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) );
togayan 4:7dae52cf560f 24
togayan 4:7dae52cf560f 25 const char * pos = encodeValue;
togayan 4:7dae52cf560f 26 for( ; '\0' != *pos; ) {
togayan 4:7dae52cf560f 27 if( '&' == *pos ) {
togayan 4:7dae52cf560f 28 int index = -1;
togayan 4:7dae52cf560f 29 int len = 0;
togayan 4:7dae52cf560f 30 for( int i = 0; i < (int)( sizeof( ESC_CHARS ) / sizeof( ESC_CHARS[0] ) ); i++ ) {
togayan 4:7dae52cf560f 31 len = strlen( ESC_CHARS[ i ] );
togayan 4:7dae52cf560f 32 if( 0 == strncmp( pos, ESC_CHARS[i], len ) ) {
togayan 4:7dae52cf560f 33 index = i;
togayan 4:7dae52cf560f 34 break;
togayan 4:7dae52cf560f 35 }
togayan 4:7dae52cf560f 36 }
togayan 4:7dae52cf560f 37 if( index >= 0 ) {
togayan 4:7dae52cf560f 38 outBuffer->append( XML_CHARS[ index ] );
togayan 4:7dae52cf560f 39 pos += len;
togayan 4:7dae52cf560f 40 } else {
togayan 4:7dae52cf560f 41 char * next = "";
togayan 4:7dae52cf560f 42 int ch = 0;
togayan 4:7dae52cf560f 43 if( '#' == *( pos + 1 ) ) {
togayan 4:7dae52cf560f 44 if( 'x' == *( pos + 2 ) ) {
togayan 4:7dae52cf560f 45 ch = strtol( pos + 3, &next, 16 );
togayan 4:7dae52cf560f 46 } else {
togayan 4:7dae52cf560f 47 ch = strtol( pos + 2, &next, 10 );
togayan 4:7dae52cf560f 48 }
togayan 4:7dae52cf560f 49 }
togayan 4:7dae52cf560f 50
togayan 4:7dae52cf560f 51 // TODO: fully support xml entity, currently only support unicode entity
togayan 4:7dae52cf560f 52 if( ';' == *next && 0 != ch ) {
togayan 4:7dae52cf560f 53 if( isUtf8 ) {
togayan 4:7dae52cf560f 54 SP_XmlUtf8Codec::uni2utf8( ch, outBuffer );
togayan 4:7dae52cf560f 55 } else {
togayan 4:7dae52cf560f 56 outBuffer->append( ch );
togayan 4:7dae52cf560f 57 }
togayan 4:7dae52cf560f 58 pos = next + 1;
togayan 4:7dae52cf560f 59 } else {
togayan 4:7dae52cf560f 60 outBuffer->append( *pos++ );
togayan 4:7dae52cf560f 61 }
togayan 4:7dae52cf560f 62 }
togayan 4:7dae52cf560f 63 } else {
togayan 4:7dae52cf560f 64 outBuffer->append( *pos++ );
togayan 4:7dae52cf560f 65 }
togayan 4:7dae52cf560f 66 }
togayan 4:7dae52cf560f 67
togayan 4:7dae52cf560f 68 return 0;
togayan 4:7dae52cf560f 69 }
togayan 4:7dae52cf560f 70
togayan 4:7dae52cf560f 71 int SP_XmlStringCodec :: encode( const char * encoding, const char * decodeValue,
togayan 4:7dae52cf560f 72 SP_XmlStringBuffer * outBuffer )
togayan 4:7dae52cf560f 73 {
togayan 4:7dae52cf560f 74 int isUtf8 = ( 0 == strcasecmp( encoding, "utf-8" ) );
togayan 4:7dae52cf560f 75
togayan 4:7dae52cf560f 76 const unsigned char * pos = (unsigned char *)decodeValue;
togayan 4:7dae52cf560f 77 for( ; '\0' != *pos; pos++ ) {
togayan 4:7dae52cf560f 78 int index = -1;
togayan 4:7dae52cf560f 79 for( int i = 0; i < (int)( sizeof( XML_CHARS ) / sizeof( XML_CHARS[0] ) ); i++ ) {
togayan 4:7dae52cf560f 80 if( XML_CHARS[i] == *pos ) {
togayan 4:7dae52cf560f 81 index = i;
togayan 4:7dae52cf560f 82 break;
togayan 4:7dae52cf560f 83 }
togayan 4:7dae52cf560f 84 }
togayan 4:7dae52cf560f 85 if( index >= 0 && '\'' != *pos ) {
togayan 4:7dae52cf560f 86 outBuffer->append( ESC_CHARS[ index ] );
togayan 4:7dae52cf560f 87 } else {
togayan 4:7dae52cf560f 88 if( isUtf8 ) {
togayan 4:7dae52cf560f 89 int ch = 0;
togayan 4:7dae52cf560f 90 int len = SP_XmlUtf8Codec::utf82uni( (unsigned char*)pos, &ch );
togayan 4:7dae52cf560f 91
togayan 4:7dae52cf560f 92 if( len > 0 ) {
togayan 4:7dae52cf560f 93 pos += len - 1;
togayan 4:7dae52cf560f 94
togayan 4:7dae52cf560f 95 char temp[ 32 ] = { 0 };
togayan 4:7dae52cf560f 96 snprintf( temp, sizeof( temp ), "&#%d;", ch );
togayan 4:7dae52cf560f 97 outBuffer->append( temp );
togayan 4:7dae52cf560f 98 } else {
togayan 4:7dae52cf560f 99 outBuffer->append( *pos );
togayan 4:7dae52cf560f 100 }
togayan 4:7dae52cf560f 101 } else {
togayan 4:7dae52cf560f 102 if( *pos < 32 ) {
togayan 4:7dae52cf560f 103 char temp[ 32 ] = { 0 };
togayan 4:7dae52cf560f 104 snprintf( temp, sizeof( temp ), "&#%d;", *pos );
togayan 4:7dae52cf560f 105 outBuffer->append( temp );
togayan 4:7dae52cf560f 106 } else {
togayan 4:7dae52cf560f 107 outBuffer->append( *pos );
togayan 4:7dae52cf560f 108 }
togayan 4:7dae52cf560f 109 }
togayan 4:7dae52cf560f 110 }
togayan 4:7dae52cf560f 111 }
togayan 4:7dae52cf560f 112
togayan 4:7dae52cf560f 113 return 0;
togayan 4:7dae52cf560f 114 }
togayan 4:7dae52cf560f 115
togayan 4:7dae52cf560f 116 int SP_XmlStringCodec :: isNameChar( const char * encoding, char c )
togayan 4:7dae52cf560f 117 {
togayan 4:7dae52cf560f 118 if( 0 == strcasecmp( encoding, "utf-8" ) ) {
togayan 4:7dae52cf560f 119 return 1;
togayan 4:7dae52cf560f 120 } else {
togayan 4:7dae52cf560f 121 return isalnum(c) || c == ':' || c == '-' || c == '.' || c == '_';
togayan 4:7dae52cf560f 122 }
togayan 4:7dae52cf560f 123 }
togayan 4:7dae52cf560f 124
togayan 4:7dae52cf560f 125 //=========================================================
togayan 4:7dae52cf560f 126
togayan 4:7dae52cf560f 127 int SP_XmlUtf8Codec :: utf82uni( const unsigned char * utf8, int * ch )
togayan 4:7dae52cf560f 128 {
togayan 4:7dae52cf560f 129 int len = 0;
togayan 4:7dae52cf560f 130
togayan 4:7dae52cf560f 131 unsigned char c1 = 0, c2 = 0, c3 = 0, c4 = 0;
togayan 4:7dae52cf560f 132
togayan 4:7dae52cf560f 133 if( *utf8 >= 0x80 ) {
togayan 4:7dae52cf560f 134 c1 = *utf8++;
togayan 4:7dae52cf560f 135
togayan 4:7dae52cf560f 136 if( c1 < 0xE0 ) { // 2 bytes
togayan 4:7dae52cf560f 137 if( '\0' != ( c2 = *utf8 ) ) {
togayan 4:7dae52cf560f 138 *ch = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
togayan 4:7dae52cf560f 139 len = 2;
togayan 4:7dae52cf560f 140 }
togayan 4:7dae52cf560f 141 } else if( c1 < 0xF0 ) { // 3 bytes
togayan 4:7dae52cf560f 142 if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8 ) ) {
togayan 4:7dae52cf560f 143 *ch = ((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6)| (c3 & 0x3F);
togayan 4:7dae52cf560f 144 len = 3;
togayan 4:7dae52cf560f 145 }
togayan 4:7dae52cf560f 146 } else { // 4 bytes
togayan 4:7dae52cf560f 147 if( '\0' != ( c2 = *utf8++ ) && '\0' != ( c3 = *utf8++ )
togayan 4:7dae52cf560f 148 && '\0' != ( c4 = *utf8 ) ) {
togayan 4:7dae52cf560f 149 *ch = ((c1 & 0x07) << 16) | ((c2 & 0x3F) << 12)
togayan 4:7dae52cf560f 150 | ((c3 & 0x3F) << 6) | (c4 & 0x3F);
togayan 4:7dae52cf560f 151 len = 4;
togayan 4:7dae52cf560f 152 }
togayan 4:7dae52cf560f 153 }
togayan 4:7dae52cf560f 154 }
togayan 4:7dae52cf560f 155
togayan 4:7dae52cf560f 156 return len;
togayan 4:7dae52cf560f 157 }
togayan 4:7dae52cf560f 158
togayan 4:7dae52cf560f 159 void SP_XmlUtf8Codec :: uni2utf8( int ch, SP_XmlStringBuffer * outBuffer )
togayan 4:7dae52cf560f 160 {
togayan 4:7dae52cf560f 161 if( ch < 0x80 ) outBuffer->append( ch );
togayan 4:7dae52cf560f 162 else if( ch < 0x800 ) {
togayan 4:7dae52cf560f 163 outBuffer->append( 0xC0 | ( ch >> 6 ) );
togayan 4:7dae52cf560f 164 outBuffer->append( 0x80 | ( ch & 0x3F ) );
togayan 4:7dae52cf560f 165 } else if( ch < 0x10000 ) {
togayan 4:7dae52cf560f 166 outBuffer->append( 0xE0 | ( ch >> 12 ) );
togayan 4:7dae52cf560f 167 outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
togayan 4:7dae52cf560f 168 outBuffer->append( 0x80 | ( ch & 0x3F ) );
togayan 4:7dae52cf560f 169 } else if( ch < 0x200000 ) {
togayan 4:7dae52cf560f 170 outBuffer->append( 0xF0 | ( ch >> 18 ) );
togayan 4:7dae52cf560f 171 outBuffer->append( 0x80 | ( ( ch >> 12 ) & 0x3F ) );
togayan 4:7dae52cf560f 172 outBuffer->append( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
togayan 4:7dae52cf560f 173 outBuffer->append( 0x80 | ( ch & 0x3F ) );
togayan 4:7dae52cf560f 174 }
togayan 4:7dae52cf560f 175 }