From 2ff101097feedf85b0ab0163983159200fc146a2 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Sat, 14 Dec 2019 21:18:59 -0800 Subject: Json provides line/char in errors now. All errors are also reworked and the parser state is tracked much better. Also fixed a parser issue where it would error correctly, but report it poorly when an object started with something other than a string. --- src/unstable/json.cpp | 208 ++++++++++++++++++++++++++++++-------------------- src/unstable/json.h | 39 ++++++---- 2 files changed, 151 insertions(+), 96 deletions(-) diff --git a/src/unstable/json.cpp b/src/unstable/json.cpp index f6a8d52..9745a7f 100644 --- a/src/unstable/json.cpp +++ b/src/unstable/json.cpp @@ -1,12 +1,13 @@ #include "bu/json.h" #include "bu/staticmembuf.h" #include "bu/membuf.h" +#include "bu/exceptionparse.h" #include "bu/sio.h" #include -#define next( txt ) readChar( c, sInput, "Unexpected end of stream while reading " txt "." ) +#define next( txt ) readChar( ps, "Unexpected end of stream while reading " txt "." 
) Bu::Json::Json() : eType( Null ) @@ -75,10 +76,10 @@ Bu::Json::Json( Bu::Stream &sInput ) : parse( sInput ); } -Bu::Json::Json( Bu::UtfChar &c, Bu::Stream &sInput ) : +Bu::Json::Json( Bu::Json::ParseState &ps ) : eType( Invalid ) { - parse( c, sInput ); + parse( ps ); } Bu::Json::Json( const Json &rSrc ) : @@ -294,10 +295,10 @@ void Bu::Json::parse( Bu::Stream &sInput ) { reset(); - Bu::UtfChar c; + ParseState ps( sInput ); next("json"); - parse( c, sInput ); + parse( ps ); } void Bu::Json::parse( const Bu::String &sInput ) @@ -306,40 +307,42 @@ void Bu::Json::parse( const Bu::String &sInput ) parse( mb ); } -void Bu::Json::parse( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parse( ParseState &ps ) { - while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) + while( ps.c == ' ' || ps.c == '\t' || ps.c == '\r' || ps.c == '\n' ) { next( "json" ); } - if( c == '"' ) + if( ps.c == '"' ) { // String - parseString( c, sInput ); + parseString( ps ); } - else if( c == '{' ) + else if( ps.c == '{' ) { // Object - parseObject( c, sInput ); + parseObject( ps ); } - else if( c == '[' ) + else if( ps.c == '[' ) { // Array - parseArray( c, sInput ); + parseArray( ps ); } - else if( c == '-' || (c >= '0' && c <= '9') ) + else if( ps.c == '-' || (ps.c >= '0' && ps.c <= '9') ) { // Number -- apparently they can't start with a period - parseNumber( c, sInput ); + parseNumber( ps ); } - else if( c == 't' || c == 'f' || c == 'n' ) + else if( ps.c == 't' || ps.c == 'f' || ps.c == 'n' ) { // True / false / null - parseLiteral( c, sInput ); + parseLiteral( ps ); } else { - throw Bu::ExceptionBase("Invalid characters in json stream."); + ps.error( + Bu::String("Invalid json: Invalid character: '%1'.").arg( (char)ps.c ) + ); } } @@ -382,7 +385,7 @@ void Bu::Json::write( Bu::Stream &sOutput ) const switch( eType ) { case Invalid: - throw Bu::ExceptionBase("Invalid type in json"); + throw Bu::ExceptionBase("Invalid type in json."); break; case Object: @@ -537,22 +540,21 @@ bool 
Bu::Json::operator==( const Bu::String &rRhs ) return (*uDat.pString) == rRhs; } -void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, - Bu::UtfString &sOut ) +void Bu::Json::parseString( Bu::Json::ParseState &ps, Bu::UtfString &sOut ) { - skipWs( c, sInput ); + skipWs( ps ); bool bEscape = false; for(;;) { next( "string" ); if( bEscape ) { - switch( c ) + switch( ps.c ) { case '"': case '\\': case '/': - sOut += c; + sOut += ps.c; break; case 'b': @@ -580,8 +582,9 @@ void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, break; default: - throw Bu::ExceptionBase( - "Invalid escape sequence encountered in string." + ps.error( + Bu::String("Invalid json: Invalid escape sequence: " + " '\\%1'.").arg( (char)ps.c ) ); break; } @@ -589,37 +592,37 @@ void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, } else { - if( c == '\\' ) + if( ps.c == '\\' ) bEscape = true; - else if( c == '"' ) + else if( ps.c == '"' ) { - readChar( c, sInput ); + readChar( ps ); break; } else - sOut += c; + sOut += ps.c; } } } -void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseString( Bu::Json::ParseState &ps ) { eType = String; uDat.pString = new Bu::UtfString(); - parseString( c, sInput, *uDat.pString ); + parseString( ps, *uDat.pString ); } -void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseObject( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); eType = Object; uDat.pObject = new JsonHash(); next( "object" ); - skipWs( c, sInput ); + skipWs( ps ); // Check to see if it's an empty object. 
- if( c == '}' ) + if( ps.c == '}' ) { next("object"); return; @@ -627,35 +630,45 @@ void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) for(;;) { + skipWs( ps ); + if( ps.c != '"' ) + { + ps.error( + Bu::String("Invalid json: expected string as key in object, " + "found '%1'.").arg( (char)ps.c ) + ); + } Bu::UtfString sKey; - parseString( c, sInput, sKey ); - skipWs( c, sInput ); - if( c != ':' ) + parseString( ps, sKey ); + skipWs( ps ); + if( ps.c != ':' ) { - throw Bu::ExceptionBase( - "Invalid json, expected colon after key in object." + ps.error( + Bu::String("Invalid json: expected colon after key in object, " + "found '%1'.").arg( (char)ps.c ) ); } next("object"); - uDat.pObject->insert( sKey, new Json( c, sInput ) ); - skipWs( c, sInput ); - if( c == '}' ) + uDat.pObject->insert( sKey, new Json( ps ) ); + skipWs( ps ); + if( ps.c == '}' ) { - readChar( c, sInput ); + readChar( ps ); break; } - else if( c == ',' ) + else if( ps.c == ',' ) next( "object" ); else - throw Bu::ExceptionBase( - "Invalid json, expected comma or } after value in object." + ps.error( + Bu::String("Invalid json: expected comma or } after value " + "in object, found '%1'.").arg( (char)ps.c ) ); } } -void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseArray( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); eType = Array; uDat.pArray = new JsonList(); @@ -663,7 +676,7 @@ void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) next("array"); // Check to see if it's an empty array. 
- if( c == ']' ) + if( ps.c == ']' ) { next("array"); return; @@ -671,74 +684,78 @@ void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) for(;;) { - uDat.pArray->append( new Json( c, sInput ) ); - skipWs( c, sInput ); - if( c == ']' ) + uDat.pArray->append( new Json( ps ) ); + skipWs( ps ); + if( ps.c == ']' ) { - readChar( c, sInput ); + readChar( ps ); break; } - else if( c == ',' ) + else if( ps.c == ',' ) { next("array"); continue; } else { - throw Bu::ExceptionBase( - "Invalid json, expected comma or ] after value in array." + ps.error( + Bu::String("Invalid json: expected comma or ] after value " + "in array, found '%1'.").arg( (char)ps.c ) ); } } } -void Bu::Json::parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseNumber( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); Bu::String sBuf; - if( c == '-' ) + if( ps.c == '-' ) { - sBuf += c; + sBuf += ps.c; next( "number" ); } bool bIntPart = true; do { - if( c >= '0' && c <= '9' ) - sBuf += c; - else if( c == '.' && bIntPart == true ) + if( ps.c >= '0' && ps.c <= '9' ) + sBuf += ps.c; + else if( ps.c == '.' && bIntPart == true ) { bIntPart = false; - sBuf += c; + sBuf += ps.c; } - else if( c == ' ' || c == '\t' || c == '\n' || c == '\r' || - c == '}' || c == ']' || c == ',' ) + else if( ps.c == ' ' || ps.c == '\t' || ps.c == '\n' || ps.c == '\r' || + ps.c == '}' || ps.c == ']' || ps.c == ',' ) { break; } else { - throw Bu::ExceptionBase("Invalid character in number."); + ps.error( + Bu::String("Invalid json: Invalid character in number: '%1'."). 
+ arg( (char)ps.c ) + ); } - } while( readChar( c, sInput ) ); + } while( readChar( ps ) ); eType = Number; uDat.dNumber = atof( sBuf.getStr() ); } -void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseLiteral( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); Bu::String s; do { - if( isWs( c ) || c == ',' || c == '}' || c == ']' ) + if( isWs( ps.c ) || ps.c == ',' || ps.c == '}' || ps.c == ']' ) break; else - s += c; - } while( readChar( c, sInput ) ); + s += ps.c; + } while( readChar( ps ) ); if( s == "true" ) { @@ -757,22 +774,39 @@ void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) } else { - throw Bu::ExceptionBase("Invalid literal token found."); + ps.error( + Bu::String("Invalid json: Invalid literal token found, '%1'."). + arg( s ) + ); } } -bool Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput ) +bool Bu::Json::readChar( Bu::Json::ParseState &ps ) { - if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) + if( Bu::UtfString::readPoint( ps.sInput, ps.c ) == 0 && ps.sInput.isEos() ) return false; + + if( ps.c == '\n' ) + { + // Increment the line and set iChar to zero. This makes sense only + // because we only complain after a character has been read, so this + // will be too large by one unless we start at zero. 
+ ps.iLine++; + ps.iChar = 0; + } + else + { + ps.iChar++; + } + return true; } -void Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput, const char *sSection ) +void Bu::Json::readChar( Bu::Json::ParseState &ps, const char *sSection ) { - if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) + if( !readChar( ps ) ) { - throw Bu::ExceptionBase( sSection ); + ps.error( sSection ); } } @@ -781,9 +815,9 @@ bool Bu::Json::isWs( Bu::UtfChar c ) return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } -void Bu::Json::skipWs( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::skipWs( Bu::Json::ParseState &ps ) { - while( isWs( c ) ) + while( isWs( ps.c ) ) { next("whitespace"); } @@ -850,3 +884,11 @@ Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Json &j ) return f; } +void Bu::Json::ParseState::error( const Bu::String &sTxt ) +{ + throw Bu::ExceptionParse( + Bu::String("%1:%2: %3"). + arg( iLine ).arg( iChar ).arg( sTxt ).end().getStr() + ); +} + diff --git a/src/unstable/json.h b/src/unstable/json.h index 5373bcf..a973f74 100644 --- a/src/unstable/json.h +++ b/src/unstable/json.h @@ -16,7 +16,22 @@ namespace Bu class Json { private: - Json( Bu::UtfChar &c, Bu::Stream &sInput ); + class ParseState + { + public: + ParseState( Bu::Stream &sInput ) : + c( 0 ), sInput( sInput ), iLine( 1 ), iChar( 0 ) + { + } + + void error( const Bu::String &sTxt ); + + Bu::UtfChar c; + Bu::Stream &sInput; + int iLine; + int iChar; + }; + Json( ParseState &ps ); typedef Bu::Hash JsonHash; typedef Bu::Array JsonList; @@ -90,19 +105,17 @@ namespace Bu bool operator==( const Bu::String &rRhs ); private: - void parse( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseString( Bu::UtfChar &c, Bu::Stream &sInput, - Bu::UtfString &sOut ); - void parseString( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseObject( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseArray( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ); - 
void parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ); - bool readChar( Bu::UtfChar &c, Bu::Stream &sInput ); - void readChar( Bu::UtfChar &c, Bu::Stream &sInput, - const char *sSection ); + void parse( ParseState &ps ); + void parseString( ParseState &ps, Bu::UtfString &sOut ); + void parseString( ParseState &ps ); + void parseObject( ParseState &ps ); + void parseArray( ParseState &ps ); + void parseNumber( ParseState &ps ); + void parseLiteral( ParseState &ps ); + bool readChar( ParseState &ps ); + void readChar( ParseState &ps, const char *sSection ); bool isWs( Bu::UtfChar c ); - void skipWs( Bu::UtfChar &c, Bu::Stream &sInput ); + void skipWs( ParseState &ps ); void writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const; private: -- cgit v1.2.3