From 8f9337568befa81e82e97abd4d2d75e3a5c5fbe9 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Sat, 14 Dec 2019 21:18:22 -0800 Subject: Added ExceptionParse. I would like to move to exceptions carrying encoded parameters. For example parse could carry source name (filename), line, character, etc. --- src/stable/exceptionparse.cpp | 14 ++++++++++++++ src/stable/exceptionparse.h | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 src/stable/exceptionparse.cpp create mode 100644 src/stable/exceptionparse.h (limited to 'src') diff --git a/src/stable/exceptionparse.cpp b/src/stable/exceptionparse.cpp new file mode 100644 index 0000000..98440a4 --- /dev/null +++ b/src/stable/exceptionparse.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#include "exceptionparse.h" + +namespace Bu +{ + subExceptionDef( ExceptionParse ); +} + diff --git a/src/stable/exceptionparse.h b/src/stable/exceptionparse.h new file mode 100644 index 0000000..83eddb0 --- /dev/null +++ b/src/stable/exceptionparse.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ +#ifndef BU_EXCPTION_PARSE_H +#define BU_EXCPTION_PARSE_H + +#include "bu/exceptionbase.h" + +namespace Bu +{ + subExceptionDecl( ExceptionParse ); +} + +#endif -- cgit v1.2.3 From 2ff101097feedf85b0ab0163983159200fc146a2 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Sat, 14 Dec 2019 21:18:59 -0800 Subject: Json provides line/char in errors now. All errors are also reworked and the parser state is tracked much better. Also fixed a parser issue where it would error correctly, but report it poorly when an object started with something other than a string. 
--- src/unstable/json.cpp | 208 ++++++++++++++++++++++++++++++-------------------- src/unstable/json.h | 39 ++++++---- 2 files changed, 151 insertions(+), 96 deletions(-) (limited to 'src') diff --git a/src/unstable/json.cpp b/src/unstable/json.cpp index f6a8d52..9745a7f 100644 --- a/src/unstable/json.cpp +++ b/src/unstable/json.cpp @@ -1,12 +1,13 @@ #include "bu/json.h" #include "bu/staticmembuf.h" #include "bu/membuf.h" +#include "bu/exceptionparse.h" #include "bu/sio.h" #include -#define next( txt ) readChar( c, sInput, "Unexpected end of stream while reading " txt "." ) +#define next( txt ) readChar( ps, "Unexpected end of stream while reading " txt "." ) Bu::Json::Json() : eType( Null ) @@ -75,10 +76,10 @@ Bu::Json::Json( Bu::Stream &sInput ) : parse( sInput ); } -Bu::Json::Json( Bu::UtfChar &c, Bu::Stream &sInput ) : +Bu::Json::Json( Bu::Json::ParseState &ps ) : eType( Invalid ) { - parse( c, sInput ); + parse( ps ); } Bu::Json::Json( const Json &rSrc ) : @@ -294,10 +295,10 @@ void Bu::Json::parse( Bu::Stream &sInput ) { reset(); - Bu::UtfChar c; + ParseState ps( sInput ); next("json"); - parse( c, sInput ); + parse( ps ); } void Bu::Json::parse( const Bu::String &sInput ) @@ -306,40 +307,42 @@ void Bu::Json::parse( const Bu::String &sInput ) parse( mb ); } -void Bu::Json::parse( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parse( ParseState &ps ) { - while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) + while( ps.c == ' ' || ps.c == '\t' || ps.c == '\r' || ps.c == '\n' ) { next( "json" ); } - if( c == '"' ) + if( ps.c == '"' ) { // String - parseString( c, sInput ); + parseString( ps ); } - else if( c == '{' ) + else if( ps.c == '{' ) { // Object - parseObject( c, sInput ); + parseObject( ps ); } - else if( c == '[' ) + else if( ps.c == '[' ) { // Array - parseArray( c, sInput ); + parseArray( ps ); } - else if( c == '-' || (c >= '0' && c <= '9') ) + else if( ps.c == '-' || (ps.c >= '0' && ps.c <= '9') ) { // Number -- apparently they can't 
start with a period - parseNumber( c, sInput ); + parseNumber( ps ); } - else if( c == 't' || c == 'f' || c == 'n' ) + else if( ps.c == 't' || ps.c == 'f' || ps.c == 'n' ) { // True / false / null - parseLiteral( c, sInput ); + parseLiteral( ps ); } else { - throw Bu::ExceptionBase("Invalid characters in json stream."); + ps.error( + Bu::String("Invalid json: Invalid character: '%1'.").arg( (char)ps.c ) + ); } } @@ -382,7 +385,7 @@ void Bu::Json::write( Bu::Stream &sOutput ) const switch( eType ) { case Invalid: - throw Bu::ExceptionBase("Invalid type in json"); + throw Bu::ExceptionBase("Invalid type in json."); break; case Object: @@ -537,22 +540,21 @@ bool Bu::Json::operator==( const Bu::String &rRhs ) return (*uDat.pString) == rRhs; } -void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, - Bu::UtfString &sOut ) +void Bu::Json::parseString( Bu::Json::ParseState &ps, Bu::UtfString &sOut ) { - skipWs( c, sInput ); + skipWs( ps ); bool bEscape = false; for(;;) { next( "string" ); if( bEscape ) { - switch( c ) + switch( ps.c ) { case '"': case '\\': case '/': - sOut += c; + sOut += ps.c; break; case 'b': @@ -580,8 +582,9 @@ void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, break; default: - throw Bu::ExceptionBase( - "Invalid escape sequence encountered in string." 
+ ps.error( + Bu::String("Invalid json: Invalid escape sequence: " + " '\\%1'.").arg( (char)ps.c ) ); break; } @@ -589,37 +592,37 @@ void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, } else { - if( c == '\\' ) + if( ps.c == '\\' ) bEscape = true; - else if( c == '"' ) + else if( ps.c == '"' ) { - readChar( c, sInput ); + readChar( ps ); break; } else - sOut += c; + sOut += ps.c; } } } -void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseString( Bu::Json::ParseState &ps ) { eType = String; uDat.pString = new Bu::UtfString(); - parseString( c, sInput, *uDat.pString ); + parseString( ps, *uDat.pString ); } -void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseObject( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); eType = Object; uDat.pObject = new JsonHash(); next( "object" ); - skipWs( c, sInput ); + skipWs( ps ); // Check to see if it's an empty object. - if( c == '}' ) + if( ps.c == '}' ) { next("object"); return; @@ -627,35 +630,45 @@ void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) for(;;) { + skipWs( ps ); + if( ps.c != '"' ) + { + ps.error( + Bu::String("Invalid json: expected string as key in object, " + "found '%1'.").arg( (char)ps.c ) + ); + } Bu::UtfString sKey; - parseString( c, sInput, sKey ); - skipWs( c, sInput ); - if( c != ':' ) + parseString( ps, sKey ); + skipWs( ps ); + if( ps.c != ':' ) { - throw Bu::ExceptionBase( - "Invalid json, expected colon after key in object." 
+ ps.error( + Bu::String("Invalid json: expected colon after key in object, " + "found '%1'.").arg( (char)ps.c ) ); } next("object"); - uDat.pObject->insert( sKey, new Json( c, sInput ) ); - skipWs( c, sInput ); - if( c == '}' ) + uDat.pObject->insert( sKey, new Json( ps ) ); + skipWs( ps ); + if( ps.c == '}' ) { - readChar( c, sInput ); + readChar( ps ); break; } - else if( c == ',' ) + else if( ps.c == ',' ) next( "object" ); else - throw Bu::ExceptionBase( - "Invalid json, expected comma or } after value in object." + ps.error( + Bu::String("Invalid json: expected comma or } after value " + "in object, found '%1'.").arg( (char)ps.c ) ); } } -void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseArray( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); eType = Array; uDat.pArray = new JsonList(); @@ -663,7 +676,7 @@ void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) next("array"); // Check to see if it's an empty array. - if( c == ']' ) + if( ps.c == ']' ) { next("array"); return; @@ -671,74 +684,78 @@ void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) for(;;) { - uDat.pArray->append( new Json( c, sInput ) ); - skipWs( c, sInput ); - if( c == ']' ) + uDat.pArray->append( new Json( ps ) ); + skipWs( ps ); + if( ps.c == ']' ) { - readChar( c, sInput ); + readChar( ps ); break; } - else if( c == ',' ) + else if( ps.c == ',' ) { next("array"); continue; } else { - throw Bu::ExceptionBase( - "Invalid json, expected comma or ] after value in array." 
+ ps.error( + Bu::String("Invalid json: expected comma or ] after value " + "in array, found '%1'.").arg( (char)ps.c ) ); } } } -void Bu::Json::parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseNumber( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); Bu::String sBuf; - if( c == '-' ) + if( ps.c == '-' ) { - sBuf += c; + sBuf += ps.c; next( "number" ); } bool bIntPart = true; do { - if( c >= '0' && c <= '9' ) - sBuf += c; - else if( c == '.' && bIntPart == true ) + if( ps.c >= '0' && ps.c <= '9' ) + sBuf += ps.c; + else if( ps.c == '.' && bIntPart == true ) { bIntPart = false; - sBuf += c; + sBuf += ps.c; } - else if( c == ' ' || c == '\t' || c == '\n' || c == '\r' || - c == '}' || c == ']' || c == ',' ) + else if( ps.c == ' ' || ps.c == '\t' || ps.c == '\n' || ps.c == '\r' || + ps.c == '}' || ps.c == ']' || ps.c == ',' ) { break; } else { - throw Bu::ExceptionBase("Invalid character in number."); + ps.error( + Bu::String("Invalid json: Invalid character in number: '%1'."). + arg( (char)ps.c ) + ); } - } while( readChar( c, sInput ) ); + } while( readChar( ps ) ); eType = Number; uDat.dNumber = atof( sBuf.getStr() ); } -void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::parseLiteral( Bu::Json::ParseState &ps ) { - skipWs( c, sInput ); + skipWs( ps ); Bu::String s; do { - if( isWs( c ) || c == ',' || c == '}' || c == ']' ) + if( isWs( ps.c ) || ps.c == ',' || ps.c == '}' || ps.c == ']' ) break; else - s += c; - } while( readChar( c, sInput ) ); + s += ps.c; + } while( readChar( ps ) ); if( s == "true" ) { @@ -757,22 +774,39 @@ void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) } else { - throw Bu::ExceptionBase("Invalid literal token found."); + ps.error( + Bu::String("Invalid json: Invalid literal token found, '%1'."). 
+ arg( s ) + ); } } -bool Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput ) +bool Bu::Json::readChar( Bu::Json::ParseState &ps ) { - if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) + if( Bu::UtfString::readPoint( ps.sInput, ps.c ) == 0 && ps.sInput.isEos() ) return false; + + if( ps.c == '\n' ) + { + // Increment the line and set iChar to zero. This makes sense only + // because we only complain after a character has been read, so this + // will be too large by one unless we start at zero. + ps.iLine++; + ps.iChar = 0; + } + else + { + ps.iChar++; + } + return true; } -void Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput, const char *sSection ) +void Bu::Json::readChar( Bu::Json::ParseState &ps, const char *sSection ) { - if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) + if( !readChar( ps ) ) { - throw Bu::ExceptionBase( sSection ); + ps.error( sSection ); } } @@ -781,9 +815,9 @@ bool Bu::Json::isWs( Bu::UtfChar c ) return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } -void Bu::Json::skipWs( Bu::UtfChar &c, Bu::Stream &sInput ) +void Bu::Json::skipWs( Bu::Json::ParseState &ps ) { - while( isWs( c ) ) + while( isWs( ps.c ) ) { next("whitespace"); } @@ -850,3 +884,11 @@ Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Json &j ) return f; } +void Bu::Json::ParseState::error( const Bu::String &sTxt ) +{ + throw Bu::ExceptionParse( + Bu::String("%1:%2: %3"). 
+ arg( iLine ).arg( iChar ).arg( sTxt ).end().getStr() + ); +} + diff --git a/src/unstable/json.h b/src/unstable/json.h index 5373bcf..a973f74 100644 --- a/src/unstable/json.h +++ b/src/unstable/json.h @@ -16,7 +16,22 @@ namespace Bu class Json { private: - Json( Bu::UtfChar &c, Bu::Stream &sInput ); + class ParseState + { + public: + ParseState( Bu::Stream &sInput ) : + c( 0 ), sInput( sInput ), iLine( 1 ), iChar( 0 ) + { + } + + void error( const Bu::String &sTxt ); + + Bu::UtfChar c; + Bu::Stream &sInput; + int iLine; + int iChar; + }; + Json( ParseState &ps ); typedef Bu::Hash JsonHash; typedef Bu::Array JsonList; @@ -90,19 +105,17 @@ namespace Bu bool operator==( const Bu::String &rRhs ); private: - void parse( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseString( Bu::UtfChar &c, Bu::Stream &sInput, - Bu::UtfString &sOut ); - void parseString( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseObject( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseArray( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ); - void parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ); - bool readChar( Bu::UtfChar &c, Bu::Stream &sInput ); - void readChar( Bu::UtfChar &c, Bu::Stream &sInput, - const char *sSection ); + void parse( ParseState &ps ); + void parseString( ParseState &ps, Bu::UtfString &sOut ); + void parseString( ParseState &ps ); + void parseObject( ParseState &ps ); + void parseArray( ParseState &ps ); + void parseNumber( ParseState &ps ); + void parseLiteral( ParseState &ps ); + bool readChar( ParseState &ps ); + void readChar( ParseState &ps, const char *sSection ); bool isWs( Bu::UtfChar c ); - void skipWs( Bu::UtfChar &c, Bu::Stream &sInput ); + void skipWs( ParseState &ps ); void writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const; private: -- cgit v1.2.3 From 969e708d25351ea631e3ce9afb64313851869ec5 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Fri, 3 Jan 2020 10:07:11 -0800 Subject: Removed extra 
debug output from Bu::MyriadFs --- src/unstable/myriadfs.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/unstable/myriadfs.cpp b/src/unstable/myriadfs.cpp index 7a87662..6b51195 100644 --- a/src/unstable/myriadfs.cpp +++ b/src/unstable/myriadfs.cpp @@ -121,7 +121,7 @@ Bu::MyriadStream Bu::MyriadFs::open( const Bu::String &sPath, int /*iMode*/, try { iNode = lookupInode( sPath, iParent ); - sio << "File found." << sio.nl; +// sio << "File found." << sio.nl; // The file was found return openByInode( iNode ); } @@ -137,10 +137,10 @@ Bu::MyriadStream Bu::MyriadFs::open( const Bu::String &sPath, int /*iMode*/, // The file wasn't found, but the path leading up to it was. // first, figure out the final path element... Bu::String sName = filePart( sPath ); - sio << "End filename: " << sName << sio.nl; - sio << "Parent inode: " << iParent << sio.nl; +// sio << "End filename: " << sName << sio.nl; +// sio << "Parent inode: " << iParent << sio.nl; iNode = create( iParent, sName, (uPerms&permMask)|typeRegFile, 0 ); - sio << "New iNode: " << iNode << sio.nl; +// sio << "New iNode: " << iNode << sio.nl; return openByInode( iNode ); } } @@ -164,7 +164,7 @@ void Bu::MyriadFs::create( const Bu::String &sPath, uint16_t iPerms, try { iNode = lookupInode( sPath, iParent ); - sio << "File found." << sio.nl; +// sio << "File found." << sio.nl; } catch( Bu::MyriadFsException &e ) { @@ -178,10 +178,10 @@ void Bu::MyriadFs::create( const Bu::String &sPath, uint16_t iPerms, // The file wasn't found, but the path leading up to it was. // first, figure out the final path element... 
Bu::String sName = filePart( sPath ); - sio << "End filename: " << sName << sio.nl; - sio << "Parent inode: " << iParent << sio.nl; +// sio << "End filename: " << sName << sio.nl; +// sio << "Parent inode: " << iParent << sio.nl; iNode = create( iParent, sName, iPerms, uSpecial ); - sio << "New iNode: " << iNode << sio.nl; +// sio << "New iNode: " << iNode << sio.nl; } // The file was found //throw Bu::MyriadFsException("Path already exists."); @@ -213,10 +213,10 @@ void Bu::MyriadFs::mkSymLink( const Bu::String &sTarget, // The file wasn't found, but the path leading up to it was. // first, figure out the final path element... Bu::String sName = filePart( sPath ); - sio << "End filename: " << sName << sio.nl; - sio << "Parent inode: " << iParent << sio.nl; +// sio << "End filename: " << sName << sio.nl; +// sio << "Parent inode: " << iParent << sio.nl; iNode = create( iParent, sName, 0777|typeSymLink, 0 ); - sio << "New iNode: " << iNode << sio.nl; +// sio << "New iNode: " << iNode << sio.nl; MyriadStream ms = openByInode( iNode ); ms.write( sTarget ); return; @@ -249,8 +249,8 @@ void Bu::MyriadFs::mkHardLink( const Bu::String &sTarget, // The file wasn't found, but the path leading up to it was. // first, figure out the final path element... 
Bu::String sName = filePart( sPath ); - sio << "End filename: " << sName << sio.nl; - sio << "Parent inode: " << iParent << sio.nl; +// sio << "End filename: " << sName << sio.nl; +// sio << "Parent inode: " << iParent << sio.nl; addToDir( iParent, iNode, sName ); MyriadStream is = mStore.openStream( 2 ); RawStat rs; @@ -558,8 +558,8 @@ int32_t Bu::MyriadFs::allocInode( uint16_t uPerms, uint32_t uSpecial ) case typeDir: rs.uStreamIndex = mStore.createStream(); - sio << "Creating directory node, storage: " - << rs.uStreamIndex << sio.nl; +// sio << "Creating directory node, storage: " +// << rs.uStreamIndex << sio.nl; { Bu::MyriadStream msDir = mStore.openStream( rs.uStreamIndex -- cgit v1.2.3 From ed5e7684b766a3914b30c5b449608542695fd3b8 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 4 Feb 2020 09:01:45 -0800 Subject: Minor Bu::Hash updates and additions. Bu::Hash::KeyList has been added, I thought that was always there. Bu::Hash::rehash has been added. Rehashes can be triggered manually now. --- src/stable/hash.h | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/stable/hash.h b/src/stable/hash.h index 7f4066e..0c5bd9e 100644 --- a/src/stable/hash.h +++ b/src/stable/hash.h @@ -211,7 +211,7 @@ namespace Bu return 0; } - uint32_t probe( uint32_t hash, const key &k, bool &bFill, bool rehash=true ) + uint32_t probe( uint32_t hash, const key &k, bool &bFill, bool bRehash=true ) { init(); @@ -244,9 +244,9 @@ namespace Bu // This is our insurance, if the table is full, then go ahead and // rehash, then try again. - if( (isFilled( nCur ) || j == 32) && rehash == true ) + if( (isFilled( nCur ) || j == 32) && bRehash == true ) { - reHash( szCalc( nCapacity, nFilled, nDeleted ) ); + rehash( szCalc( nCapacity, nFilled, nDeleted ) ); // This is potentially dangerous, and could cause an infinite loop. // Be careful writing probe, eh? 
@@ -309,7 +309,7 @@ namespace Bu } } - void reHash( uint32_t nNewSize ) + void rehash( uint32_t nNewSize ) { //printf("--rehash: %d --> %d (%d, %d)\n", nCapacity, nNewSize, nFilled, nDeleted ); //printf("---REHASH---"); @@ -386,11 +386,13 @@ namespace Bu for( uint32_t j = 0; j < nCapacity; j++ ) { if( isFilled( j ) ) + { if( !isDeleted( j ) ) { va.destroy( &aValues[j] ); ka.destroy( &aKeys[j] ); } + } } va.deallocate( aValues, nCapacity ); ka.deallocate( aKeys, nCapacity ); @@ -501,6 +503,8 @@ namespace Bu { } + typedef Bu::List KeyList; + /** * Get the current hash table capacity. (Changes at re-hash) *@returns (uint32_t) The current capacity. @@ -1135,9 +1139,9 @@ namespace Bu * Get a list of all the keys in the hash table. *@returns (std::list) The list of keys in the hash table. */ - Bu::List getKeys() const + KeyList getKeys() const { - Bu::List lKeys; + KeyList lKeys; for( uint32_t j = 0; j < core->nCapacity; j++ ) { @@ -1171,6 +1175,30 @@ namespace Bu return lValues; } + /** + * This can be a very expensive operation, but when there are a decent + * number of deleted entries it can be good to be able to clean them + * up on your own terms. + * + * This will always allocate a new table and move all non-deleted items + * over to it. The size of the new table depends on which resizing + * calculator is selected. The default resize calculator will shrink + * the table if it's mostly deleted/empty space. + * + * This will be done by the system whenever it deems necessary, but + * only during probing operations. That means that an insert could + * trigger a rehash, which could be very expensive. If you know, for + * example, that you'll be deleting most of the entries once a night + * during a low-usage time, that would probably be a good time to + * manually trigger a rehash and save the extra time on the next insert + * after the cleanup. This is particularly true for systems like + * caches that need to be periodically cleaned up. 
+ */ + void rehash() + { + core->rehash( core->szCalc( core->nCapacity, core->nFilled, core->nDeleted ) ); + } + bool operator==( const MyType &rhs ) const { if( this == &rhs ) -- cgit v1.2.3