From d61161061bd5998ef4b1fc2aaad2cf4bbd78e90d Mon Sep 17 00:00:00 2001
From: Mike Buland
Date: Thu, 11 Feb 2010 00:38:09 +0000
Subject: Started working on a CSV reader, it's pretty much done, the CSV writer is just a shell, but I may finish it soon, and started work on NewLine, a filter that converts newlines in text streams between the different OS standards. Also added some more helper operators to fbasicstring.

---
 src/csvreader.cpp  | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/csvreader.h    |  53 ++++++++++++++++++
 src/csvwriter.cpp  |  41 ++++++++++++++
 src/csvwriter.h    |  40 +++++++++++++
 src/fbasicstring.h |  14 ++++-
 src/newline.cpp    |  71 ++++++++++++++++++++++++
 src/newline.h      |  36 ++++++++++++
 src/tests/csv.cpp  |  41 ++++++++++++++
 8 files changed, 454 insertions(+), 1 deletion(-)
 create mode 100644 src/csvreader.cpp
 create mode 100644 src/csvreader.h
 create mode 100644 src/csvwriter.cpp
 create mode 100644 src/csvwriter.h
 create mode 100644 src/newline.cpp
 create mode 100644 src/newline.h
 create mode 100644 src/tests/csv.cpp

diff --git a/src/csvreader.cpp b/src/csvreader.cpp
new file mode 100644
index 0000000..a28c2c3
--- /dev/null
+++ b/src/csvreader.cpp
@@ -0,0 +1,159 @@
+#include "bu/csvreader.h"
+#include "bu/stream.h"
+
+#include "bu/sio.h"
+using namespace Bu;
+
+Bu::CsvReader::CsvReader( Bu::Stream &sIn, Bu::CsvReader::Style eStyle ) :
+	sIn( sIn )
+{
+	switch( eStyle )
+	{
+		case styleExcel:
+			sDecode = Bu::slot( &decodeExcel );
+			break;
+
+		case styleC:
+			sDecode = Bu::slot( &decodeC );
+			break;
+	}
+}
+
+Bu::CsvReader::CsvReader( Bu::Stream &sIn,
+		Bu::CsvReader::DecodeSignal sDecode ) :
+	sIn( sIn ),
+	sDecode( sDecode )
+{
+}
+
+Bu::CsvReader::~CsvReader()
+{
+}
+
+Bu::StrArray Bu::CsvReader::readLine()
+{
+	Bu::StrArray aVals;
+
+	Bu::FString sLine = sIn.readLine();
+
+	Bu::FString::iterator i = sLine.begin();
+	while( i )
+	{
+		// The decoder consumes one field and stops on the separator that
+		// ends it, or runs off the end of the line.
+		aVals.append( sDecode( i ) );
+
+		// Drop anything between the end of the field and the separator,
+		// e.g. blanks after a closing quote.
+		while( i && *i != ',' )
+			i++;
+
+		if( i )
+		{
+			// Step over the separator.  When nothing follows it the line
+			// ends in an empty field that the loop would never visit.
+			i++;
+			if( !i )
+				aVals.append( Bu::FString() );
+		}
+	}
+
+	return aVals;
+}
+
+Bu::FString Bu::CsvReader::decodeExcel( Bu::FString::iterator &i )
+{
+	Bu::FString sRet;
+
+	for(; i && (*i == ' ' || *i == '\t'); i++ ) { }
+
+	if( i && *i == '\"' )
+	{
+		for( i++ ; i; i++ )
+		{
+			if( *i == '\"' )
+			{
+				i++;
+				// A doubled quote is a literal quote, a lone one closes the
+				// field.  The quote may be the last character of the line.
+				if( i && *i == '\"' )
+				{
+					sRet += *i;
+				}
+				else
+				{
+					return sRet;
+				}
+			}
+			else
+			{
+				sRet += *i;
+			}
+		}
+	}
+	else
+	{
+		for( ; i; i++ )
+		{
+			if( *i == ',' )
+			{
+				return sRet;
+			}
+			sRet += *i;
+		}
+	}
+
+	return sRet;
+}
+
+Bu::FString Bu::CsvReader::decodeC( Bu::FString::iterator &i )
+{
+	Bu::FString sRet;
+	bool bQuoted = false;
+
+	for(; i && (*i == ' ' || *i == '\t'); i++ ) { }
+
+	if( i && *i == '\"' )
+	{
+		bQuoted = true;
+		i++;
+	}
+
+	for( ; i; i++ )
+	{
+		if( *i == '\\' )
+		{
+			i++;
+			if( !i )
+			{
+				// Dangling backslash at the end of the line, keep it as is.
+				sRet += '\\';
+				return sRet;
+			}
+			switch( *i )
+			{
+				case 'n': sRet += '\n'; break;
+				case 'r': sRet += '\r'; break;
+				case 't': sRet += '\t'; break;
+				default: sRet += *i; break;
+			}
+		}
+		else if( bQuoted && *i == '\"' )
+		{
+			// Closing quote, leave i just past it like decodeExcel does.
+			i++;
+			return sRet;
+		}
+		else if( !bQuoted && *i == ',' )
+		{
+			return sRet;
+		}
+		else
+		{
+			sRet += *i;
+		}
+	}
+
+	return sRet;
+}
+
diff --git a/src/csvreader.h b/src/csvreader.h
new file mode 100644
index 0000000..d89fabe
--- /dev/null
+++ b/src/csvreader.h
@@ -0,0 +1,53 @@
+#ifndef BU_CSV_READER_H
+#define BU_CSV_READER_H
+
+#include "bu/fstring.h"
+#include "bu/array.h"
+#include "bu/signals.h"
+
+namespace Bu
+{
+	class Stream;
+	typedef Bu::Array<Bu::FString> StrArray;
+
+	/**
+	 * Reads comma separated values from a stream one line at a time, handing
+	 * each field to a decoder that undoes whatever quoting or escaping the
+	 * file uses.
+	 */
+	class CsvReader
+	{
+	public:
+		/**
+		 * Decodes one field.  It is handed an iterator at the start of the
+		 * field and must leave it on the separator that ends the field, or
+		 * past the end of the line if there is none.
+		 */
+		typedef Bu::Signal1<Bu::FString, Bu::FString::iterator &> DecodeSignal;
+		enum Style
+		{
+			styleExcel,	///< Excel style quotes around things that need em
+			styleC		///< Escape things that need it C-style
+		};
+
+		CsvReader( Stream &sIn, Style eStyle=styleExcel );
+		CsvReader( Stream &sIn, DecodeSignal sDecode );
+		virtual ~CsvReader();
+
+		/**
+		 * Read the next line from the stream and split it into fields.
+		 * Empty fields are kept, an empty line yields an empty array.
+		 *@returns (StrArray) The decoded fields, in order.
+		 */
+		StrArray readLine();
+
+	private:
+		Stream &sIn;
+		DecodeSignal sDecode;
+
+		static Bu::FString decodeExcel( Bu::FString::iterator &i );
+		static Bu::FString decodeC( Bu::FString::iterator &i );
+	};
+};
+
+#endif
diff --git a/src/csvwriter.cpp b/src/csvwriter.cpp
new file mode 100644
index 0000000..b66dca8
--- /dev/null
+++ b/src/csvwriter.cpp
@@ -0,0 +1,41 @@
+#include "bu/csvwriter.h"
+#include "bu/stream.h"
+
+Bu::CsvWriter::CsvWriter( Bu::Stream &sOut, Bu::CsvWriter::Style eStyle ) :
+	sOut( sOut )
+{
+	switch( eStyle )
+	{
+		case styleExcel:
+			sEncode = Bu::slot( &encodeExcel );
+			break;
+
+		case styleC:
+			sEncode = Bu::slot( &encodeC );
+			break;
+	}
+}
+
+Bu::CsvWriter::CsvWriter( Bu::Stream &sOut,
+		Bu::CsvWriter::EncodeSignal sEncode ) :
+	sOut( sOut ),
+	sEncode( sEncode )
+{
+}
+
+Bu::CsvWriter::~CsvWriter()
+{
+}
+
+Bu::FString Bu::CsvWriter::encodeExcel( const Bu::FString &sIn )
+{
+	// TODO: not implemented yet, every field encodes to an empty string.
+	return "";
+}
+
+Bu::FString Bu::CsvWriter::encodeC( const Bu::FString &sIn )
+{
+	// TODO: not implemented yet, every field encodes to an empty string.
+	return "";
+}
+
diff --git a/src/csvwriter.h b/src/csvwriter.h
new file mode 100644
index 0000000..82f36d7
--- /dev/null
+++ b/src/csvwriter.h
@@ -0,0 +1,40 @@
+#ifndef BU_CSV_WRITER_H
+#define BU_CSV_WRITER_H
+
+#include "bu/fstring.h"
+#include "bu/array.h"
+#include "bu/signals.h"
+
+namespace Bu
+{
+	class Stream;
+	typedef Bu::Array<Bu::FString> StrArray;
+
+	/**
+	 * Writes comma separated values to a stream.  Only a shell so far: the
+	 * encoders are stubs and there is no way to write a line yet.
+	 */
+	class CsvWriter
+	{
+	public:
+		typedef Bu::Signal1<Bu::FString, const Bu::FString &> EncodeSignal;
+		enum Style
+		{
+			styleExcel,	///< Excel style quotes around things that need em
+			styleC		///< Escape things that need it C-style
+		};
+
+		CsvWriter( Stream &sOut, Style eStyle=styleExcel );
+		CsvWriter( Stream &sOut, EncodeSignal sEncode );
+		virtual ~CsvWriter();
+
+	private:
+		Stream &sOut;
+		EncodeSignal sEncode;
+
+		static Bu::FString encodeExcel( const Bu::FString &sIn );
+		static Bu::FString encodeC( const Bu::FString &sIn );
+	};
+};
+
+#endif
diff --git a/src/fbasicstring.h b/src/fbasicstring.h
index 838fbc2..535df55 100644
--- a/src/fbasicstring.h
+++ b/src/fbasicstring.h
@@ -796,7 +796,7 @@ namespace Bu
 				append( &cData, 1 );
 			}
 		}
-		
+
 		/**
 		 * Append another FString to this one.
 		 *@param sData (MyType &) The FString to append.
@@ -1280,6 +1280,18 @@ namespace Bu
 			return (*this);
 		}
 
+		/**
+		 * Plus equals operator for FString.
+		 *@param i (const const_iterator &) Points at the single character to
+		 *		append to your FString.
+		 */
+		MyType &operator+=( const MyType::const_iterator &i )
+		{
+			append( i, i+1 );
+
+			return (*this);
+		}
+
 		/**
 		 * Plus equals operator for FString.
 		 *@param cData (const chr) The character to append to your FString.
diff --git a/src/newline.cpp b/src/newline.cpp
new file mode 100644
index 0000000..0dbbae5
--- /dev/null
+++ b/src/newline.cpp
@@ -0,0 +1,71 @@
+#include "bu/newline.h"
+
+Bu::NewLine::NewLine( Bu::Stream &rNext ) :
+	Bu::Filter( rNext ),
+	bExChar( false ),
+	cExChar( '\0' )
+{
+}
+
+Bu::NewLine::~NewLine()
+{
+}
+
+void Bu::NewLine::start()
+{
+}
+
+size_t Bu::NewLine::stop()
+{
+	return 0;
+}
+
+size_t Bu::NewLine::read( void *pBuf, size_t iAmnt )
+{
+	// A void pointer can't be indexed, work through a char view of it.
+	char *pData = static_cast<char *>( pBuf );
+	size_t iRead = 0;
+	size_t iOut = 0;
+
+	do
+	{
+		iRead = rNext.read( pBuf, iAmnt );
+
+		for( size_t iIn = 0; iIn < iRead; iIn++ )
+		{
+			const char c = pData[iIn];
+
+			if( bExChar )
+			{
+				// The previous character was a line break.  If this one is
+				// the other half of a \r\n or \n\r pair it was already
+				// accounted for, even when the pair straddles two reads.
+				bExChar = false;
+				if( (c == '\r' || c == '\n') && c != cExChar )
+					continue;
+			}
+
+			if( c == '\r' || c == '\n' )
+			{
+				pData[iOut++] = '\n';
+				bExChar = true;
+				cExChar = c;
+			}
+			else
+			{
+				pData[iOut++] = c;
+			}
+		}
+		// Swallowing the second half of a pair can leave nothing to hand
+		// back; read again so zero is only returned when rNext returned zero.
+	} while( iRead > 0 && iOut == 0 );
+
+	return iOut;
+}
+
+size_t Bu::NewLine::write( const void *pBuf, size_t iAmnt )
+{
+	// TODO: convert line breaks on the way out, for now pass data through.
+	return rNext.write( pBuf, iAmnt );
+}
+
diff --git a/src/newline.h b/src/newline.h
new file mode 100644
index 0000000..8ee5779
--- /dev/null
+++ b/src/newline.h
@@ -0,0 +1,36 @@
+#ifndef BU_NEW_LINE_H
+#define BU_NEW_LINE_H
+
+#include "bu/filter.h"
+
+namespace Bu
+{
+	/**
+	 * Converts new-line characters from any standard convention into linefeeds
+	 * (\n) on reading, and converts them to either your OS's standard or a
+	 * specified standard, depending on how you construct the class.
+	 *
+	 * If you're reading in a text file, then this filter is practically
+	 * required.
+	 *
+	 * Conversion on write is not implemented yet, data is written as is.
+	 */
+	class NewLine : public Bu::Filter
+	{
+	public:
+		NewLine( Bu::Stream &rNext );
+		virtual ~NewLine();
+
+		virtual void start();
+		virtual size_t stop();
+
+		virtual size_t read( void *pBuf, size_t iAmnt );
+		virtual size_t write( const void *pBuf, size_t iAmnt );
+
+	private:
+		bool bExChar;	///< Was the last character read a line break?
+		char cExChar;	///< The raw line break character seen last
+	};
+};
+
+#endif
diff --git a/src/tests/csv.cpp b/src/tests/csv.cpp
new file mode 100644
index 0000000..03e1df8
--- /dev/null
+++ b/src/tests/csv.cpp
@@ -0,0 +1,41 @@
+#include "bu/optparser.h"
+#include "bu/file.h"
+#include "bu/newline.h"
+#include "bu/csvreader.h"
+#include "bu/sio.h"
+
+using namespace Bu;
+
+class Options : public OptParser
+{
+public:
+	Options( int argc, char *argv[] )
+	{
+		addOption( slot( this, &Options::onRead ), 'r', "read",
+			"Read and display a csv file." );
+
+		addHelpOption();
+
+		parse( argc, argv );
+	}
+
+	int onRead( StrArray aArgs )
+	{
+		File fIn( aArgs[1], File::Read );
+		NewLine nlIn( fIn );
+		CsvReader rCsv( nlIn );
+		while( !fIn.isEos() )
+		{
+			sio << rCsv.readLine() << sio.nl;
+		}
+		sio << sio.nl;
+		return 1;
+	}
+};
+
+int main( int argc, char *argv[] )
+{
+	Options opts( argc, argv );
+	return 0;
+}
+
-- 
cgit v1.2.3