From 9cdf227b84a18141de527a0ad85344a20914b974 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Fri, 5 Jul 2019 10:26:37 -0700 Subject: Started work on Text and friends. Text processing isn't trivial, and I want this iteration to be significantly more robust. This time I/O will be separated out into codecs that will handle the encoding/decoding to/from different formats. --- src/unstable/text.cpp | 25 +++++++++++++++++ src/unstable/text.h | 64 ++++++++++++++++++++++++++++++++++++++++++ src/unstable/textcodec.cpp | 0 src/unstable/textcodec.h | 0 src/unstable/textcodecutf8.cpp | 0 src/unstable/textcodecutf8.h | 0 6 files changed, 89 insertions(+) create mode 100644 src/unstable/textcodec.cpp create mode 100644 src/unstable/textcodec.h create mode 100644 src/unstable/textcodecutf8.cpp create mode 100644 src/unstable/textcodecutf8.h (limited to 'src') diff --git a/src/unstable/text.cpp b/src/unstable/text.cpp index 73271f8..9e5670d 100644 --- a/src/unstable/text.cpp +++ b/src/unstable/text.cpp @@ -5,4 +5,29 @@ * terms of the license contained in the file LICENSE. */ +#include "bu/text.h" +#include + +Bu::Text::Text() : + pData( NULL ), + iSize( 0 ), + iCodePoints( 0 ) +{ +} + +Bu::Text::Text( const Text &rSrc ) : + pData( NULL ), + iSize( rSrc.iSize ), + iCodePoints( rSrc.iCodePoints ) +{ + pData = new uint16_t[iSize]; + memcpy( pData, rSrc.pData, sizeof(uint16_t)*iSize ); +} + +Bu::Text::~Text() +{ + delete[] pData; + pData = NULL; +} + diff --git a/src/unstable/text.h b/src/unstable/text.h index e69de29..1d623ff 100644 --- a/src/unstable/text.h +++ b/src/unstable/text.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#ifndef BU_TEXT_H +#define BU_TEXT_H + +#include "bu/config.h" + +namespace Bu +{ + /** + * Represents a string of text. Human readable language. 
This should be + * used any time you're dealing with actual text and not just binary + * data. If you need to transport raw binary data then consider using + * Blob and BlobBuilder. + * + * Text objects should be considered immutable. If you need to construct + * a Text object dynamically take a look at TextBuilder, and for + * serialization take a look at TextStream. + * + * A Text object is a sequence of Unicode code points. A code point is not + * one to one with a character. A single character can be represented with + * multiple code points. In addition, a code point can also represent + * formatting or display information. + * + * Internally all data is stored in UTF-16, which is a fair compromise for + * most text. All characters from all modern natural languages fit within + * the Basic Multilingual Plane, which requires only a single 16 bit value + * to represent it. However, when iterating through or addressing data + * in the Text object all work is done on a code point basis. + */ + class Text + { + public: + class iterator; + class const_iterator; + typedef uint32_t CodePoint; + + public: + Text(); + Text( const Text &rSrc ); + virtual ~Text(); + + bool isEmpty() const; + bool isBmpOnly() const; + int32_t getSize() const; + int32_t getSizeInBytes() const; + + uint16_t *getRawData() const; +// Text transform( (CodePoint *)(*pCallback)( CodePoint * ) ); + + private: + uint16_t *pData; + int32_t iSize; + int32_t iCodePoints; + }; + typedef Text::CodePoint CodePoint; +} + +#endif diff --git a/src/unstable/textcodec.cpp b/src/unstable/textcodec.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/unstable/textcodec.h b/src/unstable/textcodec.h new file mode 100644 index 0000000..e69de29 diff --git a/src/unstable/textcodecutf8.cpp b/src/unstable/textcodecutf8.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/unstable/textcodecutf8.h b/src/unstable/textcodecutf8.h new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3