From 9cdf227b84a18141de527a0ad85344a20914b974 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Fri, 5 Jul 2019 10:26:37 -0700 Subject: Started work on Text and friends. Text processing isn't trivial, and I want this iteration to be significantly more robust. This time I/O will be separated out into codecs that will handle the encoding/decoding to/from different formats. --- src/unstable/text.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'src/unstable/text.h') diff --git a/src/unstable/text.h b/src/unstable/text.h index e69de29..1d623ff 100644 --- a/src/unstable/text.h +++ b/src/unstable/text.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#ifndef BU_TEXT_H +#define BU_TEXT_H + +#include "bu/config.h" + +namespace Bu +{ + /** + * Represents a string of text. Human readable language. This should be + * used any time you're dealing with actual text and not just binary + * data. If you need to transport raw binary data then consider using + * Blob and BlobBuilder. + * + * Text objects should be considered immutable. If you need to construct + * a Text object dynamically take a look at TextBuilder, and for + * serialization take a look at TextStream. + * + * A Text object is a sequence of Unicode code points. A code point is not + * one to one with a character. A single character can be represented with + * multiple code points. In addition, a code point can also represent + * formatting or display information. + * + * Internally all data is stored in UTF-16, which is a fair compromise for + * most text. All characters from all modern natural languages fit within + * the Basic Multilingual Plane, which requires only a single 16 bit value + * to represent it. 
However, when iterating through or addressing data + * in the Text object all work is done on a code point basis. + */ + class Text + { + public: + /** Forward declaration only; the definition is not visible in this header. */ class iterator; + /** Forward declaration only; the definition is not visible in this header. */ class const_iterator; + /** 32-bit unsigned type used to hold a single Unicode code point. */ typedef uint32_t CodePoint; + + public: + /** Default constructor. NOTE(review): presumably yields an empty Text -- confirm in the implementation. */ Text(); + /** Copy constructor. NOTE(review): a copy constructor and a destructor are declared but no copy-assignment operator; if pData is owned by this object the implicit assignment would only copy the pointer -- confirm and declare or disable operator= accordingly. */ Text( const Text &rSrc ); + /** Virtual destructor. */ virtual ~Text(); + + /** NOTE(review): presumably true when the Text holds no code points -- confirm. */ bool isEmpty() const; + /** NOTE(review): presumably true when every code point lies in the Basic Multilingual Plane -- confirm. */ bool isBmpOnly() const; + /** Returns a 32-bit signed size. NOTE(review): the unit is not stated here; presumably code points, in line with the class description -- confirm. */ int32_t getSize() const; + /** Returns a 32-bit signed size. NOTE(review): presumably the byte length of the internal UTF-16 storage -- confirm. */ int32_t getSizeInBytes() const; + + /** Returns a pointer to 16-bit units. NOTE(review): this hands a non-const pointer out of a const method although Text is described as immutable -- confirm that this is intended. */ uint16_t *getRawData() const; +// Text transform( (CodePoint *)(*pCallback)( CodePoint * ) ); + + private: + /** Pointer to 16-bit units. NOTE(review): presumably the UTF-16 data mentioned in the class description; ownership is not visible here -- confirm. */ uint16_t *pData; + /** NOTE(review): presumably the number of 16-bit units stored in pData -- confirm. */ int32_t iSize; + /** NOTE(review): presumably the number of code points encoded in pData -- confirm. */ int32_t iCodePoints; + }; + /** Makes Text::CodePoint available directly as Bu::CodePoint. */ typedef Text::CodePoint CodePoint; +} + +#endif -- cgit v1.2.3