/* * Copyright (C) 2007-2019 Xagasoft, All rights reserved. * * This file is part of the libbu++ library and is released under the * terms of the license contained in the file LICENSE. */ #ifndef BU_TEXT_H #define BU_TEXT_H #include "bu/config.h" namespace Bu { /** * Represents a string of text. Human readable language. This should be * used any time you're dealing with actual text and not just binary * data. If you neeed to transport raw binary data then consider using * Blob and BlobBuilder. * * Text objects should be considered immutable. If you need to construct * a Text object dynamically take a look at TextBuilder, and for * serialization take a look at TextStream. * * A Text object is a sequence of Unicode code points. A code point is not * one to one with a character. A single character can be represented with * multilpe code points. In addition, a code point can also represent * formatting or display inforamtion. * * Internally all data is stored in UTF-16, which is a fair compromise for * mose text. All characters from all modern natural languages fit within * the Basic Multilingual Plane, which requires only a single 16 bit value * to represent it. However, when iterating through or addressing data * in the Text object all work is done on a code point basis. */ class Text { public: class iterator; class const_iterator; typedef uint32_t CodePoint; public: Text(); Text( const Text &rSrc ); virtual ~Text(); bool isEmpty() const; bool isBmpOnly() const; int32_t getSize() const; int32_t getSizeInBytes() const; uint16_t *getRawData() const; // Text transform( (CodePoint *)(*pCallback)( CodePoint * ) ); private: uint16_t *pData; int32_t iSize; int32_t iCodePoints; }; typedef Text::CodePoint CodePoint; } #endif