diff options
author | Mike Buland <eichlan@xagasoft.com> | 2019-07-05 10:26:37 -0700 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2019-07-05 10:26:37 -0700 |
commit | 9cdf227b84a18141de527a0ad85344a20914b974 (patch) | |
tree | c1cb3d0e15b8ce96c0d680e2ce8360e3e452fbfd /src/unstable/text.h | |
parent | 14683979c43e17393dc4f902fe65ed22898b2bce (diff) | |
download | libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.gz libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.bz2 libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.xz libbu++-9cdf227b84a18141de527a0ad85344a20914b974.zip |
Started work on Text and friends.
Text processing isn't trivial, and I want this iteration to be
significantly more robust. This time I/O will be separated out into
codecs that will handle the encoding/decoding to/from different formats.
Diffstat (limited to 'src/unstable/text.h')
-rw-r--r-- | src/unstable/text.h | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/src/unstable/text.h b/src/unstable/text.h index e69de29..1d623ff 100644 --- a/src/unstable/text.h +++ b/src/unstable/text.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2019 Xagasoft, All rights reserved. | ||
3 | * | ||
4 | * This file is part of the libbu++ library and is released under the | ||
5 | * terms of the license contained in the file LICENSE. | ||
6 | */ | ||
7 | |||
8 | #ifndef BU_TEXT_H | ||
9 | #define BU_TEXT_H | ||
10 | |||
11 | #include "bu/config.h" | ||
12 | |||
13 | namespace Bu | ||
14 | { | ||
15 | /** | ||
16 | * Represents a string of text. Human readable language. This should be | ||
17 | * used any time you're dealing with actual text and not just binary | ||
18 | * data. If you need to transport raw binary data then consider using | |
19 | * Blob and BlobBuilder. | ||
20 | * | ||
21 | * Text objects should be considered immutable. If you need to construct | ||
22 | * a Text object dynamically take a look at TextBuilder, and for | ||
23 | * serialization take a look at TextStream. | ||
24 | * | ||
25 | * A Text object is a sequence of Unicode code points. A code point is not | ||
26 | * one to one with a character. A single character can be represented with | ||
27 | * multiple code points. In addition, a code point can also represent | |
28 | * formatting or display information. | |
29 | * | ||
30 | * Internally all data is stored in UTF-16, which is a fair compromise for | ||
31 | * most text. All characters from all modern natural languages fit within | |
32 | * the Basic Multilingual Plane, which requires only a single 16 bit value | ||
33 | * to represent it. However, when iterating through or addressing data | ||
34 | * in the Text object all work is done on a code point basis. | ||
35 | */ | ||
36 | class Text /* Text value type; the header comment preceding this class describes it as a sequence of Unicode code points stored internally as UTF-16. */ | |
37 | { | |
38 | public: | |
39 | class iterator; /* forward declaration only; no definition appears in this header */ | |
40 | class const_iterator; /* forward declaration only; no definition appears in this header */ | |
41 | typedef uint32_t CodePoint; /* a single code point, held in an unsigned 32 bit integer */ | |
42 | | |
43 | public: | |
44 | Text(); /* default constructor */ | |
45 | Text( const Text &rSrc ); /* copy constructor */ | |
46 | virtual ~Text(); /* virtual destructor. NOTE(review): a copy constructor and a destructor are declared but no copy assignment operator is; if pData is owned by this object (presumably, confirm), the implicitly generated assignment would copy the raw pointer. Consider declaring operator= as well. */ | |
47 | | |
48 | bool isEmpty() const; /* presumably true when the text holds no code points; confirm against the implementation */ | |
49 | bool isBmpOnly() const; /* presumably true when every code point lies in the Basic Multilingual Plane; confirm against the implementation */ | |
50 | int32_t getSize() const; /* NOTE(review): the unit is not stated here; presumably the number of code points (iCodePoints) rather than 16 bit units; confirm */ | |
51 | int32_t getSizeInBytes() const; /* presumably the byte length of the stored UTF-16 data; confirm */ | |
52 | | |
53 | uint16_t *getRawData() const; /* returns a non-const pointer from a const member, presumably pData. NOTE(review): this lets callers modify an object the class documentation calls immutable; confirm that is intended. */ | |
54 | // Text transform( (CodePoint *)(*pCallback)( CodePoint * ) ); | |
55 | | |
56 | private: | |
57 | uint16_t *pData; /* presumably the buffer of UTF-16 code units; ownership is not visible in this header; confirm */ | |
58 | int32_t iSize; /* presumably the number of 16 bit units in pData; confirm */ | |
59 | int32_t iCodePoints; /* presumably the number of code points represented; confirm */ | |
60 | }; | |
61 | typedef Text::CodePoint CodePoint; /* makes Text::CodePoint available as Bu::CodePoint */ | |
62 | } | ||
63 | |||
64 | #endif | ||