aboutsummaryrefslogtreecommitdiff
path: root/src/unstable/text.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2019-07-05 10:26:37 -0700
committerMike Buland <eichlan@xagasoft.com>2019-07-05 10:26:37 -0700
commit9cdf227b84a18141de527a0ad85344a20914b974 (patch)
treec1cb3d0e15b8ce96c0d680e2ce8360e3e452fbfd /src/unstable/text.h
parent14683979c43e17393dc4f902fe65ed22898b2bce (diff)
downloadlibbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.gz
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.bz2
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.xz
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.zip
Started work on Text and friends.
Text processing isn't trivial, and I want this iteration to be significantly more robust. This time I/O will be separated out into codecs that will handle the encoding/decoding to/from different formats.
Diffstat (limited to 'src/unstable/text.h')
-rw-r--r--src/unstable/text.h64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/unstable/text.h b/src/unstable/text.h
index e69de29..1d623ff 100644
--- a/src/unstable/text.h
+++ b/src/unstable/text.h
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) 2007-2019 Xagasoft, All rights reserved.
3 *
4 * This file is part of the libbu++ library and is released under the
5 * terms of the license contained in the file LICENSE.
6 */
7
8#ifndef BU_TEXT_H
9#define BU_TEXT_H
10
11#include "bu/config.h"
12
13namespace Bu
14{
15 /**
16 * Represents a string of text. Human readable language. This should be
17 * used any time you're dealing with actual text and not just binary
18 * data. If you need to transport raw binary data then consider using
19 * Blob and BlobBuilder.
20 *
21 * Text objects should be considered immutable. If you need to construct
22 * a Text object dynamically take a look at TextBuilder, and for
23 * serialization take a look at TextStream.
24 *
25 * A Text object is a sequence of Unicode code points. A code point is not
26 * one to one with a character. A single character can be represented with
27 * multiple code points. In addition, a code point can also represent
28 * formatting or display information.
29 *
30 * Internally all data is stored in UTF-16, which is a fair compromise for
31 * most text. All characters from all modern natural languages fit within
32 * the Basic Multilingual Plane, which requires only a single 16 bit value
33 * to represent it. However, when iterating through or addressing data
34 * in the Text object all work is done on a code point basis.
35 */
36 class Text
37 {
38 public:
   /**
    * Forward declarations of the nested iterator types. Their definitions
    * are not part of this header as shown.
    */
39 class iterator;
40 class const_iterator;
   /** Unsigned 32 bit integer type used to hold a single code point. */
41 typedef uint32_t CodePoint;
42
43 public:
   /** Default constructor. */
44 Text();
   /**
    * Copy constructor.
    *@param rSrc The Text object to copy from.
    */
45 Text( const Text &rSrc );
   /**
    * Destructor, declared virtual.
    *
    * NOTE(review): a copy constructor and a destructor are declared, but
    * no copy assignment operator. If pData is owned by this object (not
    * visible from this header, confirm in the implementation) then the
    * compiler generated operator= would only copy the pointer. Consider
    * declaring or disabling assignment.
    */
46 virtual ~Text();
47
   /**
    * NOTE(review): presumably reports whether this object holds no text
    * at all; confirm against the implementation.
    */
48 bool isEmpty() const;
   /**
    * NOTE(review): presumably reports whether every code point lies in
    * the Basic Multilingual Plane, i.e. fits in a single 16 bit value;
    * confirm against the implementation.
    */
49 bool isBmpOnly() const;
   /**
    * NOTE(review): the units of these two sizes are not visible here.
    * Presumably getSize() counts code points (see iCodePoints) and
    * getSizeInBytes() reports the storage used by the UTF-16 data;
    * confirm against the implementation.
    */
50 int32_t getSize() const;
51 int32_t getSizeInBytes() const;
52
   /**
    * Returns a pointer to non-const uint16_t from a const member function.
    *
    * NOTE(review): presumably this is the internal UTF-16 buffer (pData).
    * Callers can write through the returned pointer even though Text
    * objects are documented as immutable; check whether
    * const uint16_t * was intended.
    */
53 uint16_t *getRawData() const;
54// Text transform( (CodePoint *)(*pCallback)( CodePoint * ) );
55
56 private:
   /**
    * Pointer to 16 bit units. NOTE(review): presumably the UTF-16 encoded
    * text described in the class comment; ownership is not visible here.
    */
57 uint16_t *pData;
   /**
    * NOTE(review): presumably the length of pData; whether it is counted
    * in 16 bit units or bytes is not visible here.
    */
58 int32_t iSize;
   /** NOTE(review): presumably the number of code points stored. */
59 int32_t iCodePoints;
60 };
61 typedef Text::CodePoint CodePoint; /**< Makes Text::CodePoint available directly as Bu::CodePoint. */
62}
63
64#endif