aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2019-07-05 10:26:37 -0700
committerMike Buland <eichlan@xagasoft.com>2019-07-05 10:26:37 -0700
commit9cdf227b84a18141de527a0ad85344a20914b974 (patch)
treec1cb3d0e15b8ce96c0d680e2ce8360e3e452fbfd
parent14683979c43e17393dc4f902fe65ed22898b2bce (diff)
downloadlibbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.gz
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.bz2
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.tar.xz
libbu++-9cdf227b84a18141de527a0ad85344a20914b974.zip
Started work on Text and friends.
Text processing isn't trivial, and I want this iteration to be significantly more robust. This time I/O will be separated out into codecs that will handle the encoding/decoding to/from different formats.
-rw-r--r--src/unstable/text.cpp25
-rw-r--r--src/unstable/text.h64
-rw-r--r--src/unstable/textcodec.cpp0
-rw-r--r--src/unstable/textcodec.h0
-rw-r--r--src/unstable/textcodecutf8.cpp0
-rw-r--r--src/unstable/textcodecutf8.h0
6 files changed, 89 insertions, 0 deletions
diff --git a/src/unstable/text.cpp b/src/unstable/text.cpp
index 73271f8..9e5670d 100644
--- a/src/unstable/text.cpp
+++ b/src/unstable/text.cpp
@@ -5,4 +5,29 @@
5 * terms of the license contained in the file LICENSE. 5 * terms of the license contained in the file LICENSE.
6 */ 6 */
7 7
8#include "bu/text.h"
9#include <string.h>
10
11Bu::Text::Text() :
12 pData( NULL ),
13 iSize( 0 ),
14 iCodePoints( 0 )
15{
16}
17
18Bu::Text::Text( const Text &rSrc ) :
19 pData( NULL ),
20 iSize( rSrc.iSize ),
21 iCodePoints( rSrc.iCodePoints )
22{
23 pData = new uint16_t[iSize];
24 memcpy( pData, rSrc.pData, sizeof(uint16_t)*iSize );
25}
26
27Bu::Text::~Text()
28{
29 delete[] pData;
30 pData = NULL;
31}
32
8 33
diff --git a/src/unstable/text.h b/src/unstable/text.h
index e69de29..1d623ff 100644
--- a/src/unstable/text.h
+++ b/src/unstable/text.h
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) 2007-2019 Xagasoft, All rights reserved.
3 *
4 * This file is part of the libbu++ library and is released under the
5 * terms of the license contained in the file LICENSE.
6 */
7
8#ifndef BU_TEXT_H
9#define BU_TEXT_H
10
11#include "bu/config.h"
12
namespace Bu
{
    /**
     * Represents a string of text.  Human readable language.  This should be
     * used any time you're dealing with actual text and not just binary
     * data.  If you need to transport raw binary data then consider using
     * Blob and BlobBuilder.
     *
     * Text objects should be considered immutable.  If you need to construct
     * a Text object dynamically take a look at TextBuilder, and for
     * serialization take a look at TextStream.
     *
     * A Text object is a sequence of Unicode code points.  A code point is
     * not one to one with a character.  A single character can be
     * represented with multiple code points.  In addition, a code point can
     * also represent formatting or display information.
     *
     * Internally all data is stored in UTF-16, which is a fair compromise
     * for most text.  All characters from all modern natural languages fit
     * within the Basic Multilingual Plane, which requires only a single 16
     * bit value to represent it.  However, when iterating through or
     * addressing data in the Text object all work is done on a code point
     * basis.
     */
    class Text
    {
    public:
        class iterator;
        class const_iterator;

        /** A single Unicode code point, held in a 32 bit unsigned value. */
        typedef uint32_t CodePoint;

    public:
        /** Construct an empty Text object. */
        Text();

        /**
         * Construct a Text object as a deep copy of another.
         * @param rSrc The Text object to copy.
         */
        Text( const Text &rSrc );

        virtual ~Text();

        bool isEmpty() const;
        bool isBmpOnly() const;
        int32_t getSize() const;
        int32_t getSizeInBytes() const;

        uint16_t *getRawData() const;
//      Text transform( (CodePoint *)(*pCallback)( CodePoint * ) );

    private:
        /**
         * Copy assignment is declared private and intentionally left
         * undefined.  The class owns a raw buffer, so the implicit
         * member-wise assignment would leak the destination's buffer and
         * leave two objects deleting the same pointer.  Text objects are
         * meant to be immutable, so assignment is disabled rather than
         * implemented.
         */
        Text &operator=( const Text &rSrc );

    private:
        /** Buffer of 16 bit values owned by this object; may be NULL. */
        uint16_t *pData;
        /** Number of uint16_t elements stored in pData. */
        int32_t iSize;
        /**
         * NOTE(review): presumably the number of code points encoded in
         * pData; nothing visible here computes it, so confirm once the
         * codecs exist.
         */
        int32_t iCodePoints;
    };
    typedef Text::CodePoint CodePoint;
}
63
64#endif
diff --git a/src/unstable/textcodec.cpp b/src/unstable/textcodec.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/unstable/textcodec.cpp
diff --git a/src/unstable/textcodec.h b/src/unstable/textcodec.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/unstable/textcodec.h
diff --git a/src/unstable/textcodecutf8.cpp b/src/unstable/textcodecutf8.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/unstable/textcodecutf8.cpp
diff --git a/src/unstable/textcodecutf8.h b/src/unstable/textcodecutf8.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/unstable/textcodecutf8.h