Line data Source code
1 : /* src/toolbox/utf16_transform.inc - implementation of utf16 encoder
2 :
3 : Copyright (C) 1996-2013
4 : CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 :
6 : This file is part of CACAO.
7 :
8 : This program is free software; you can redistribute it and/or
9 : modify it under the terms of the GNU General Public License as
10 : published by the Free Software Foundation; either version 2, or (at
11 : your option) any later version.
12 :
13 : This program is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program; if not, write to the Free Software
20 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 : 02110-1301, USA.
22 :
23 : */
24 :
25 : #ifndef UTF16_TRANSFORM_INC
26 : #define UTF16_TRANSFORM_INC 1
27 :
28 : namespace utf16 {
29 :
30 : namespace impl {
31 : struct CopyUtf16ToUtf8 : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
32 : typedef void ReturnType;
33 :
34 1997 : CopyUtf16ToUtf8(char *dst) : dst(dst) {}
35 :
36 161440 : void utf8(uint8_t c) { *dst++ = c; }
37 :
38 1997 : void finish() { *dst = '\0'; }
39 : private:
40 : char *dst;
41 : };
42 : } // end namespace impl
43 :
/**
 * Feed every UTF-16 code unit in [it, end) to a visitor, together with
 * the bytes of its UTF-8 style encoding.
 *
 * For each code unit the visitor first receives the unit itself via
 * fn.utf16(), followed by one fn.utf8() call per encoded byte:
 *   - 0x0001 .. 0x007F : one byte
 *   - 0x0000 and 0x0080 .. 0x07FF : two bytes (so zero becomes 0xC0 0x80)
 *   - 0x0800 .. 0xFFFF : three bytes
 * Every code unit is encoded on its own; surrogate halves are not
 * combined into a single code point.
 *
 * @param it   iterator to the first code unit
 * @param end  iterator one past the last code unit
 * @param fn   visitor, taken by value; must provide ReturnType, utf16(),
 *             utf8() and finish()
 * @return whatever fn.finish() returns
 */
template<typename Iterator, typename Fn>
inline typename Fn::ReturnType transform(Iterator it, Iterator end, Fn fn) {
	while (it != end) {
		const uint16_t unit = *it;

		fn.utf16(unit);

		if (unit >= 0x800) {
			// three bytes: 4 + 6 + 6 payload bits
			fn.utf8((unit >> 12) | 0xE0);
			fn.utf8(((unit >> 6) & 0x3F) | 0x80);
			fn.utf8((unit & 0x3F) | 0x80);
		} else if (unit == 0 || unit >= 0x80) {
			// two bytes: 5 + 6 payload bits (also used for the zero unit)
			fn.utf8((unit >> 6) | 0xC0);
			fn.utf8((unit & 0x3F) | 0x80);
		} else {
			// single byte, value is passed through unchanged
			fn.utf8(static_cast<char>(unit));
		}

		++it;
	}

	return fn.finish();
}
75 :
76 :
77 : template<typename Utf16Iterator>
78 1997 : inline void encode(Utf16Iterator begin, Utf16Iterator end, char *dst) {
79 1997 : ::utf16::transform(begin, end, ::utf16::impl::CopyUtf16ToUtf8(dst));
80 1997 : }
81 :
82 : } // end namespace utf16
83 :
84 :
85 : #endif // UTF16_TRANSFORM_INC
86 :
87 : /*
88 : * These are local overrides for various environment variables in Emacs.
89 : * Please do not remove this and leave it at the end of the file, where
90 : * Emacs will automagically detect them.
91 : * ---------------------------------------------------------------------
92 : * Local variables:
93 : * mode: c++
94 : * indent-tabs-mode: t
95 : * c-basic-offset: 4
96 : * tab-width: 4
97 : * End:
98 : * vim:noexpandtab:sw=4:ts=4:
99 : */
|