CACAO
utf16_transform.inc
Go to the documentation of this file.
1 /* src/toolbox/utf16_transform.inc - implementation of utf16 encoder
2 
3  Copyright (C) 1996-2013
4  CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 
6  This file is part of CACAO.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2, or (at
11  your option) any later version.
12 
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21  02110-1301, USA.
22 
23 */
24 
25 #ifndef UTF16_TRANSFORM_INC
26 #define UTF16_TRANSFORM_INC 1
27 
28 namespace utf16 {
29 
30 namespace impl {
31  struct CopyUtf16ToUtf8 : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
32  typedef void ReturnType;
33 
34  CopyUtf16ToUtf8(char *dst) : dst(dst) {}
35 
36  void utf8(uint8_t c) { *dst++ = c; }
37 
38  void finish() { *dst = '\0'; }
39  private:
40  char *dst;
41  };
42 } // end namespace impl
43 
/**
 * Walk a range of UTF-16 code units and report each unit, followed by
 * its byte encoding, to a visitor.
 *
 * For every code unit c in [it, end) the visitor first gets
 * fn.utf16(c) and then one fn.utf8(byte) call per encoded byte:
 *   - 0x0001 .. 0x007F          : 1 byte
 *   - 0x0000 and 0x0080 .. 0x07FF : 2 bytes (so NUL becomes 0xC0 0x80)
 *   - 0x0800 .. 0xFFFF          : 3 bytes
 * Each code unit is encoded on its own; surrogate pairs are not combined.
 *
 * @param it   iterator to the first code unit
 * @param end  iterator one past the last code unit
 * @param fn   visitor, taken by value; must provide a ReturnType typedef
 *             and the members utf16(), utf8() and finish()
 * @return     whatever fn.finish() returns
 */
template<typename Iterator, typename Fn>
inline typename Fn::ReturnType transform(Iterator it, Iterator end, Fn fn) {
	for (; it != end; ++it) {
		const uint16_t c = *it;

		fn.utf16(c);

		// All bytes are assembled as unsigned values so the result does
		// not depend on whether plain char is signed on the platform.
		if ((c != 0) && (c < 0x80)) {
			// 1 byte: 0xxxxxxx
			fn.utf8(static_cast<uint8_t>(c));
		} else if (c < 0x800) {
			// 2 bytes: 110xxxxx 10xxxxxx (also used for c == 0)
			fn.utf8(static_cast<uint8_t>(0xC0 | (c >> 6)));
			fn.utf8(static_cast<uint8_t>(0x80 | (c & 0x3F)));
		} else {
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			fn.utf8(static_cast<uint8_t>(0xE0 | (c >> 12)));
			fn.utf8(static_cast<uint8_t>(0x80 | ((c >> 6) & 0x3F)));
			fn.utf8(static_cast<uint8_t>(0x80 | (c & 0x3F)));
		}
	}

	return fn.finish();
}
75 
76 
77  template<typename Utf16Iterator>
78  inline void encode(Utf16Iterator begin, Utf16Iterator end, char *dst) {
79  ::utf16::transform(begin, end, ::utf16::impl::CopyUtf16ToUtf8(dst));
80  }
81 
82 } // end namespace utf16
83 
84 
85 #endif // UTF16_TRANSFORM_INC
86 
87 /*
88  * These are local overrides for various environment variables in Emacs.
89  * Please do not remove this and leave it at the end of the file, where
90  * Emacs will automagically detect them.
91  * ---------------------------------------------------------------------
92  * Local variables:
93  * mode: c++
94  * indent-tabs-mode: t
95  * c-basic-offset: 4
96  * tab-width: 4
97  * End:
98  * vim:noexpandtab:sw=4:ts=4:
99  */
Fn::ReturnType transform(Iterator begin, Iterator end, Fn)
void encode(Utf16Iterator begin, Utf16Iterator end, char *dst)
Fn::ReturnType transform(Iterator begin, Iterator end, Fn)