CACAO
utf8.cpp
Go to the documentation of this file.
1 /* src/vm/utf8.cpp - utf8 string functions
2 
3  Copyright (C) 1996-2014
4  CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 
6  This file is part of CACAO.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2, or (at
11  your option) any later version.
12 
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21  02110-1301, USA.
22 
23 */
24 
25 #include "vm/utf8.hpp"
26 #include <algorithm> // for std::equal
27 #include "mm/memory.hpp" // for mem_alloc, mem_free
28 #include "toolbox/logging.hpp" // for OStream
29 #include "toolbox/intern_table.hpp" // for InternTable
30 #include "toolbox/utf_utils.hpp" // for transform, Tag, etc
31 #include "vm/options.hpp"
32 #include "vm/statistics.hpp"
33 #include "toolbox/assert.hpp"
34 
35 using namespace cacao;
36 
37 STAT_REGISTER_VAR(int,count_utf_new,0,"utf new","Calls of utf_new")
38 STAT_DECLARE_VAR(int,count_utf_len,0)
39 
40 //****************************************************************************//
41 //***** GLOBAL UTF8-STRING INTERN TABLE *****//
42 //****************************************************************************//
43 
44 // used for tag dispatch
45 struct utf8_tag {};
46 struct utf16_tag {};
47 
49  InternedUtf8String() : string(0) {}
50  InternedUtf8String(Utf8String u) : string(u) {}
51 
52  /// Interface to HashTable
53 
54  bool is_empty() const { return string == ((utf*) 0); }
55  bool is_occupied() const { return string != ((utf*) 0); }
56  bool is_deleted() const { return false; }
57 
58  template<typename T>
59  void set_occupied(const T& t) { string = t.get_string(); }
60 
61 // template<typename Iterator>
62 // bool operator==(const FromUtf16Builder<Iterator>& t) const;
63 
64  template<typename T>
65  bool operator==(const T& t) const {
66  return equal(t.hash(), t.size(), t.begin(), t.tag());
67  }
68 
69  template<typename Iterator>
70  bool equal(size_t _hash, size_t _size, Iterator it, utf8_tag) const {
71  return hash() == _hash
72  && size() == _size
73  && std::equal(it, it + _size, begin());
74  }
75 
76  template<typename Iterator>
77  bool equal(size_t _hash, size_t _size, Iterator it, utf16_tag) const {
78  return hash() == _hash
79  && utf16_size() == _size
80  && std::equal(it, it + _size, utf16_begin());
81  }
82 
83  /// used by operator==
84 
85  utf8_tag tag() const { return utf8_tag(); }
86 
87  size_t hash() const { return string.hash(); }
88  size_t size() const { return string.size(); }
89 
90  size_t utf16_size() const { return string.utf16_size(); }
91 
92  Utf8String::byte_iterator begin() const { return string.begin(); }
93  Utf8String::byte_iterator end() const { return string.end(); }
94 
95  Utf8String::utf16_iterator utf16_begin() const { return string.utf16_begin(); }
96  Utf8String::utf16_iterator utf16_end() const { return string.utf16_end(); }
97 
98  /// used by set_occupied
99 
100  Utf8String get_string() const { return string; }
101 private:
103 };
104 
106 
107 // initial size of intern table
108 #define HASHTABLE_UTF_SIZE 16384
109 
111 {
112  TRACESUBSYSTEMINITIALIZATION("utf8_init");
113 
114  assert(!is_initialized());
115 
116  intern_table.initialize(HASHTABLE_UTF_SIZE);
117 
118  STATISTICS(count_utf_len += sizeof(utf*) * HASHTABLE_UTF_SIZE);
119 
120  // create utf-symbols for pointer comparison of frequently used strings
121 
122 #define UTF8(NAME, STR) utf8::NAME = Utf8String::from_utf8(STR);
123 #include "vm/utf8.inc"
124 }
125 
126 
127 /* Utf8String::is_initialized ***************************************************
128 
129  Check if utf8 subsystem is initialized
130 
131 *******************************************************************************/
132 
134 {
135  return intern_table.is_initialized();
136 }
137 
138 //****************************************************************************//
139 //***** INTERNAL DATA REPRESENTATION *****//
140 //****************************************************************************//
141 
142 /// allocate a Utf8String with given hash and size
143 /// You still have to fill in the strings text!
145  size_t utf8_size,
146  size_t utf16_size) {
147  Data* str = (Data*) mem_alloc(offsetof(Data,text) + utf8_size + 1);
148 
149  STATISTICS(count_utf_new++);
150 
151  str->hash = hash;
152  str->utf8_size = utf8_size;
153  str->utf16_size = utf16_size;
154 
155  return str;
156 }
157 
158 
159 //****************************************************************************//
160 //***** HASHING *****//
161 //****************************************************************************//
162 
163 /* init/update/finish_hash *****************************************************
164 
165  These routines are used to compute the hash for a utf-8 string byte by byte.
166 
167  Use like this:
168  size_t hash = 0;
169 
170  for each byte in string:
171  hash = update_hash( hash, byte );
172 
173  hash = finish_hash(hash);
174 
175  The algorithm is the "One-at-a-time" algorithm as published
176  by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
177 
178 *******************************************************************************/
179 
/// Fold one more byte into a running "one-at-a-time" hash state.
///
/// @param hash  hash state accumulated so far (0 for the first byte)
/// @param byte  next byte of the string being hashed
/// @return the new hash state; it still has to go through finish_hash()
static inline size_t update_hash(size_t hash, uint8_t byte)
{
    size_t mixed = hash + byte;

    mixed += mixed << 10;

    return mixed ^ (mixed >> 6);
}
188 
/// Apply the final mixing steps of the "one-at-a-time" hash.
///
/// @param hash  state produced by feeding every byte through update_hash()
/// @return the finished hash value
static inline size_t finish_hash(size_t hash)
{
    size_t avalanched = hash + (hash << 3);

    avalanched ^= avalanched >> 11;

    return avalanched + (avalanched << 15);
}
197 
198 
199 //****************************************************************************//
200 //***** UTF-8 STRING *****//
201 //****************************************************************************//
202 
203 // create & intern string
204 
205 // Builds a new utf8 string.
206 // Only allocates a new string if the string was not already interned.
207 template<typename Iterator>
208 struct FromUtf8Builder : utf8::VisitorBase<Utf8String, utf8::ABORT_ON_ERROR> {
209  FromUtf8Builder(Iterator text, size_t utf8_size)
210  : _hash(0), _utf8_size(utf8_size), _utf16_size(0), _text(text) {}
211 
212  /// interface to utf8::transform
213 
215 
216  void utf8 (uint8_t c) {
217  _hash = update_hash(_hash, c);
218  }
219 
220  void utf16(uint16_t c) {
221  _utf16_size++;
222  }
223 
225  _hash = finish_hash(_hash);
226 
227  return intern_table.intern(*this).get_string();
228  }
229 
231  return 0;
232  }
233 
234  /// interface to HashTable
235 
236  size_t hash() const { return _hash; }
237 
238  /// interface to InternTableEntry
239 
240  utf8_tag tag() const { return utf8_tag(); }
241 
242  Iterator begin() const { return _text; }
243 
244  size_t size() const { return _utf8_size; }
245 
247  Utf8String::Data *u = Utf8String::alloc(_hash, _utf8_size, _utf16_size);
248  char *cs = u->text;
249 
250  cs = std::copy(_text, _text + _utf8_size, cs);
251  *cs = '\0';
252 
253  return (utf*) u;
254  }
255 private:
256  size_t _hash;
257  size_t _utf8_size;
258  size_t _utf16_size;
259  Iterator _text;
260 };
261 
262 
263 // Builds a new utf8 string from an utf16 string.
264 // Only allocates a new string if the string was not already interned.
265 template<typename Iterator>
266 struct FromUtf16Builder : utf8::VisitorBase<Utf8String, utf8::ABORT_ON_ERROR> {
267  FromUtf16Builder(Iterator text, size_t utf16_size)
268  : _hash(0), _utf8_size(0), _utf16_size(utf16_size), _text(text) {}
269 
270  /// interface to utf8::transform
271 
273 
274  void utf8 (uint8_t c) {
275  _hash = update_hash(_hash, c);
276  _utf8_size++;
277  }
278 
280  _hash = finish_hash(_hash);
281 
282  return intern_table.intern(*this).get_string();
283  }
284 
286  return 0;
287  }
288 
289  /// interface to HashTable
290 
291  size_t hash() const { return _hash; }
292 
293  /// interface to InternTableEntry
294 
295  utf16_tag tag() const { return utf16_tag(); }
296 
297  Iterator begin() const { return _text; }
298 
299  size_t size() const { return _utf16_size; }
300 
302  Utf8String::Data *u = Utf8String::alloc(_hash, _utf8_size, _utf16_size);
303  char *cs = u->text;
304 
305  utf16::encode(_text, _text + _utf16_size, cs);
306  cs[_utf8_size] = '\0';
307 
308  return (utf*) u;
309  }
310 private:
311  size_t _hash;
312  size_t _utf8_size;
313  size_t _utf16_size;
314  Iterator _text;
315 };
316 
317 
318 template<typename Iterator>
319 static inline Utf8String string_from_utf8(const char *cs, size_t size) {
320  Iterator begin = cs;
321  Iterator end = cs + size;
322 
323  return utf8::transform(begin, end, FromUtf8Builder<Iterator>(begin, size));
324 }
325 
326 template<typename Iterator>
327 static inline Utf8String string_from_utf16(const uint16_t *cs, size_t size) {
328  Iterator begin = cs;
329  Iterator end = cs + size;
330 
331  return utf16::transform(begin, end, FromUtf16Builder<Iterator>(begin, size));
332 }
333 
334 
335 Utf8String Utf8String::from_utf8(const char *cs, size_t sz) {
336  return string_from_utf8<const char*>(cs, sz);
337 }
338 
339 Utf8String Utf8String::from_utf8_dot_to_slash(const char *cs, size_t sz) {
340  return string_from_utf8<utf8::DotToSlash>(cs, sz);
341 }
342 
343 Utf8String Utf8String::from_utf8_slash_to_dot(const char *cs, size_t sz) {
344  return string_from_utf8<utf8::SlashToDot>(cs, sz);
345 }
346 
348  return string_from_utf8<utf8::SlashToDot>(u.begin(), u.size());
349 }
350 
351 Utf8String Utf8String::from_utf16(const uint16_t *cs, size_t sz) {
352  return string_from_utf16<const uint16_t*>(cs, sz);
353 }
354 
355 Utf8String Utf8String::from_utf16_dot_to_slash(const uint16_t *cs, size_t sz) {
356  return string_from_utf16<utf16::DotToSlash>(cs, sz);
357 }
358 
359 /* Utf8String::utf16_iterator **************************************************
360 
361  A forward iterator over the utf16 codepoints in a Utf8String
362 
363 *******************************************************************************/
364 
366 {
367  return utf8::decode_char(next);
368 }
369 
370 
371 /* Utf8String::substring *******************************************************
372 
373  Create a new string from a byte range of this string; calling this on a
374  null string or with an out-of-range index leads to undefined behaviour
375 
376 *******************************************************************************/
377 
379 {
380  return substring(from, size());
381 }
382 
383 Utf8String Utf8String::substring(size_t from, size_t to) const
384 {
385  EXPENSIVE_ASSERT(_data);
386  EXPENSIVE_ASSERT(from > 0);
387  EXPENSIVE_ASSERT(from <= to);
388  EXPENSIVE_ASSERT(to <= size());
389 
390  return Utf8String::from_utf8(begin() + from, to - from);
391 }
392 
394  Utf8String::byte_iterator it = this->begin();
395  Utf8String::byte_iterator end = this->end();
396 
397  for (; it != end; it++) {
398  unsigned char c = *it;
399 
400  if (c < 0x20)
401  return false; // disallow control characters
402  if (c == 0xc0 && ((unsigned char) it[1]) == 0x80)
403  return false; // disallow zero
404  }
405 
406  return true;
407 }
408 
409 //****************************************************************************//
410 //***** PUBLIC UTF-8 FUNCTIONS *****//
411 //****************************************************************************//
412 
413 /* Utf8String::initialize ******************************************************
414 
415  Initializes the utf8 subsystem.
416 
417 *******************************************************************************/
418 
419 /* utf8::num_codepoints ********************************************************
420 
421  Count number of UTF-16 code points in UTF-8 string.
422 
423  Returns -1 on error
424 
425 *******************************************************************************/
426 
427 struct SafeCodePointCounter : utf8::VisitorBase<long, utf8::ABORT_ON_ERROR> {
428  typedef long ReturnType;
429 
430  SafeCodePointCounter() : count(0) {}
431 
432  void utf16(uint16_t) { count++; }
433 
434  long finish() { return count; }
435  long abort() { return -1; }
436 private:
437  long count;
438 };
439 
440 long utf8::num_codepoints(const char *cs, size_t sz) {
441  return utf8::transform(cs, cs + sz, SafeCodePointCounter());
442 }
443 
444 /* utf8::num_bytes *************************************************************
445 
446  Calculate how many bytes a UTF-8 encoded version of a UTF-16 string
447  would need.
448 
449 *******************************************************************************/
450 
451 struct ByteCounter : utf8::VisitorBase<size_t, utf8::IGNORE_ERRORS> {
452  typedef size_t ReturnType;
453 
454  ByteCounter() : count(0) {}
455 
456  void utf8(uint8_t) { count++; }
457 
458  size_t finish() { return count; }
459 private:
460  size_t count;
461 };
462 
463 size_t utf8::num_bytes(const uint16_t *cs, size_t sz)
464 {
465  return utf16::transform(cs, cs + sz, ByteCounter());
466 }
467 
468 
469 /***
470  * Compute the hash of a UTF-16 string.
471  * The hash will be the same as for the UTF-8 encoded version of this string
472  */
473 struct Utf16Hasher : utf16::VisitorBase<size_t> {
474  typedef size_t ReturnType;
475 
476  Utf16Hasher() : hash(0) {}
477 
478  void utf8(uint8_t c) {
479  hash = update_hash(hash, c);
480  }
481 
482  size_t finish() { return finish_hash(hash); }
483 private:
484  size_t hash;
485 };
486 
487 size_t utf8::compute_hash(const uint16_t *cs, size_t sz) {
488  return utf16::transform(cs, cs + sz, Utf16Hasher());
489 }
490 
491 
492 //****************************************************************************//
493 //***** GLOBAL UTF8-STRING CONSTANTS *****//
494 //****************************************************************************//
495 
496 #define UTF8( NAME, STR ) Utf8String utf8::NAME;
497 #include "vm/utf8.inc"
498 
499 ////////////////////////////////////////////////////////////////////////////////
500 ////////////////////////////////////////////////////////////////////////////////
501 // LEGACY C API
502 ////////////////////////////////////////////////////////////////////////////////
503 ////////////////////////////////////////////////////////////////////////////////
504 
505 extern const char *utf8_text(utf *u) { return Utf8String(u).begin(); }
506 extern const char *utf8_end (utf *u) { return Utf8String(u).end(); }
507 
508 extern size_t utf8_size(utf *u) { return Utf8String(u).size(); }
509 extern size_t utf8_hash(utf *u) { return Utf8String(u).hash(); }
510 
511 /* utf_display_printable_ascii *************************************************
512 
513  Write utf symbol to stdout (for debugging purposes).
514  Non-printable and non-ASCII characters are printed as '?'.
515 
516 *******************************************************************************/
517 
518 struct DisplayPrintableAscii : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
519  typedef void ReturnType;
520 
521  DisplayPrintableAscii(FILE *dst) : _dst(dst) {}
522 
523  void utf8(uint8_t c) {
524  fputc((c >= 32 && c <= 127) ? c : '?', _dst);
525  }
526 
527  void finish() {fflush(_dst);}
528 private:
529  FILE *_dst;
530 };
531 
533 {
534  if (u == NULL) {
535  printf("NULL");
536  fflush(stdout);
537  return;
538  }
539 
541 }
542 
543 
544 /* utf_display_printable_ascii_classname ***************************************
545 
546  Write utf symbol to stdout with `/' converted to `.' (for debugging
547  purposes).
548  Non-printable and non-ASCII characters are printed as '?'.
549 
550 *******************************************************************************/
551 
553 {
554  if (u == NULL) {
555  printf("NULL");
556  fflush(stdout);
557  return;
558  }
559 
561 }
562 
563 
564 /* utf_sprint_convert_to_latin1 ************************************************
565 
566  Write utf symbol into c-string (for debugging purposes).
567  Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
568  invalid results.
569 
570 *******************************************************************************/
571 
572 struct SprintConvertToLatin1 : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
573  typedef void ReturnType;
574 
575  SprintConvertToLatin1(char* dst) : _dst(dst) {}
576 
577  void utf16(uint16_t c) { *_dst++ = c; }
578 
579  void finish() { *_dst = '\0'; }
580 private:
581  char *_dst;
582 };
583 
585 {
586  if (!u) {
587  strcpy(buffer, "NULL");
588  return;
589  }
590 
592 }
593 
594 
595 /* utf_sprint_convert_to_latin1_classname **************************************
596 
597  Write utf symbol into c-string with `/' converted to `.' (for debugging
598  purposes).
599  Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
600  invalid results.
601 
602 *******************************************************************************/
603 
605 {
606  if (!u) {
607  strcpy(buffer, "NULL");
608  return;
609  }
610 
612 }
613 
614 
615 /* utf_strcat_convert_to_latin1 ************************************************
616 
617  Like libc strcat, but uses an utf8 string.
618  Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
619  invalid results.
620 
621 *******************************************************************************/
622 
623 void utf_strcat_convert_to_latin1(char *buffer, utf *u)
624 {
625  utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
626 }
627 
628 
629 /* utf_strcat_convert_to_latin1_classname **************************************
630 
631  Like libc strcat, but uses an utf8 string.
632  Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
633  invalid results.
634 
635 *******************************************************************************/
636 
638 {
639  utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
640 }
641 
642 
643 /* utf_fprint_printable_ascii **************************************************
644 
645  Write utf symbol into file.
646  Non-printable and non-ASCII characters are printed as '?'.
647 
648 *******************************************************************************/
649 
651 {
652  if (!u) return;
653 
655 }
656 
657 
658 /* utf_fprint_printable_ascii_classname ****************************************
659 
660  Write utf symbol into file with `/' converted to `.'.
661  Non-printable and non-ASCII characters are printed as '?'.
662 
663 *******************************************************************************/
664 
666 {
667  if (!u) return;
668 
670 }
671 
672 const size_t Utf8String::sizeof_utf = sizeof(Utf8String::Data);
673 
674 namespace cacao {
675 
676 // OStream operators
678  return os << (u ? u.begin() : "(nil)");
679 }
680 
681 } // end namespace cacao
682 
683 /*
684  * These are local overrides for various environment variables in Emacs.
685  * Please do not remove this and leave it at the end of the file, where
686  * Emacs will automagically detect them.
687  * ---------------------------------------------------------------------
688  * Local variables:
689  * mode: c++
690  * indent-tabs-mode: t
691  * c-basic-offset: 4
692  * tab-width: 4
693  * End:
694  * vim:noexpandtab:sw=4:ts=4:
695  */
void utf_strcat_convert_to_latin1_classname(char *buffer, Utf8String u)
Definition: utf8.cpp:637
#define hash(_i1, _i2)
Definition: peephole.c:55
size_t utf16_size
Definition: utf8.hpp:187
utf_utils::Range< SlashToDot > slash_to_dot(T t)
Definition: utf_utils.hpp:217
uint16_t decode_char(const char *&)
Utf8String substring(size_t from) const
Definition: utf8.cpp:378
#define STATISTICS(x)
Wrapper for statistics only code.
Definition: statistics.hpp:975
static const size_t sizeof_utf
Definition: utf8.hpp:181
FromUtf8Builder(Iterator text, size_t utf8_size)
Definition: utf8.cpp:209
size_t utf8_hash(utf *u)
Definition: utf8.cpp:509
Utf8String finish()
Definition: utf8.cpp:279
utf8_tag tag() const
used by operator==
Definition: utf8.cpp:85
Utf8String finish()
Definition: utf8.cpp:224
Definition: os.hpp:123
static Utf8String from_utf8_dot_to_slash(const char *, size_t)
Definition: utf8.cpp:339
Fn::ReturnType transform(Iterator begin, Iterator end, Fn)
void utf8(uint8_t c)
Definition: utf8.cpp:274
argument_type from
static Data * alloc(size_t hash, size_t utf8_size, size_t utf16_size)
allocate a Utf8String with given hash and size You still have to fill in the strings text! ...
Definition: utf8.cpp:144
static Utf8String from_utf8_slash_to_dot(const char *, size_t)
Definition: utf8.cpp:343
size_t compute_hash(const uint16_t *cs, size_t)
Definition: utf8.cpp:487
byte_iterator end() const
Definition: utf8.hpp:107
void utf_display_printable_ascii_classname(Utf8String u)
Definition: utf8.cpp:552
size_t size() const
Definition: utf8.hpp:161
Utf8String::utf16_iterator utf16_begin() const
Definition: utf8.cpp:95
static void initialize()
Definition: utf8.cpp:110
void utf8(uint8_t c)
Definition: utf8.cpp:216
void set_occupied(const T &t)
Definition: utf8.cpp:59
size_t size() const
Definition: utf8.cpp:299
ByteCounter()
Definition: utf8.cpp:454
void encode(Utf16Iterator begin, Utf16Iterator end, char *dst)
size_t finish()
Definition: utf8.cpp:458
void utf_fprint_printable_ascii_classname(FILE *file, Utf8String u)
Definition: utf8.cpp:665
void utf8(uint8_t c)
Definition: utf8.cpp:523
size_t ReturnType
Definition: utf8.cpp:452
static Utf8String from_utf16_dot_to_slash(const uint16_t *, size_t)
Definition: utf8.cpp:355
Utf8String::utf16_iterator utf16_end() const
Definition: utf8.cpp:96
size_t size() const
Definition: utf8.cpp:88
void utf8(uint8_t c)
Definition: utf8.cpp:478
size_t hash() const
Definition: utf8.hpp:137
Utf8String::byte_iterator begin() const
Definition: utf8.cpp:92
size_t _utf16_size
Definition: utf8.cpp:313
utf16_tag tag() const
interface to InternTableEntry
Definition: utf8.cpp:295
JNIEnv jthread jobject jclass jlong size
Definition: jvmti.h:387
size_t _utf8_size
Definition: utf8.cpp:257
#define TRACESUBSYSTEMINITIALIZATION(text)
Definition: options.hpp:257
void utf_strcat_convert_to_latin1(char *buffer, utf *u)
Definition: utf8.cpp:623
Iterator _text
Definition: utf8.cpp:259
void utf_sprint_convert_to_latin1_classname(char *buffer, Utf8String u)
Definition: utf8.cpp:604
size_t _hash
Definition: utf8.cpp:256
bool operator==(const T &t) const
Definition: utf8.cpp:65
const char * byte_iterator
Definition: utf8.hpp:104
Iterator begin() const
Definition: utf8.cpp:297
Utf16Hasher()
Definition: utf8.cpp:476
size_t hash
Definition: utf8.hpp:185
size_t hash() const
interface to HashTable
Definition: utf8.cpp:236
size_t finish()
Definition: utf8.cpp:482
size_t hash() const
interface to HashTable
Definition: utf8.cpp:291
FromUtf16Builder(Iterator text, size_t utf16_size)
Definition: utf8.cpp:267
utf8_tag tag() const
interface to InternTableEntry
Definition: utf8.cpp:240
#define HASHTABLE_UTF_SIZE
Definition: utf8.cpp:108
This file contains the statistics framework.
static bool is_initialized()
Definition: utf8.cpp:133
const char * utf8_text(utf *u)
Definition: utf8.cpp:505
static Utf8String string_from_utf16(const uint16_t *cs, size_t size)
Definition: utf8.cpp:327
Simple stream class for formatted output.
Definition: OStream.hpp:141
bool is_valid_name() const
Definition: utf8.cpp:393
Utf8String abort()
Definition: utf8.cpp:285
void utf8(uint8_t)
Definition: utf8.cpp:456
Utf8String ReturnType
interface to utf8::transform
Definition: utf8.cpp:214
size_t _hash
Definition: utf8.cpp:311
size_t size() const
Definition: utf8.cpp:244
size_t ReturnType
Definition: utf8.cpp:474
static Utf8String from_utf8(const char *, size_t)
Definition: utf8.cpp:335
void utf_sprint_convert_to_latin1(char *buffer, Utf8String u)
Definition: utf8.cpp:584
static InternTable< InternedUtf8String > intern_table
Definition: utf8.cpp:105
Utf8String get_string() const
used by set_occupied
Definition: utf8.cpp:100
size_t hash() const
Definition: utf8.cpp:87
Utf8String get_string() const
Definition: utf8.cpp:246
Utf8String ReturnType
interface to utf8::transform
Definition: utf8.cpp:272
Utf8String get_string() const
Definition: utf8.cpp:301
SprintConvertToLatin1(char *dst)
Definition: utf8.cpp:575
bool equal(size_t _hash, size_t _size, Iterator it, utf8_tag) const
Definition: utf8.cpp:70
DisplayPrintableAscii(FILE *dst)
Definition: utf8.cpp:521
Utf8String string
Definition: utf8.cpp:102
OStream & operator<<(OStream &OS, const std::string &t)
Definition: OStream.hpp:459
#define EXPENSIVE_ASSERT(EXPR)
An assertion that performs computations too expensive even for a normal debug build.
Definition: assert.hpp:90
void utf_fprint_printable_ascii(FILE *file, Utf8String u)
Definition: utf8.cpp:650
void utf_display_printable_ascii(Utf8String u)
Definition: utf8.cpp:532
static Utf8String from_utf16(const uint16_t *, size_t)
Definition: utf8.cpp:351
Iterator begin() const
Definition: utf8.cpp:242
long num_codepoints(const char *, size_t)
Definition: utf8.cpp:440
byte_iterator begin() const
Definition: utf8.hpp:106
static size_t finish_hash(size_t hash)
Definition: utf8.cpp:189
Utf8String abort()
Definition: utf8.cpp:230
Additional assertion macros.
Fn::ReturnType transform(Iterator begin, Iterator end, Fn)
bool is_occupied() const
Definition: utf8.cpp:55
size_t count
Definition: utf8.cpp:460
size_t hash
Definition: utf8.cpp:484
InternedUtf8String(Utf8String u)
Definition: utf8.cpp:50
static size_t update_hash(size_t hash, uint8_t byte)
Definition: utf8.cpp:180
bool equal(size_t _hash, size_t _size, Iterator it, utf16_tag) const
Definition: utf8.cpp:77
bool is_deleted() const
Definition: utf8.cpp:56
static java_object_t * next
Definition: copy.c:43
size_t _utf8_size
Definition: utf8.cpp:312
Iterator _text
Definition: utf8.cpp:314
#define str(x)
size_t utf16_size() const
Definition: utf8.cpp:90
size_t utf8_size(utf *u)
Definition: utf8.cpp:508
#define STAT_REGISTER_VAR(type, var, init, name, description)
Register an external statistics variable.
Definition: statistics.hpp:966
#define STAT_DECLARE_VAR(type, var, init)
Declare an external statistics variable.
Definition: statistics.hpp:963
bool is_empty() const
Interface to HashTable.
Definition: utf8.cpp:54
void utf16(uint16_t c)
Definition: utf8.cpp:220
void * mem_alloc(int32_t size)
Definition: memory.cpp:86
char text[sizeof(void *)]
Definition: utf8.hpp:189
#define printf(...)
Definition: ssa2.cpp:40
const char * utf8_end(utf *u)
Definition: utf8.cpp:506
size_t utf8_size
Definition: utf8.hpp:186
Utf8String::byte_iterator end() const
Definition: utf8.cpp:93
size_t _utf16_size
Definition: utf8.cpp:258
void utf16(uint16_t c)
Definition: utf8.cpp:577
size_t num_bytes(const uint16_t *, size_t)
Definition: utf8.cpp:463
static Utf8String string_from_utf8(const char *cs, size_t size)
Definition: utf8.cpp:319
void utf16(uint16_t)
Definition: utf8.cpp:432