LCOV - code coverage report
Current view: top level - vm - utf8.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 127 187 67.9 %
Date: 2015-06-10 18:10:59 Functions: 97 127 76.4 %

          Line data    Source code
       1             : /* src/vm/utf8.cpp - utf8 string functions
       2             : 
       3             :    Copyright (C) 1996-2014
       4             :    CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
       5             : 
       6             :    This file is part of CACAO.
       7             : 
       8             :    This program is free software; you can redistribute it and/or
       9             :    modify it under the terms of the GNU General Public License as
      10             :    published by the Free Software Foundation; either version 2, or (at
      11             :    your option) any later version.
      12             : 
      13             :    This program is distributed in the hope that it will be useful, but
      14             :    WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :    General Public License for more details.
      17             : 
      18             :    You should have received a copy of the GNU General Public License
      19             :    along with this program; if not, write to the Free Software
      20             :    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
      21             :    02110-1301, USA.
      22             : 
      23             : */
      24             : 
      25             : #include "vm/utf8.hpp"
      26             : #include <algorithm>                    // for std::equal
      27             : #include "mm/memory.hpp"                // for mem_alloc, mem_free
      28             : #include "toolbox/logging.hpp"          // for OStream
      29             : #include "toolbox/intern_table.hpp"     // for InternTable
      30             : #include "toolbox/utf_utils.hpp"        // for transform, Tag, etc
      31             : #include "vm/options.hpp"
      32             : #include "vm/statistics.hpp"
      33             : #include "toolbox/assert.hpp"
      34             : 
      35             : using namespace cacao;
      36             : 
      37             : STAT_REGISTER_VAR(int,count_utf_new,0,"utf new","Calls of utf_new")
      38             : STAT_DECLARE_VAR(int,count_utf_len,0)
      39             : 
      40             : //****************************************************************************//
      41             : //*****          GLOBAL UTF8-STRING INTERN TABLE                         *****//
      42             : //****************************************************************************//
      43             : 
      44             : // used for tag dispatch
      45             : struct utf8_tag  {};
      46             : struct utf16_tag {};
      47             : 
      48      114688 : struct InternedUtf8String {
      49     2899968 :         InternedUtf8String()             : string(0) {}
      50             :         InternedUtf8String(Utf8String u) : string(u) {}
      51             : 
      52             :         /// Interface to HashTable
      53             : 
      54    16540551 :         bool is_empty()    const { return string == ((utf*) 0); }
      55      114688 :         bool is_occupied() const { return string != ((utf*) 0); }
      56    12545787 :         bool is_deleted()  const { return false; }
      57             : 
      58             :         template<typename T>
      59     1994209 :         void set_occupied(const T& t) { string = t.get_string(); }
      60             : 
      61             : //      template<typename Iterator>
      62             : //      bool operator==(const FromUtf16Builder<Iterator>& t) const;
      63             : 
      64             :         template<typename T>
      65    12545787 :         bool operator==(const T& t) const {
      66    12545787 :                 return equal(t.hash(), t.size(), t.begin(), t.tag());
      67             :         }
      68             : 
      69             :         template<typename Iterator>
      70    12510102 :         bool equal(size_t _hash, size_t _size, Iterator it, utf8_tag) const {
      71             :                 return hash() == _hash
      72             :                     && size() == _size
      73    12510102 :                     && std::equal(it, it + _size, begin());
      74             :         }
      75             : 
      76             :         template<typename Iterator>
      77       35685 :         bool equal(size_t _hash, size_t _size, Iterator it, utf16_tag) const {
      78             :                 return hash()       == _hash
      79             :                     && utf16_size() == _size
      80       35685 :                     && std::equal(it, it + _size, utf16_begin());
      81             :         }
      82             : 
      83             :         /// used by operator==
      84             : 
      85             :         utf8_tag tag() const { return utf8_tag(); }
      86             : 
      87    12643255 :         size_t hash() const { return string.hash(); }
      88     4053467 :         size_t size() const { return string.size(); }
      89             : 
      90        7980 :         size_t utf16_size() const { return string.utf16_size(); }
      91             : 
      92     4053467 :         Utf8String::byte_iterator begin() const { return string.begin(); }
      93             :         Utf8String::byte_iterator end()   const { return string.end();   }
      94             : 
      95        7980 :         Utf8String::utf16_iterator utf16_begin() const { return string.utf16_begin(); }
      96             :         Utf8String::utf16_iterator utf16_end()   const { return string.utf16_end();   }
      97             : 
      98             :         /// used by set_occupied
      99             : 
     100     6055656 :         Utf8String get_string() const { return string; }
     101             : private:
     102             :         Utf8String string;
     103             : };
     104             : 
     105         165 : static InternTable<InternedUtf8String> intern_table;
     106             : 
     107             : // initial size of intern table
     108             : #define HASHTABLE_UTF_SIZE 16384
     109             : 
     110         163 : void Utf8String::initialize(void)
     111             : {
     112         163 :         TRACESUBSYSTEMINITIALIZATION("utf8_init");
     113             : 
     114         163 :         assert(!is_initialized());
     115             : 
     116         163 :         intern_table.initialize(HASHTABLE_UTF_SIZE);
     117             : 
     118             :         STATISTICS(count_utf_len += sizeof(utf*) * HASHTABLE_UTF_SIZE);
     119             : 
     120             :         // create utf-symbols for pointer comparison of frequently used strings
     121             : 
     122             : #define UTF8(NAME, STR) utf8::NAME = Utf8String::from_utf8(STR);
     123             : #include "vm/utf8.inc"
     124         163 : }
     125             : 
     126             : 
     127             : /* Utf8String::is_initialized **************************************************
     128             : 
     129             :    Check if utf8 subsystem is initialized
     130             : 
     131             : *******************************************************************************/
     132             : 
     133         163 : bool Utf8String::is_initialized(void)
     134             : {
     135         163 :         return intern_table.is_initialized();
     136             : }
     137             : 
     138             : //****************************************************************************//
     139             : //*****          INTERNAL DATA REPRESENTATION                            *****//
     140             : //****************************************************************************//
     141             : 
     142             : /// allocate a Utf8String with given hash and size
     143             : /// You still have to fill in the string's text!
     144     1896741 : inline Utf8String::Data* Utf8String::alloc(size_t hash,
     145             :                                                    size_t utf8_size,
     146             :                                                    size_t utf16_size) {
     147     1896741 :         Data* str = (Data*) mem_alloc(offsetof(Data,text) + utf8_size + 1);
     148             : 
     149             :         STATISTICS(count_utf_new++);
     150             : 
     151     1896741 :         str->hash       = hash;
     152     1896741 :         str->utf8_size  = utf8_size;
     153     1896741 :         str->utf16_size = utf16_size;
     154             : 
     155     1896741 :         return str;
     156             : }
     157             : 
     158             : 
     159             : //****************************************************************************//
     160             : //*****          HASHING                                                 *****//
     161             : //****************************************************************************//
     162             : 
     163             : /* init/update/finish_hash *****************************************************
     164             : 
     165             :         These routines are used to compute the hash for a utf-8 string byte by byte.
     166             : 
     167             :         Use like this:
     168             :                 size_t hash = 0;
     169             : 
     170             :                 for each byte in string:
     171             :                         hash = update_hash( hash, byte );
     172             : 
     173             :                 hash = finish_hash(hash);
     174             : 
     175             :         The algorithm is the "One-at-a-time" algorithm as published
     176             :         by Bob Jenkins on http://burtleburtle.net/bob/hash/doobs.html.
     177             : 
     178             : *******************************************************************************/
     179             : 
     180   137364945 : static inline size_t update_hash(size_t hash, uint8_t byte)
     181             : {
     182   137364945 :         hash += byte;
     183   137364945 :         hash += (hash << 10);
     184   137364945 :         hash ^= (hash >> 6);
     185             : 
     186   137364945 :         return hash;
     187             : }
     188             : 
     189     5973095 : static inline size_t finish_hash(size_t hash)
     190             : {
     191     5973095 :         hash += (hash << 3);
     192     5973095 :         hash ^= (hash >> 11);
     193     5973095 :         hash += (hash << 15);
     194             : 
     195     5973095 :         return hash;
     196             : }
     197             : 
     198             : 
     199             : //****************************************************************************//
     200             : //*****          UTF-8 STRING                                            *****//
     201             : //****************************************************************************//
     202             : 
     203             : // create & intern string
     204             : 
     205             : // Builds a new utf8 string.
     206             : // Only allocates a new string if the string was not already interned.
     207             : template<typename Iterator>
     208             : struct FromUtf8Builder : utf8::VisitorBase<Utf8String, utf8::ABORT_ON_ERROR> {
     209     5948211 :         FromUtf8Builder(Iterator text, size_t utf8_size)
     210     5948211 :          : _hash(0), _utf8_size(utf8_size), _utf16_size(0), _text(text) {}
     211             : 
     212             :         /// interface to utf8::transform
     213             : 
     214             :         typedef Utf8String ReturnType;
     215             : 
     216   136741819 :         void utf8 (uint8_t  c) {
     217   136741819 :                 _hash = update_hash(_hash, c);
     218   136741819 :         }
     219             : 
     220   133202536 :         void utf16(uint16_t c) {
     221   133202536 :                 _utf16_size++;
     222   133202536 :         }
     223             : 
     224     5948211 :         Utf8String finish() {
     225     5948211 :                 _hash = finish_hash(_hash);
     226             : 
     227     5948211 :                 return intern_table.intern(*this).get_string();
     228             :         }
     229             : 
     230           0 :         Utf8String abort() {
     231           0 :                 return 0;
     232             :         }
     233             : 
     234             :         /// interface to HashTable
     235             : 
     236    24406524 :         size_t hash() const { return _hash; }
     237             : 
     238             :         /// interface to InternTableEntry
     239             : 
     240    12510102 :         utf8_tag tag() const { return utf8_tag(); }
     241             : 
     242    12510102 :         Iterator begin() const { return _text; }
     243             : 
     244    12510102 :         size_t size() const { return _utf8_size; }
     245             : 
     246     1894744 :         Utf8String get_string() const {
     247     1894744 :                 Utf8String::Data *u  = Utf8String::alloc(_hash, _utf8_size, _utf16_size);
     248     1894744 :                 char             *cs = u->text;
     249             : 
     250     1894744 :                 cs  = std::copy(_text, _text + _utf8_size, cs);
     251     1894744 :                 *cs = '\0';
     252             : 
     253     1894744 :                 return (utf*) u;
     254             :         }
     255             : private:
     256             :         size_t   _hash;
     257             :         size_t   _utf8_size;
     258             :         size_t   _utf16_size;
     259             :         Iterator _text;
     260             : };
     261             : 
     262             : 
     263             : // Builds a new utf8 string from a utf16 string.
     264             : // Only allocates a new string if the string was not already interned.
     265             : template<typename Iterator>
     266             : struct FromUtf16Builder : utf8::VisitorBase<Utf8String, utf8::ABORT_ON_ERROR> {
     267        9977 :         FromUtf16Builder(Iterator text, size_t utf16_size)
     268        9977 :          : _hash(0), _utf8_size(0), _utf16_size(utf16_size), _text(text) {}
     269             : 
     270             :         /// interface to utf8::transform
     271             : 
     272             :         typedef Utf8String ReturnType;
     273             : 
     274      498924 :         void utf8 (uint8_t  c) {
     275      498924 :                 _hash = update_hash(_hash, c);
     276      498924 :                 _utf8_size++;
     277      498924 :         }
     278             : 
     279        9977 :         Utf8String finish() {
     280        9977 :                 _hash = finish_hash(_hash);
     281             : 
     282        9977 :                 return intern_table.intern(*this).get_string();
     283             :         }
     284             : 
     285             :         Utf8String abort() {
     286             :                 return 0;
     287             :         }
     288             : 
     289             :         /// interface to HashTable
     290             : 
     291       55639 :         size_t hash() const { return _hash; }
     292             : 
     293             :         /// interface to InternTableEntry
     294             : 
     295       35685 :         utf16_tag tag() const { return utf16_tag(); }
     296             : 
     297       35685 :         Iterator begin() const { return _text; }
     298             : 
     299       35685 :         size_t size() const { return _utf16_size; }
     300             : 
     301        1997 :         Utf8String get_string() const {
     302        1997 :                 Utf8String::Data *u  = Utf8String::alloc(_hash, _utf8_size, _utf16_size);
     303        1997 :                 char             *cs = u->text;
     304             : 
     305        1997 :                 utf16::encode(_text, _text + _utf16_size, cs);
     306        1997 :                 cs[_utf8_size] = '\0';
     307             : 
     308        1997 :                 return (utf*) u;
     309             :         }
     310             : private:
     311             :         size_t   _hash;
     312             :         size_t   _utf8_size;
     313             :         size_t   _utf16_size;
     314             :         Iterator _text;
     315             : };
     316             : 
     317             : 
     318             : template<typename Iterator>
     319     5948211 : static inline Utf8String string_from_utf8(const char *cs, size_t size) {
     320     5948211 :         Iterator begin = cs;
     321     5948211 :         Iterator end   = cs + size;
     322             : 
     323     5948211 :         return utf8::transform(begin, end, FromUtf8Builder<Iterator>(begin, size));
     324             : }
     325             : 
     326             : template<typename Iterator>
     327        9977 : static inline Utf8String string_from_utf16(const uint16_t *cs, size_t size) {
     328        9977 :         Iterator begin = cs;
     329        9977 :         Iterator end   = cs + size;
     330             : 
     331        9977 :         return utf16::transform(begin, end, FromUtf16Builder<Iterator>(begin, size));
     332             : }
     333             : 
     334             : 
     335     5908640 : Utf8String Utf8String::from_utf8(const char *cs, size_t sz) {
     336     5908640 :         return string_from_utf8<const char*>(cs, sz);
     337             : }
     338             : 
     339         740 : Utf8String Utf8String::from_utf8_dot_to_slash(const char *cs, size_t sz) {
     340         740 :         return string_from_utf8<utf8::DotToSlash>(cs, sz);
     341             : }
     342             : 
     343       38831 : Utf8String Utf8String::from_utf8_slash_to_dot(const char *cs, size_t sz) {
     344       38831 :         return string_from_utf8<utf8::SlashToDot>(cs, sz);
     345             : }
     346             : 
     347           0 : Utf8String Utf8String::from_utf8_slash_to_dot(Utf8String u) {
     348           0 :         return string_from_utf8<utf8::SlashToDot>(u.begin(), u.size());
     349             : }
     350             : 
     351        5504 : Utf8String Utf8String::from_utf16(const uint16_t *cs, size_t sz) {
     352        5504 :         return string_from_utf16<const uint16_t*>(cs, sz);
     353             : }
     354             : 
     355        4473 : Utf8String Utf8String::from_utf16_dot_to_slash(const uint16_t *cs, size_t sz) {
     356        4473 :         return string_from_utf16<utf16::DotToSlash>(cs, sz);
     357             : }
     358             : 
     359             : /* Utf8String::utf16_iterator **************************************************
     360             : 
     361             :         A forward iterator over the utf16 codepoints in a Utf8String
     362             : 
     363             : *******************************************************************************/
     364             : 
     365      654039 : uint16_t Utf8String::utf16_iterator::operator*()
     366             : {
     367      654039 :         return utf8::decode_char(next);
     368             : }
     369             : 
     370             : 
     371             : /* Utf8String::substring *******************************************************
     372             : 
     373             :         Returns the bytes [from, to) as a new string; to is size() if omitted.
     374             :         Accessing a null string or an invalid range leads to undefined behaviour
     375             : 
     376             : *******************************************************************************/
     377             : 
     378         569 : Utf8String Utf8String::substring(size_t from) const
     379             : {
     380         569 :         return substring(from, size());
     381             : }
     382             : 
     383         750 : Utf8String Utf8String::substring(size_t from, size_t to) const
     384             : {
     385             :         EXPENSIVE_ASSERT(_data);
     386             :         EXPENSIVE_ASSERT(from >  0);
     387             :         EXPENSIVE_ASSERT(from <= to);
     388             :         EXPENSIVE_ASSERT(to   <= size());
     389             : 
     390         750 :         return Utf8String::from_utf8(begin() + from, to - from);
     391             : }
     392             : 
     393     1918054 : bool Utf8String::is_valid_name() const {
     394     1918054 :         Utf8String::byte_iterator it  = this->begin();
     395     1918054 :         Utf8String::byte_iterator end = this->end();
     396             : 
     397    30307314 :         for (; it != end; it++) {
     398    28389260 :                 unsigned char c = *it;
     399             : 
     400    28389260 :                 if (c < 0x20)
     401           0 :                         return false; // disallow control characters
     402    28389260 :                 if (c == 0xc0 && ((unsigned char) it[1]) == 0x80)
     403           0 :                         return false; // disallow zero
     404             :         }
     405             : 
     406     1918054 :         return true;
     407             : }
     408             : 
     409             : //****************************************************************************//
     410             : //*****          PUBLIC UTF-8 FUNCTIONS                                  *****//
     411             : //****************************************************************************//
     412             : 
     413             : /* Utf8String::initialize ******************************************************
     414             : 
     415             :    Initializes the utf8 subsystem.
     416             : 
     417             : *******************************************************************************/
     418             : 
     419             : /* utf8::num_codepoints ********************************************************
     420             : 
     421             :         Count number of UTF-16 code points in UTF-8 string.
     422             : 
     423             :         Returns -1 on error
     424             : 
     425             : *******************************************************************************/
     426             : 
     427             : struct SafeCodePointCounter : utf8::VisitorBase<long, utf8::ABORT_ON_ERROR> {
     428             :         typedef long ReturnType;
     429             : 
     430       13699 :         SafeCodePointCounter() : count(0) {}
     431             : 
     432      340073 :         void utf16(uint16_t) { count++; }
     433             : 
     434       13699 :         long finish() { return count; }
     435           0 :         long abort()  { return -1;    }
     436             : private:
     437             :         long count;
     438             : };
     439             : 
     440       13699 : long utf8::num_codepoints(const char *cs, size_t sz) {
     441       13699 :         return utf8::transform(cs, cs + sz, SafeCodePointCounter());
     442             : }
     443             : 
     444             : /* utf8::num_bytes *************************************************************
     445             : 
     446             :         Calculate how many bytes a UTF-8 encoded version of a UTF-16 string
     447             :         would need.
     448             : 
     449             : *******************************************************************************/
     450             : 
     451             : struct ByteCounter : utf8::VisitorBase<size_t, utf8::IGNORE_ERRORS> {
     452             :         typedef size_t ReturnType;
     453             : 
     454           0 :         ByteCounter() : count(0) {}
     455             : 
     456           0 :         void utf8(uint8_t) { count++; }
     457             : 
     458           0 :         size_t finish() { return count; }
     459             : private:
     460             :         size_t count;
     461             : };
     462             : 
     463           0 : size_t utf8::num_bytes(const uint16_t *cs, size_t sz)
     464             : {
     465           0 :         return utf16::transform(cs, cs + sz, ByteCounter());
     466             : }
     467             : 
     468             : 
     469             : /***
     470             :  * Compute the hash of a UTF-16 string.
     471             :  * The hash will be the same as for the UTF-8 encoded version of this string
     472             :  */
     473             : struct Utf16Hasher : utf16::VisitorBase<size_t> {
     474             :         typedef size_t ReturnType;
     475             : 
     476       14907 :         Utf16Hasher() : hash(0) {}
     477             : 
     478      124202 :         void utf8(uint8_t c) {
     479      124202 :                 hash = update_hash(hash, c);
     480      124202 :         }
     481             : 
     482       14907 :         size_t finish() { return finish_hash(hash); }
     483             : private:
     484             :         size_t hash;
     485             : };
     486             : 
     487       14907 : size_t utf8::compute_hash(const uint16_t *cs, size_t sz) {
     488       14907 :         return utf16::transform(cs, cs + sz, Utf16Hasher());
     489             : }
     490             : 
     491             : 
     492             : //****************************************************************************//
     493             : //*****          GLOBAL UTF8-STRING CONSTANTS                            *****//
     494             : //****************************************************************************//
     495             : 
     496             : #define UTF8( NAME, STR ) Utf8String utf8::NAME;
     497             : #include "vm/utf8.inc"
     498             : 
     499             : ////////////////////////////////////////////////////////////////////////////////
     500             : ////////////////////////////////////////////////////////////////////////////////
     501             : // LEGACY C API
     502             : ////////////////////////////////////////////////////////////////////////////////
     503             : ////////////////////////////////////////////////////////////////////////////////
     504             : 
     505           0 : extern const char *utf8_text(utf *u) { return Utf8String(u).begin(); }
     506           0 : extern const char *utf8_end (utf *u) { return Utf8String(u).end();   }
     507             : 
     508           0 : extern size_t utf8_size(utf *u) { return Utf8String(u).size(); }
     509           0 : extern size_t utf8_hash(utf *u) { return Utf8String(u).hash(); }
     510             : 
     511             : /* utf_display_printable_ascii *************************************************
     512             : 
     513             :    Write utf symbol to stdout (for debugging purposes).
     514             :    Non-printable and non-ASCII characters are printed as '?'.
     515             : 
     516             : *******************************************************************************/
     517             : 
     518             : struct DisplayPrintableAscii : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
     519             :         typedef void ReturnType;
     520             : 
     521           0 :         DisplayPrintableAscii(FILE *dst) : _dst(dst) {}
     522             : 
     523           0 :         void utf8(uint8_t c) {
     524           0 :                 fputc((c >= 32 && c <= 127) ? c : '?', _dst);
     525           0 :         }
     526             : 
     527           0 :         void finish() {fflush(_dst);}
     528             : private:
     529             :         FILE *_dst;
     530             : };
     531             : 
     532           0 : void utf_display_printable_ascii(Utf8String u)
     533             : {
     534           0 :         if (u == NULL) {
     535           0 :                 printf("NULL");
     536           0 :                 fflush(stdout);
     537           0 :                 return;
     538             :         }
     539             : 
     540           0 :         utf8::transform(u, DisplayPrintableAscii(stdout));
     541             : }
     542             : 
     543             : 
     544             : /* utf_display_printable_ascii_classname ***************************************
     545             : 
     546             :    Write utf symbol to stdout with `/' converted to `.' (for debugging
     547             :    purposes).
     548             :    Non-printable and non-ASCII characters are printed as '?'.
     549             : 
     550             : *******************************************************************************/
     551             : 
     552           0 : void utf_display_printable_ascii_classname(Utf8String u)
     553             : {
     554           0 :         if (u == NULL) {
     555           0 :                 printf("NULL");
     556           0 :                 fflush(stdout);
     557           0 :                 return;
     558             :         }
     559             : 
     560           0 :         utf8::transform(utf8::slash_to_dot(u), DisplayPrintableAscii(stdout));
     561             : }
     562             : 
     563             : 
     564             : /* utf_sprint_convert_to_latin1 ************************************************
     565             : 
     566             :    Write utf symbol into c-string (for debugging purposes).
     567             :    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     568             :    invalid results.
     569             : 
     570             : *******************************************************************************/
     571             : 
     572             : struct SprintConvertToLatin1 : utf8::VisitorBase<void, utf8::IGNORE_ERRORS> {
     573             :         typedef void ReturnType;
     574             : 
     575           0 :         SprintConvertToLatin1(char* dst) : _dst(dst) {}
     576             : 
     577           0 :         void utf16(uint16_t c) { *_dst++ = c; }
     578             : 
     579           0 :         void finish() { *_dst = '\0'; }
     580             : private:
     581             :         char *_dst;
     582             : };
     583             : 
     584           0 : void utf_sprint_convert_to_latin1(char *buffer, Utf8String u)
     585             : {
     586           0 :         if (!u) {
     587           0 :                 strcpy(buffer, "NULL");
     588           0 :                 return;
     589             :         }
     590             : 
     591           0 :         utf8::transform(u, SprintConvertToLatin1(buffer));
     592             : }
     593             : 
     594             : 
     595             : /* utf_sprint_convert_to_latin1_classname **************************************
     596             : 
     597             :    Write utf symbol into c-string with `/' converted to `.' (for debugging
     598             :    purposes).
     599             :    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     600             :    invalid results.
     601             : 
     602             : *******************************************************************************/
     603             : 
     604           0 : void utf_sprint_convert_to_latin1_classname(char *buffer, Utf8String u)
     605             : {
     606           0 :         if (!u) {
     607           0 :                 strcpy(buffer, "NULL");
     608           0 :                 return;
     609             :         }
     610             : 
     611           0 :         utf8::transform(utf8::slash_to_dot(u), SprintConvertToLatin1(buffer));
     612             : }
     613             : 
     614             : 
     615             : /* utf_strcat_convert_to_latin1 ************************************************
     616             : 
     617             :    Like libc strcat, but uses an utf8 string.
     618             :    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     619             :    invalid results.
     620             : 
     621             : *******************************************************************************/
     622             : 
     623           0 : void utf_strcat_convert_to_latin1(char *buffer, utf *u)
     624             : {
     625           0 :         utf_sprint_convert_to_latin1(buffer + strlen(buffer), u);
     626           0 : }
     627             : 
     628             : 
     629             : /* utf_strcat_convert_to_latin1_classname **************************************
     630             : 
     631             :    Like libc strcat, but uses an utf8 string.
     632             :    Characters are converted to 8-bit Latin-1, non-Latin-1 characters yield
     633             :    invalid results.
     634             : 
     635             : *******************************************************************************/
     636             : 
     637           0 : void utf_strcat_convert_to_latin1_classname(char *buffer, Utf8String u)
     638             : {
     639           0 :         utf_sprint_convert_to_latin1_classname(buffer + strlen(buffer), u);
     640           0 : }
     641             : 
     642             : 
     643             : /* utf_fprint_printable_ascii **************************************************
     644             : 
     645             :    Write utf symbol into file.
     646             :    Non-printable and non-ASCII characters are printed as '?'.
     647             : 
     648             : *******************************************************************************/
     649             : 
     650           0 : void utf_fprint_printable_ascii(FILE *file, Utf8String u)
     651             : {
     652           0 :         if (!u) return;
     653             : 
     654           0 :         utf8::transform(u, DisplayPrintableAscii(file));
     655             : }
     656             : 
     657             : 
     658             : /* utf_fprint_printable_ascii_classname ****************************************
     659             : 
     660             :    Write utf symbol into file with `/' converted to `.'.
     661             :    Non-printable and non-ASCII characters are printed as '?'.
     662             : 
     663             : *******************************************************************************/
     664             : 
     665           0 : void utf_fprint_printable_ascii_classname(FILE *file, Utf8String u)
     666             : {
     667           0 :         if (!u) return;
     668             : 
     669           0 :         utf8::transform(utf8::slash_to_dot(u), DisplayPrintableAscii(file));
     670             : }
     671             : 
     672             : const size_t Utf8String::sizeof_utf = sizeof(Utf8String::Data);
     673             : 
     674             : namespace cacao {
     675             : 
     676             : // OStream operators
     677           0 : OStream& operator<<(OStream& os, const Utf8String &u) {
     678           0 :   return os << (u ? u.begin() : "(nil)");
     679             : }
     680             : 
     681         495 : } // end namespace cacao
     682             : 
     683             : /*
     684             :  * These are local overrides for various environment variables in Emacs.
     685             :  * Please do not remove this and leave it at the end of the file, where
     686             :  * Emacs will automagically detect them.
     687             :  * ---------------------------------------------------------------------
     688             :  * Local variables:
     689             :  * mode: c++
     690             :  * indent-tabs-mode: t
     691             :  * c-basic-offset: 4
     692             :  * tab-width: 4
     693             :  * End:
     694             :  * vim:noexpandtab:sw=4:ts=4:
     695             :  */

Generated by: LCOV version 1.11