LCOV - code coverage report
Current view: top level - vm - string.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 108 151 71.5 %
Date: 2017-07-14 10:03:36 Functions: 42 53 79.2 %

          Line data    Source code
       1             : /* src/vm/string.cpp - java.lang.String related functions
       2             : 
       3             :    Copyright (C) 1996-2013
       4             :    CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
       5             : 
       6             :    This file is part of CACAO.
       7             : 
       8             :    This program is free software; you can redistribute it and/or
       9             :    modify it under the terms of the GNU General Public License as
      10             :    published by the Free Software Foundation; either version 2, or (at
      11             :    your option) any later version.
      12             : 
      13             :    This program is distributed in the hope that it will be useful, but
      14             :    WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :    General Public License for more details.
      17             : 
      18             :    You should have received a copy of the GNU General Public License
      19             :    along with this program; if not, write to the Free Software
      20             :    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
      21             :    02110-1301, USA.
      22             : 
      23             : */
      24             : 
      25             : #include "vm/string.hpp"
      26             : 
      27             : #include <cassert>
      28             : 
      29             : #include "vm/array.hpp"
      30             : #include "vm/exceptions.hpp"
      31             : #include "vm/globals.hpp"
      32             : #include "vm/javaobjects.hpp"
      33             : #include "vm/options.hpp"
      34             : #include "vm/statistics.hpp"
      35             : 
      36             : #include "toolbox/intern_table.hpp"
      37             : #include "toolbox/logging.hpp"
      38             : #include "toolbox/OStream.hpp"
      39             : #include "toolbox/utf_utils.hpp"
      40             : 
      41             : using namespace cacao;
      42             : 
      43             : STAT_DECLARE_VAR(int,size_string,0)
      44             : 
      45             : //****************************************************************************//
      46             : //*****          GLOBAL JAVA/LANG/STRING INTERN TABLE                    *****//
      47             : //****************************************************************************//
      48             : 
      49           0 : struct InternedJavaString {
      50             :         /// Interface to HashTable
      51             : 
      52      667648 :         InternedJavaString() : _hash(0), _str(0) {}
      53             : 
      54       89163 :         size_t hash() const { return _hash; }
      55       52092 :         size_t size() const { return _str.size(); }
      56             : 
      57      217561 :         bool is_empty()    const { return _str == (java_object_t*) 0; }
      58           0 :         bool is_occupied() const { return _str != (java_object_t*) 0; }
      59       89163 :         bool is_deleted()  const { return false; }
      60             : 
      61             :         template<typename T>
      62       64199 :         void set_occupied(const T& t) {
      63       64199 :                 _hash = t.hash();
      64       64199 :                 _str  = t.get_string();
      65       64199 :         }
      66             : 
      67             :         template<typename T>
      68       89163 :         bool operator==(const T& t) const {
      69             :                 return hash() == t.hash()
      70             :                     && size() == t.size()
      71       89163 :                     && std::equal(begin(), end(), t.begin());
      72             :         }
      73             : 
      74       52092 :         const uint16_t *begin() const { return _str.begin(); }
      75       52092 :         const uint16_t *end()   const { return _str.end();   }
      76             : 
      77             :         /// used by set_occupied
      78             : 
      79      116291 :         JavaString get_string() const { return _str; }
      80             : private:
      81             :         size_t     _hash;
      82             :         JavaString _str;
      83             : };
      84             : 
      85             : 
      86         165 : static InternTable<InternedJavaString> intern_table;
      87             : 
      88             : //****************************************************************************//
      89             : //*****          JAVA STRING SUBSYSTEM INITIALIZATION                    *****//
      90             : //****************************************************************************//
      91             : 
      92             : /***
      93             :  * Initialize string subsystem
      94             :  */
      95         163 : void JavaString::initialize() {
      96         163 :         TRACESUBSYSTEMINITIALIZATION("string_init");
      97             : 
      98         163 :         assert(!is_initialized());
      99             : 
     100         163 :         intern_table.initialize(4096);
     101         163 : }
     102             : 
     103             : /***
     104             :  * Check if the string subsystem is initialized
     105             :  */
     106         163 : bool JavaString::is_initialized() {
     107         163 :         return intern_table.is_initialized();
     108             : }
     109             : 
     110             : //****************************************************************************//
     111             : //*****          JAVA STRING CONSTRUCTORS                                *****//
     112             : //****************************************************************************//
     113             : 
     114             : /***
     115             :  *      Allocate a new java/lang/String object, fill it with string content
     116             :  *      and set its fields.
     117             :  *
     118             :  *      If src is NULL, a NullPointerException is raised and NULL is returned.
     119             :  *
     120             :  *      @param src       start of the iterator range that contains the text for the new string
     121             :  *      @param end       end of input
     122             :  *      @param dst_size  number of UTF-16 chars new string will contain.
     123             :  *
     124             :  *      @tparam Iterator A STL style iterator over utf8 chars.
     125             :  */
     126             : template<typename Iterator>
     127       20933 : static inline java_handle_t* makeJavaString(Iterator src, Iterator end, size_t dst_size) {
     128       20933 :         if (src == NULL) {
     129           0 :                 exceptions_throw_nullpointerexception();
     130           0 :                 return NULL;
     131             :         }
     132             : 
     133             :         // allocate new java/lang/String
     134       20933 :         java_handle_t *h = builtin_new(class_java_lang_String);
     135       20933 :         if (h == NULL) return NULL;
     136             : 
     137             :         // allocate char[] for strings text
     138       20933 :         CharArray ca(dst_size);
     139       20933 :         if (ca.is_null()) return NULL;
     140             : 
     141       20933 :         java_lang_String::set_fields(h, ca.get_handle());
     142             : 
     143             :         // copy text into char[]
     144             : 
     145       20933 :         if (!utf8::decode(src, end, ca.get_raw_data_ptr()))
     146           0 :                 return NULL;
     147             : 
     148       20933 :         return h;
     149             : }
     150             : 
     151             : 
     152       64199 : static inline JavaString allocate_on_system_heap(size_t size) {
     153             :         // allocate string
     154       64199 :         java_handle_t *h = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
     155       64199 :         if (h == NULL) return NULL;
     156             : 
     157             :         // set string VTABLE and lockword
     158       64199 :         Lockword(h->lockword).init();
     159       64199 :         h->vftbl = class_java_lang_String->vftbl;
     160             : 
     161             :         // allocate array
     162       64199 :         java_chararray_t *a = (java_chararray_t*) MNEW(uint8_t, sizeof(java_chararray_t) + sizeof(u2) * size);
     163             : 
     164             :         // set array VTABLE, lockword and length
     165       64199 :         a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
     166       64199 :         Lockword(a->header.objheader.lockword).init();
     167       64199 :         a->header.size            = size;
     168             : 
     169       64199 :         java_lang_String::set_fields(h, (java_handle_chararray_t*) a);
     170             : 
     171             :         STATISTICS(size_string += sizeof(class_java_lang_String->instancesize));
     172             : 
     173       64199 :         return h;
     174             : }
     175             : 
     176             : 
     177             : /* JavaString::from_utf8 *******************************************************
     178             : 
     179             :         Create a new java/lang/String filled with text decoded from an UTF-8 string.
     180             :         Returns NULL on error.
     181             : 
     182             : *******************************************************************************/
     183             : 
     184        1696 : JavaString JavaString::from_utf8(Utf8String u) {
     185        1696 :         return makeJavaString(u.begin(), u.end(), u.utf16_size());
     186             : }
     187             : 
     188       13699 : JavaString JavaString::from_utf8(const char *cs, size_t sz) {
     189       13699 :         return makeJavaString(cs, cs + sz, utf8::num_codepoints(cs, sz));
     190             : }
     191             : 
     192             : /* JavaString::from_utf8_slash_to_dot ******************************************
     193             : 
     194             :         Create a new java/lang/String filled with text decoded from an UTF-8 string.
     195             :         Replaces '/' with '.'.
     196             : 
     197             :         NOTE:
     198             :                 If the input is not valid UTF-8 the process aborts!
     199             : 
     200             : *******************************************************************************/
     201             : 
     202        5538 : JavaString JavaString::from_utf8_slash_to_dot(Utf8String u) {
     203        5538 :         return makeJavaString<utf8::SlashToDot>(u.begin(), u.end(), u.utf16_size());
     204             : }
     205             : 
     206             : /* JavaString::from_utf8_dot_to_slash ******************************************
     207             : 
     208             :         Create a new java/lang/String filled with text decoded from an UTF-8 string.
     209             :         Replaces '.' with '/'.
     210             : 
     211             :         NOTE:
     212             :                 If the input is not valid UTF-8 the process aborts!
     213             : 
     214             : *******************************************************************************/
     215             : 
     216           0 : JavaString JavaString::from_utf8_dot_to_slash(Utf8String u) {
     217           0 :         return makeJavaString<utf8::DotToSlash>(u.begin(), u.end(), u.utf16_size());
     218             : }
     219             : 
     220             : /* JavaString::literal *********************************************************
     221             : 
     222             :         Create and intern a java/lang/String filled with text decoded from an UTF-8
     223             :         string.
     224             : 
     225             :         NOTE:
     226             :                 because the intern table is allocated on the system heap the GC
     227             :                 can't see it and thus interned strings must also be allocated on
     228             :                 the system heap.
     229             : 
     230             : *******************************************************************************/
     231             : 
     232             : /// Used to lazily construct a java.lang.String literal
     233             : struct LiteralBuilder {
     234      101384 :         LiteralBuilder(Utf8String u) : _hash(u.hash()), _string(u) {}
     235             : 
     236      329167 :         size_t hash() const { return _hash; }
     237      101384 :         size_t size() const { return _string.utf16_size(); }
     238             : 
     239       37662 :         Utf8String::utf16_iterator begin() const { return _string.utf16_begin(); }
     240             :         Utf8String::utf16_iterator end()   const { return _string.utf16_end();   }
     241             : 
     242       63722 :         JavaString get_string() const {
     243       63722 :                 JavaString jstr = allocate_on_system_heap(size());
     244       63722 :                 assert(jstr);
     245             : 
     246       63722 :                 bool b = utf8::decode(_string.begin(), _string.end(), const_cast<uint16_t*>(jstr.begin()));
     247             :                 (void) b;
     248       63722 :                 assert(b);
     249             : 
     250       63722 :                 return jstr;
     251             :         }
     252             : private:
     253             :         const size_t     _hash;
     254             :         const Utf8String _string;
     255             : };
     256             : 
     257      101384 : JavaString JavaString::literal(Utf8String u) {
     258      101384 :         return intern_table.intern(LiteralBuilder(u)).get_string();
     259             : }
     260             : 
     261             : 
     262             : /* JavaString::from_utf16 ******************************************************
     263             : 
     264             :         Create a new java/lang/String filled with text copied from an UTF-16 string.
     265             :         Returns NULL on error.
     266             : 
     267             : *******************************************************************************/
     268             : 
     269           0 : JavaString JavaString::from_utf16(const uint16_t *cs, size_t sz) {
     270           0 :         return makeJavaString(cs, cs + sz, sz);
     271             : }
     272             : 
     273             : /* JavaString::from_array ******************************************************
     274             : 
     275             :         Creates a new java/lang/String with a given char[]
     276             : 
     277             :         WARNING: the char[] is not copied or validated,
     278             :                  you must make sure it is never changed.
     279             : 
     280             : *******************************************************************************/
     281             : 
     282             : #ifdef WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH
     283             : 
     284         499 : JavaString JavaString::from_array(java_handle_t *array, int32_t count, int32_t offset) {
     285         499 :         java_handle_t *str = builtin_new(class_java_lang_String);
     286         499 :         if (!str)
     287           0 :                 return NULL;
     288             : 
     289         499 :         java_lang_String jstr(str);
     290             : 
     291         499 :         jstr.set_value((java_handle_chararray_t*) array);
     292         499 :         jstr.set_count (count);
     293         499 :         jstr.set_offset(offset);
     294             : 
     295         499 :         return str;
     296             : }
     297             : 
     298             : #endif
     299             : 
     300             : /* JavaString::intern **********************************************************
     301             : 
     302             :         intern string in global intern table
     303             : 
     304             :         NOTE:
     305             :                 because the intern table is allocated on the system heap the GC
     306             :                 can't see it and thus interned strings must also be allocated on
     307             :                 the system heap.
     308             : 
     309             : *******************************************************************************/
     310             : 
     311             : /// Used to lazily copy a java.lang.String into the intern table
     312             : struct LazyStringCopy {
     313       14907 :         LazyStringCopy(JavaString str)
     314             :          : _hash(utf8::compute_hash(str.begin(), str.size())),
     315       14907 :            _string(str) {}
     316             : 
     317       56777 :         size_t hash() const { return _hash; }
     318       14907 :         size_t size() const { return _string.size(); }
     319             : 
     320       14907 :         const uint16_t *begin() const { return _string.begin(); }
     321         477 :         const uint16_t *end()   const { return _string.end(); }
     322             : 
     323         477 :         JavaString get_string() const {
     324         477 :                 JavaString jstr = allocate_on_system_heap(size());
     325             :                 EXPENSIVE_ASSERT(jstr);
     326             : 
     327         477 :                 std::copy(begin(), end(), const_cast<uint16_t*>(jstr.begin()));
     328             : 
     329         477 :                 return jstr;
     330             :         }
     331             : private:
     332             :         const size_t     _hash;
     333             :         const JavaString _string;
     334             : };
     335             : 
     336       14907 : JavaString JavaString::intern() const {
     337       14907 :         return intern_table.intern(LazyStringCopy(*this)).get_string();
     338             : }
     339             : 
     340             : //****************************************************************************//
     341             : //*****          JAVA STRING ACCESSORS                                   *****//
     342             : //****************************************************************************//
     343             : 
     344             : /* JavaString::begin ***********************************************************
     345             : 
     346             :         Get the utf-16 contents of string
     347             : 
     348             : *******************************************************************************/
     349             : 
     350      208693 : const uint16_t* JavaString::begin() const {
     351      208693 :         assert(str);
     352             : 
     353      208693 :         java_handle_chararray_t *array = java_lang_String::get_value(str);
     354             : 
     355      208693 :         if (array == NULL) {
     356             :                 // this can only happen if the string has been allocated by java code
     357             :                 // and <init> has not been called on it yet
     358           0 :                 return NULL;
     359             :         }
     360             : 
     361      208693 :         CharArray ca(array);
     362             : 
     363      208693 :         int32_t   offset = runtime_str_ops::get_string_offset(str);
     364      208693 :         uint16_t* ptr    = ca.get_raw_data_ptr();
     365             : 
     366      208693 :         return ptr + offset;
     367             : }
     368             : 
     369       52569 : const uint16_t* JavaString::end() const {
     370       52569 :         const uint16_t *ptr = begin();
     371             : 
     372       52569 :         return ptr ? ptr + size() : NULL;
     373             : }
     374             : 
     375             : 
     376             : /* JavaString::size ************************************************************
     377             : 
     378             :         Get the number of utf-16 characters in string
     379             : 
     380             : *******************************************************************************/
     381             : 
     382      144494 : size_t JavaString::size() const {
     383      144494 :         assert(str);
     384             : 
     385      144494 :         return runtime_str_ops::get_string_count(str);
     386             : }
     387             : 
     388             : /* JavaString::utf8_size *******************************************************
     389             : 
     390             :         Get the number of bytes this string would need in utf-8 encoding
     391             : 
     392             : *******************************************************************************/
     393             : 
     394           0 : size_t JavaString::utf8_size() const {
     395           0 :         assert(str);
     396             : 
     397           0 :         return utf8::num_bytes(begin(), size());
     398             : }
     399             : 
     400             : //****************************************************************************//
     401             : //*****          JAVA STRING CONVERSIONS                                 *****//
     402             : //****************************************************************************//
     403             : 
     404             : /* JavaString::to_chars ********************************************************
     405             : 
     406             :         Copies java/lang/String into a newly allocated C string, narrowing each UTF-16 unit to char (debugging)
     407             : 
     408             :         NOTE:
     409             :                 You must free the allocated string yourself with MFREE
     410             : 
     411             : *******************************************************************************/
     412             : 
     413           0 : char *JavaString::to_chars() const {
     414           0 :         if (str == NULL) return MNEW(char, 1); // memory is zero initialized
     415             : 
     416           0 :         size_t sz = size();
     417             : 
     418           0 :         const uint16_t *src = begin();
     419           0 :         const uint16_t *end = src + sz;
     420             : 
     421           0 :         char *buf = MNEW(char, sz + 1);
     422           0 :         char *dst = buf;
     423             : 
     424           0 :         while (src != end) *dst++ = *src++;
     425             : 
     426           0 :         *dst = '\0';
     427             : 
     428           0 :         return buf;
     429             : }
     430             : 
     431             : /* JavaString::to_utf8() *******************************************************
     432             : 
     433             :         make utf symbol from java.lang.String
     434             : 
     435             : *******************************************************************************/
     436             : 
     437        5504 : Utf8String JavaString::to_utf8() const {
     438        5504 :         if (str == NULL) return utf8::empty;
     439             : 
     440        5504 :         return Utf8String::from_utf16(begin(), size());
     441             : }
     442             : 
     443             : /* JavaString::to_utf8_dot_to_slash() ******************************************
     444             : 
     445             :         make utf symbol from java.lang.String
     446             :         replace '.' with '/'
     447             : 
     448             : *******************************************************************************/
     449             : 
     450        4474 : Utf8String JavaString::to_utf8_dot_to_slash() const {
     451        4474 :         if (str == NULL) return utf8::empty;
     452             : 
     453        4474 :         return Utf8String::from_utf16_dot_to_slash(begin(), size());
     454             : }
     455             : 
     456             : //****************************************************************************//
     457             : //*****          JAVA STRING IO                                          *****//
     458             : //****************************************************************************//
     459             : 
     460             : /* JavaString::fprint **********************************************************
     461             : 
     462             :    Print the given Java string to the given stream.
     463             : 
     464             : *******************************************************************************/
     465             : 
     466          41 : void JavaString::fprint(FILE *stream) const
     467             : {
     468          41 :         const uint16_t* cs = begin();
     469          41 :         size_t          sz = size();
     470             : 
     471         205 :         for (size_t i = 0; i < sz; i++) {
     472         164 :                 char c = cs[i];
     473             : 
     474         164 :                 fputc(c, stream);
     475             :         }
     476          41 : }
     477             : 
     478           0 : void JavaString::fprint_printable_ascii(FILE *stream) const
     479             : {
     480           0 :         const uint16_t* cs = begin();
     481           0 :         size_t          sz = size();
     482             : 
     483           0 :         for (size_t i = 0; i < sz; i++) {
     484           0 :                 char c = cs[i];
     485             : 
     486           0 :                 c = (c >= 32 && (unsigned char)c <= 127) ? c : '?';
     487             : 
     488           0 :                 fputc(c, stream);
     489             :         }
     490           0 : }
     491             : 
     492           0 : OStream& operator<<(OStream& os, JavaString js) {
     493           0 :         if (!js)
     494           0 :                 return os << "<null string>";
     495             : 
     496           0 :         const u2 *cs = js.begin();
     497             : 
     498           0 :         if (cs == NULL) {
     499             :                 // string has been allocated by java code
     500             :                 // but <init> has not been called on it yet.
     501           0 :                 return os << "<uninitialized string>";
     502             :         } else {
     503           0 :                 os << '"';
     504             : 
     505           0 :                 for (const u2 *end = js.end(); cs != end; ++cs) {
     506           0 :                         os << ((char) *cs);
     507             :                 }
     508             : 
     509           0 :                 os << '"';
     510             : 
     511           0 :                 return os;
     512             :         }
     513         495 : }
     514             : 
     515             : 
     516             : /*
     517             :  * These are local overrides for various environment variables in Emacs.
     518             :  * Please do not remove this and leave it at the end of the file, where
     519             :  * Emacs will automagically detect them.
     520             :  * ---------------------------------------------------------------------
     521             :  * Local variables:
     522             :  * mode: c++
     523             :  * indent-tabs-mode: t
     524             :  * c-basic-offset: 4
     525             :  * tab-width: 4
     526             :  * End:
     527             :  * vim:noexpandtab:sw=4:ts=4:
     528             :  */

Generated by: LCOV version 1.11