CACAO
string.cpp
Go to the documentation of this file.
1 /* src/vm/string.cpp - java.lang.String related functions
2 
3  Copyright (C) 1996-2013
4  CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 
6  This file is part of CACAO.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2, or (at
11  your option) any later version.
12 
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21  02110-1301, USA.
22 
23 */
24 
25 #include "vm/string.hpp"
26 
27 #include <cassert>
28 
29 #include "vm/array.hpp"
30 #include "vm/exceptions.hpp"
31 #include "vm/globals.hpp"
32 #include "vm/javaobjects.hpp"
33 #include "vm/options.hpp"
34 #include "vm/statistics.hpp"
35 
36 #include "toolbox/intern_table.hpp"
37 #include "toolbox/logging.hpp"
38 #include "toolbox/OStream.hpp"
39 #include "toolbox/utf_utils.hpp"
40 
41 using namespace cacao;
42 
43 STAT_DECLARE_VAR(int,size_string,0)
44 
45 //****************************************************************************//
46 //***** GLOBAL JAVA/LANG/STRING INTERN TABLE *****//
47 //****************************************************************************//
48 
50  /// Interface to HashTable
51 
52  InternedJavaString() : _hash(0), _str(0) {}
53 
54  size_t hash() const { return _hash; }
55  size_t size() const { return _str.size(); }
56 
57  bool is_empty() const { return _str == (java_object_t*) 0; }
58  bool is_occupied() const { return _str != (java_object_t*) 0; }
59  bool is_deleted() const { return false; }
60 
61  template<typename T>
62  void set_occupied(const T& t) {
63  _hash = t.hash();
64  _str = t.get_string();
65  }
66 
67  template<typename T>
68  bool operator==(const T& t) const {
69  return hash() == t.hash()
70  && size() == t.size()
71  && std::equal(begin(), end(), t.begin());
72  }
73 
74  const uint16_t *begin() const { return _str.begin(); }
75  const uint16_t *end() const { return _str.end(); }
76 
77  /// used by set_occupied
78 
79  JavaString get_string() const { return _str; }
80 private:
81  size_t _hash;
83 };
84 
85 
87 
88 //****************************************************************************//
89 //***** JAVA STRING SUBSYSTEM INITIALIZATION *****//
90 //****************************************************************************//
91 
92 /***
93  * Initialize string subsystem
94  */
96  TRACESUBSYSTEMINITIALIZATION("string_init");
97 
98  assert(!is_initialized());
99 
100  intern_table.initialize(4096);
101 }
102 
103 /***
104  * Check if the string subsystem is initialized
105  */
107  return intern_table.is_initialized();
108 }
109 
110 //****************************************************************************//
111 //***** JAVA STRING CONSTRUCTORS *****//
112 //****************************************************************************//
113 
114 /***
115  * Allocate a new java/lang/String object, fill it with string content
116  * and set its fields.
117  *
118  * If input chars is NULL, a NullPointerException is raised.
119  *
120  * @param src start of the iterator range that contains the text for the new string
121  * @param end end of input
122  * @param dst_size number of UTF-16 chars new string will contain.
123  *
124  * @tparam Iterator A STL style iterator over utf8 chars.
125  */
126 template<typename Iterator>
127 static inline java_handle_t* makeJavaString(Iterator src, Iterator end, size_t dst_size) {
128  if (src == NULL) {
130  return NULL;
131  }
132 
133  // allocate new java/lang/String
135  if (h == NULL) return NULL;
136 
137  // allocate char[] for strings text
138  CharArray ca(dst_size);
139  if (ca.is_null()) return NULL;
140 
142 
143  // copy text into char[]
144 
145  if (!utf8::decode(src, end, ca.get_raw_data_ptr()))
146  return NULL;
147 
148  return h;
149 }
150 
151 
152 static inline JavaString allocate_on_system_heap(size_t size) {
153  // allocate string
155  if (h == NULL) return NULL;
156 
157  // set string VTABLE and lockword
158  Lockword(h->lockword).init();
160 
161  // allocate array
162  java_chararray_t *a = (java_chararray_t*) MNEW(uint8_t, sizeof(java_chararray_t) + sizeof(u2) * size);
163 
164  // set array VTABLE, lockword and length
167  a->header.size = size;
168 
170 
171  STATISTICS(size_string += sizeof(class_java_lang_String->instancesize));
172 
173  return h;
174 }
175 
176 
177 /* JavaString::from_utf8 *******************************************************
178 
179  Create a new java/lang/String filled with text decoded from an UTF-8 string.
180  Returns NULL on error.
181 
182 *******************************************************************************/
183 
185  return makeJavaString(u.begin(), u.end(), u.utf16_size());
186 }
187 
188 JavaString JavaString::from_utf8(const char *cs, size_t sz) {
189  return makeJavaString(cs, cs + sz, utf8::num_codepoints(cs, sz));
190 }
191 
192 /* JavaString::from_utf8_slash_to_dot ******************************************
193 
194  Create a new java/lang/String filled with text decoded from an UTF-8 string.
195  Replaces '/' with '.'.
196 
197  NOTE:
198  If the input is not valid UTF-8 the process aborts!
199 
200 *******************************************************************************/
201 
203  return makeJavaString<utf8::SlashToDot>(u.begin(), u.end(), u.utf16_size());
204 }
205 
206 /* JavaString::from_utf8_dot_to_slash ******************************************
207 
208  Create a new java/lang/String filled with text decoded from an UTF-8 string.
209  Replaces '.' with '/'.
210 
211  NOTE:
212  If the input is not valid UTF-8 the process aborts!
213 
214 *******************************************************************************/
215 
217  return makeJavaString<utf8::DotToSlash>(u.begin(), u.end(), u.utf16_size());
218 }
219 
220 /* JavaString::literal *********************************************************
221 
222  Create and intern a java/lang/String filled with text decoded from an UTF-8
223  string.
224 
225  NOTE:
226  because the intern table is allocated on the system heap the GC
227  can't see it and thus interned strings must also be allocated on
228  the system heap.
229 
230 *******************************************************************************/
231 
232 /// Used to lazily construct a java.lang.String literal
234  LiteralBuilder(Utf8String u) : _hash(u.hash()), _string(u) {}
235 
236  size_t hash() const { return _hash; }
237  size_t size() const { return _string.utf16_size(); }
238 
239  Utf8String::utf16_iterator begin() const { return _string.utf16_begin(); }
240  Utf8String::utf16_iterator end() const { return _string.utf16_end(); }
241 
244  assert(jstr);
245 
246  bool b = utf8::decode(_string.begin(), _string.end(), const_cast<uint16_t*>(jstr.begin()));
247  (void) b;
248  assert(b);
249 
250  return jstr;
251  }
252 private:
253  const size_t _hash;
255 };
256 
258  return intern_table.intern(LiteralBuilder(u)).get_string();
259 }
260 
261 
262 /* JavaString::from_utf16 ******************************************************
263 
264  Create a new java/lang/String filled with text copied from an UTF-16 string.
265  Returns NULL on error.
266 
267 *******************************************************************************/
268 
269 JavaString JavaString::from_utf16(const uint16_t *cs, size_t sz) {
270  return makeJavaString(cs, cs + sz, sz);
271 }
272 
273 /* JavaString::from_array ******************************************************
274 
275  Creates a new java/lang/String with a given char[]
276 
277  WARNING: the char[] is not copied or validated,
278  you must make sure it is never changed.
279 
280 *******************************************************************************/
281 
282 #ifdef WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH
283 
284 JavaString JavaString::from_array(java_handle_t *array, int32_t count, int32_t offset) {
286  if (!str)
287  return NULL;
288 
289  java_lang_String jstr(str);
290 
291  jstr.set_value((java_handle_chararray_t*) array);
292  jstr.set_count (count);
293  jstr.set_offset(offset);
294 
295  return str;
296 }
297 
298 #endif
299 
300 /* JavaString::intern **********************************************************
301 
302  intern string in global intern table
303 
304  NOTE:
305  because the intern table is allocated on the system heap the GC
306  can't see it and thus interned strings must also be allocated on
307  the system heap.
308 
309 *******************************************************************************/
310 
311 /// Used to lazily copy a java.lang.String into the intern table
314  : _hash(utf8::compute_hash(str.begin(), str.size())),
315  _string(str) {}
316 
317  size_t hash() const { return _hash; }
318  size_t size() const { return _string.size(); }
319 
320  const uint16_t *begin() const { return _string.begin(); }
321  const uint16_t *end() const { return _string.end(); }
322 
325  EXPENSIVE_ASSERT(jstr);
326 
327  std::copy(begin(), end(), const_cast<uint16_t*>(jstr.begin()));
328 
329  return jstr;
330  }
331 private:
332  const size_t _hash;
334 };
335 
337  return intern_table.intern(LazyStringCopy(*this)).get_string();
338 }
339 
340 //****************************************************************************//
341 //***** JAVA STRING ACCESSORS *****//
342 //****************************************************************************//
343 
344 /* JavaString::begin ***********************************************************
345 
346  Get the utf-16 contents of string
347 
348 *******************************************************************************/
349 
350 const uint16_t* JavaString::begin() const {
351  assert(str);
352 
354 
355  if (array == NULL) {
356  // this can only happen if the string has been allocated by java code
357  // and <init> has not been called on it yet
358  return NULL;
359  }
360 
361  CharArray ca(array);
362 
363  int32_t offset = runtime_str_ops::get_string_offset(str);
364  uint16_t* ptr = ca.get_raw_data_ptr();
365 
366  return ptr + offset;
367 }
368 
369 const uint16_t* JavaString::end() const {
370  const uint16_t *ptr = begin();
371 
372  return ptr ? ptr + size() : NULL;
373 }
374 
375 
376 /* JavaString::size ************************************************************
377 
378  Get the number of utf-16 characters in string
379 
380 *******************************************************************************/
381 
382 size_t JavaString::size() const {
383  assert(str);
384 
386 }
387 
388 /* JavaString::utf8_size *******************************************************
389 
390  Get the number of bytes this string would need in utf-8 encoding
391 
392 *******************************************************************************/
393 
394 size_t JavaString::utf8_size() const {
395  assert(str);
396 
397  return utf8::num_bytes(begin(), size());
398 }
399 
400 //****************************************************************************//
401 //***** JAVA STRING CONVERSIONS *****//
402 //****************************************************************************//
403 
404 /* JavaString::to_chars ********************************************************
405 
406  Decodes java/lang/String into newly allocated string (debugging)
407 
408  NOTE:
409  You must free the string allocated yourself with MFREE
410 
411 *******************************************************************************/
412 
413 char *JavaString::to_chars() const {
414  if (str == NULL) return MNEW(char, 1); // memory is zero initialized
415 
416  size_t sz = size();
417 
418  const uint16_t *src = begin();
419  const uint16_t *end = src + sz;
420 
421  char *buf = MNEW(char, sz + 1);
422  char *dst = buf;
423 
424  while (src != end) *dst++ = *src++;
425 
426  *dst = '\0';
427 
428  return buf;
429 }
430 
431 /* JavaString::to_utf8() *******************************************************
432 
433  make utf symbol from java.lang.String
434 
435 *******************************************************************************/
436 
438  if (str == NULL) return utf8::empty;
439 
440  return Utf8String::from_utf16(begin(), size());
441 }
442 
443 /* JavaString::to_utf8_dot_to_slash() ******************************************
444 
445  make utf symbol from java.lang.String
446  replace '.' with '/'
447 
448 *******************************************************************************/
449 
451  if (str == NULL) return utf8::empty;
452 
453  return Utf8String::from_utf16_dot_to_slash(begin(), size());
454 }
455 
456 //****************************************************************************//
457 //***** JAVA STRING IO *****//
458 //****************************************************************************//
459 
460 /* JavaString::fprint **********************************************************
461 
462  Print the given Java string to the given stream.
463 
464 *******************************************************************************/
465 
466 void JavaString::fprint(FILE *stream) const
467 {
468  const uint16_t* cs = begin();
469  size_t sz = size();
470 
471  for (size_t i = 0; i < sz; i++) {
472  char c = cs[i];
473 
474  fputc(c, stream);
475  }
476 }
477 
478 void JavaString::fprint_printable_ascii(FILE *stream) const
479 {
480  const uint16_t* cs = begin();
481  size_t sz = size();
482 
483  for (size_t i = 0; i < sz; i++) {
484  char c = cs[i];
485 
486  c = (c >= 32 && (unsigned char)c <= 127) ? c : '?';
487 
488  fputc(c, stream);
489  }
490 }
491 
493  if (!js)
494  return os << "<null string>";
495 
496  const u2 *cs = js.begin();
497 
498  if (cs == NULL) {
499  // string has been allocated by java code
500  // but <init> has not been called on it yet.
501  return os << "<uninitialized string>";
502  } else {
503  os << '"';
504 
505  for (const u2 *end = js.end(); cs != end; ++cs) {
506  os << ((char) *cs);
507  }
508 
509  os << '"';
510 
511  return os;
512  }
513 }
514 
515 
516 /*
517  * These are local overrides for various environment variables in Emacs.
518  * Please do not remove this and leave it at the end of the file, where
519  * Emacs will automagically detect them.
520  * ---------------------------------------------------------------------
521  * Local variables:
522  * mode: c++
523  * indent-tabs-mode: t
524  * c-basic-offset: 4
525  * tab-width: 4
526  * End:
527  * vim:noexpandtab:sw=4:ts=4:
528  */
const u2 * begin() const
Definition: string.cpp:350
#define hash(_i1, _i2)
Definition: peephole.c:55
const uint16_t * end() const
Definition: string.cpp:75
bool is_occupied() const
Definition: string.cpp:58
#define STATISTICS(x)
Wrapper for statistics only code.
Definition: statistics.hpp:975
uint16_t * get_raw_data_ptr()
Definition: array.hpp:340
JavaString get_string() const
Definition: string.cpp:323
const size_t _hash
Definition: string.cpp:332
Definition: os.hpp:123
const uint16_t * end() const
Definition: string.cpp:321
Utf8String to_utf8() const
Definition: string.cpp:437
size_t utf16_size() const
Definition: utf8.hpp:164
virtual java_handle_array_t * get_handle() const
Definition: array.hpp:103
JavaString intern() const
Definition: string.cpp:336
size_t compute_hash(const uint16_t *cs, size_t)
Definition: utf8.cpp:487
static JavaString from_utf8(Utf8String)
Definition: string.cpp:184
byte_iterator end() const
Definition: utf8.hpp:107
static InternTable< InternedJavaString > intern_table
Definition: string.cpp:86
void init()
Definition: lockword.hpp:61
uintptr_t lockword
Definition: global.hpp:265
JavaString _str
Definition: string.cpp:82
java_handle_chararray_t * get_value() const
bool is_empty() const
Definition: string.cpp:57
typedef void(JNICALL *jvmtiEventSingleStep)(jvmtiEnv *jvmti_env
s4 instancesize
Definition: class.hpp:118
size_t hash() const
Definition: string.cpp:236
static Utf8String from_utf16_dot_to_slash(const uint16_t *, size_t)
Definition: utf8.cpp:355
void set_count(int32_t value)
bool operator==(const T &t) const
Definition: string.cpp:68
static classinfo * get_arrayclass_by_type(int type)
Returns the primitive array-class of the given type.
Definition: primitive.cpp:336
static void set_fields(java_handle_t *str, java_handle_chararray_t *value)
JavaString get_string() const
Definition: string.cpp:242
static JavaString from_array(java_handle_t *array, int32_t count, int32_t offset)
creates a new java/lang/String with a given char[] WARNING: the char[] is not copied or validated...
Definition: string.cpp:284
Used to lazily construct a java.lang.String literal.
Definition: string.cpp:233
const size_t _hash
Definition: string.cpp:253
JNIEnv jthread jobject jclass jlong size
Definition: jvmti.h:387
const u2 * end() const
Definition: string.cpp:369
size_t hash() const
Definition: string.cpp:54
#define TRACESUBSYSTEMINITIALIZATION(text)
Definition: options.hpp:258
java_handle_t * builtin_new(classinfo *c)
Definition: builtin.cpp:816
const uint16_t * begin() const
Definition: string.cpp:320
bool is_deleted() const
Definition: string.cpp:59
char * to_chars() const
Definition: string.cpp:413
size_t size() const
Definition: string.cpp:318
uint16_t u2
Definition: types.hpp:43
const Utf8String _string
Definition: string.cpp:254
size_t size() const
Definition: string.cpp:55
This file contains the statistics framework.
void exceptions_throw_nullpointerexception(void)
Simple stream class for formatted output.
Definition: OStream.hpp:141
void set_occupied(const T &t)
Definition: string.cpp:62
MIIterator i
java_object_t objheader
Definition: global.hpp:286
static JavaString allocate_on_system_heap(size_t size)
Definition: string.cpp:152
jsize get_string_offset(const java_lang_String &s)
void fprint(FILE *) const
Definition: string.cpp:466
OStream & operator<<(OStream &OS, const std::string &t)
Definition: OStream.hpp:459
#define EXPENSIVE_ASSERT(EXPR)
An assertion that performs computations too expensive even for a normal debug build.
Definition: assert.hpp:90
Utf8String::utf16_iterator begin() const
Definition: string.cpp:239
Used to lazily copy a java.lang.String into the intern table.
Definition: string.cpp:312
size_t utf8_size() const
Definition: string.cpp:394
static Utf8String from_utf16(const uint16_t *, size_t)
Definition: utf8.cpp:351
static JavaString from_utf8_slash_to_dot(Utf8String)
Definition: string.cpp:202
Utf8String::utf16_iterator end() const
Definition: string.cpp:240
Lockword.
Definition: lockword.hpp:37
vftbl_t * vftbl
Definition: class.hpp:121
classinfo * class_java_lang_String
Definition: globals.cpp:39
long num_codepoints(const char *, size_t)
Definition: utf8.cpp:440
byte_iterator begin() const
Definition: utf8.hpp:106
LiteralBuilder(Utf8String u)
Definition: string.cpp:234
GNU Classpath java/lang/String.
size_t size() const
Definition: string.cpp:237
void fprint_printable_ascii(FILE *) const
Definition: string.cpp:478
size_t size() const
Definition: string.cpp:382
#define MNEW(type, num)
Definition: memory.hpp:96
void set_value(java_handle_chararray_t *value)
static JavaString from_utf8_dot_to_slash(Utf8String)
Definition: string.cpp:216
void set_offset(int32_t value)
JavaString get_string() const
used by set_occupied
Definition: string.cpp:79
LazyStringCopy(JavaString str)
Definition: string.cpp:313
static void initialize()
Definition: string.cpp:95
bool decode(Utf8Iterator begin, Utf8Iterator end, uint16_t *dst)
jsize get_string_count(const java_lang_String &s)
java_array_t header
Definition: global.hpp:308
#define str(x)
const JavaString _string
Definition: string.cpp:333
bool is_null() const
Definition: array.hpp:203
const uint16_t * begin() const
Definition: string.cpp:74
static JavaString literal(Utf8String)
Definition: string.cpp:257
vftbl_t * vftbl
Definition: global.hpp:264
#define STAT_DECLARE_VAR(type, var, init)
Declare an external statistics variable.
Definition: statistics.hpp:963
size_t hash() const
Definition: string.cpp:317
static bool is_initialized()
Definition: string.cpp:106
InternedJavaString()
Interface to HashTable.
Definition: string.cpp:52
size_t num_bytes(const uint16_t *, size_t)
Definition: utf8.cpp:463
static java_handle_t * makeJavaString(Iterator src, Iterator end, size_t dst_size)
Definition: string.cpp:127
static JavaString from_utf16(const u2 *, size_t)
creates a new java/lang/String from a utf16-text
Definition: string.cpp:269
Utf8String to_utf8_dot_to_slash() const
Definition: string.cpp:450