Line data Source code
1 : /* src/vm/string.cpp - java.lang.String related functions
2 :
3 : Copyright (C) 1996-2013
4 : CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 :
6 : This file is part of CACAO.
7 :
8 : This program is free software; you can redistribute it and/or
9 : modify it under the terms of the GNU General Public License as
10 : published by the Free Software Foundation; either version 2, or (at
11 : your option) any later version.
12 :
13 : This program is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program; if not, write to the Free Software
20 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 : 02110-1301, USA.
22 :
23 : */
24 :
#include "vm/string.hpp"

#include <algorithm>
#include <cassert>

#include "vm/array.hpp"
#include "vm/exceptions.hpp"
#include "vm/globals.hpp"
#include "vm/javaobjects.hpp"
#include "vm/options.hpp"
#include "vm/statistics.hpp"

#include "toolbox/intern_table.hpp"
#include "toolbox/logging.hpp"
#include "toolbox/OStream.hpp"
#include "toolbox/utf_utils.hpp"
40 :
41 : using namespace cacao;
42 :
43 : STAT_DECLARE_VAR(int,size_string,0)
44 :
45 : //****************************************************************************//
46 : //***** GLOBAL JAVA/LANG/STRING INTERN TABLE *****//
47 : //****************************************************************************//
48 :
49 0 : struct InternedJavaString {
50 : /// Interface to HashTable
51 :
52 667648 : InternedJavaString() : _hash(0), _str(0) {}
53 :
54 89163 : size_t hash() const { return _hash; }
55 52092 : size_t size() const { return _str.size(); }
56 :
57 217561 : bool is_empty() const { return _str == (java_object_t*) 0; }
58 0 : bool is_occupied() const { return _str != (java_object_t*) 0; }
59 89163 : bool is_deleted() const { return false; }
60 :
61 : template<typename T>
62 64199 : void set_occupied(const T& t) {
63 64199 : _hash = t.hash();
64 64199 : _str = t.get_string();
65 64199 : }
66 :
67 : template<typename T>
68 89163 : bool operator==(const T& t) const {
69 : return hash() == t.hash()
70 : && size() == t.size()
71 89163 : && std::equal(begin(), end(), t.begin());
72 : }
73 :
74 52092 : const uint16_t *begin() const { return _str.begin(); }
75 52092 : const uint16_t *end() const { return _str.end(); }
76 :
77 : /// used by set_occupied
78 :
79 116291 : JavaString get_string() const { return _str; }
80 : private:
81 : size_t _hash;
82 : JavaString _str;
83 : };
84 :
85 :
86 165 : static InternTable<InternedJavaString> intern_table;
87 :
88 : //****************************************************************************//
89 : //***** JAVA STRING SUBSYSTEM INITIALIZATION *****//
90 : //****************************************************************************//
91 :
92 : /***
93 : * Initialize string subsystem
94 : */
95 163 : void JavaString::initialize() {
96 163 : TRACESUBSYSTEMINITIALIZATION("string_init");
97 :
98 163 : assert(!is_initialized());
99 :
100 163 : intern_table.initialize(4096);
101 163 : }
102 :
103 : /***
104 : * Check is string subsystem is initialized
105 : */
106 163 : bool JavaString::is_initialized() {
107 163 : return intern_table.is_initialized();
108 : }
109 :
110 : //****************************************************************************//
111 : //***** JAVA STRING CONSTRUCTORS *****//
112 : //****************************************************************************//
113 :
114 : /***
115 : * Allocate a new java/lang/String object, fill it with string content
116 : * and set its fields.
117 : *
118 : * If input chars is NULL, a NullPointerException is raised.
119 : *
120 : * @param src iterator range that contain the text for the new string
121 : * @param end end of input
122 : * @param dst_size number of UTF-16 chars new string will contain.
123 : *
124 : * @tparam Iterator A STL style iterator over utf8 chars.
125 : */
126 : template<typename Iterator>
127 20933 : static inline java_handle_t* makeJavaString(Iterator src, Iterator end, size_t dst_size) {
128 20933 : if (src == NULL) {
129 0 : exceptions_throw_nullpointerexception();
130 0 : return NULL;
131 : }
132 :
133 : // allocate new java/lang/String
134 20933 : java_handle_t *h = builtin_new(class_java_lang_String);
135 20933 : if (h == NULL) return NULL;
136 :
137 : // allocate char[] for strings text
138 20933 : CharArray ca(dst_size);
139 20933 : if (ca.is_null()) return NULL;
140 :
141 20933 : java_lang_String::set_fields(h, ca.get_handle());
142 :
143 : // copy text into char[]
144 :
145 20933 : if (!utf8::decode(src, end, ca.get_raw_data_ptr()))
146 0 : return NULL;
147 :
148 20933 : return h;
149 : }
150 :
151 :
152 64199 : static inline JavaString allocate_on_system_heap(size_t size) {
153 : // allocate string
154 64199 : java_handle_t *h = (java_object_t*) MNEW(uint8_t, class_java_lang_String->instancesize);
155 64199 : if (h == NULL) return NULL;
156 :
157 : // set string VTABLE and lockword
158 64199 : Lockword(h->lockword).init();
159 64199 : h->vftbl = class_java_lang_String->vftbl;
160 :
161 : // allocate array
162 64199 : java_chararray_t *a = (java_chararray_t*) MNEW(uint8_t, sizeof(java_chararray_t) + sizeof(u2) * size);
163 :
164 : // set array VTABLE, lockword and length
165 64199 : a->header.objheader.vftbl = Primitive::get_arrayclass_by_type(ARRAYTYPE_CHAR)->vftbl;
166 64199 : Lockword(a->header.objheader.lockword).init();
167 64199 : a->header.size = size;
168 :
169 64199 : java_lang_String::set_fields(h, (java_handle_chararray_t*) a);
170 :
171 : STATISTICS(size_string += sizeof(class_java_lang_String->instancesize));
172 :
173 64199 : return h;
174 : }
175 :
176 :
177 : /* JavaString::from_utf8 *******************************************************
178 :
179 : Create a new java/lang/String filled with text decoded from an UTF-8 string.
180 : Returns NULL on error.
181 :
182 : *******************************************************************************/
183 :
184 1696 : JavaString JavaString::from_utf8(Utf8String u) {
185 1696 : return makeJavaString(u.begin(), u.end(), u.utf16_size());
186 : }
187 :
188 13699 : JavaString JavaString::from_utf8(const char *cs, size_t sz) {
189 13699 : return makeJavaString(cs, cs + sz, utf8::num_codepoints(cs, sz));
190 : }
191 :
192 : /* JavaString::from_utf8_slash_to_dot ******************************************
193 :
194 : Create a new java/lang/String filled with text decoded from an UTF-8 string.
195 : Replaces '/' with '.'.
196 :
197 : NOTE:
198 : If the input is not valid UTF-8 the process aborts!
199 :
200 : *******************************************************************************/
201 :
202 5538 : JavaString JavaString::from_utf8_slash_to_dot(Utf8String u) {
203 5538 : return makeJavaString<utf8::SlashToDot>(u.begin(), u.end(), u.utf16_size());
204 : }
205 :
206 : /* JavaString::from_utf8_dot_to_slash ******************************************
207 :
208 : Create a new java/lang/String filled with text decoded from an UTF-8 string.
209 : Replaces '.' with '/'.
210 :
211 : NOTE:
212 : If the input is not valid UTF-8 the process aborts!
213 :
214 : *******************************************************************************/
215 :
216 0 : JavaString JavaString::from_utf8_dot_to_slash(Utf8String u) {
217 0 : return makeJavaString<utf8::DotToSlash>(u.begin(), u.end(), u.utf16_size());
218 : }
219 :
220 : /* JavaString::literal *********************************************************
221 :
222 : Create and intern a java/lang/String filled with text decoded from an UTF-8
223 : string.
224 :
225 : NOTE:
226 : because the intern table is allocated on the system heap the GC
227 : can't see it and thus interned strings must also be allocated on
228 : the system heap.
229 :
230 : *******************************************************************************/
231 :
232 : /// Used to lazily construct a java.lang.String literal
233 : struct LiteralBuilder {
234 101384 : LiteralBuilder(Utf8String u) : _hash(u.hash()), _string(u) {}
235 :
236 329167 : size_t hash() const { return _hash; }
237 101384 : size_t size() const { return _string.utf16_size(); }
238 :
239 37662 : Utf8String::utf16_iterator begin() const { return _string.utf16_begin(); }
240 : Utf8String::utf16_iterator end() const { return _string.utf16_end(); }
241 :
242 63722 : JavaString get_string() const {
243 63722 : JavaString jstr = allocate_on_system_heap(size());
244 63722 : assert(jstr);
245 :
246 63722 : bool b = utf8::decode(_string.begin(), _string.end(), const_cast<uint16_t*>(jstr.begin()));
247 : (void) b;
248 63722 : assert(b);
249 :
250 63722 : return jstr;
251 : }
252 : private:
253 : const size_t _hash;
254 : const Utf8String _string;
255 : };
256 :
257 101384 : JavaString JavaString::literal(Utf8String u) {
258 101384 : return intern_table.intern(LiteralBuilder(u)).get_string();
259 : }
260 :
261 :
262 : /* JavaString:from_utf16 *******************************************************
263 :
264 : Create a new java/lang/String filled with text copied from an UTF-16 string.
265 : Returns NULL on error.
266 :
267 : *******************************************************************************/
268 :
269 0 : JavaString JavaString::from_utf16(const uint16_t *cs, size_t sz) {
270 0 : return makeJavaString(cs, cs + sz, sz);
271 : }
272 :
/* JavaString::from_array ******************************************************

	Creates a new java/lang/String on top of a given char[],
	using the given count and offset.
	Returns NULL if the string object could not be allocated.

	WARNING: the char[] is not copied or validated,
	         you must make sure it is never changed.

*******************************************************************************/

#ifdef WITH_JAVA_RUNTIME_LIBRARY_GNU_CLASSPATH

JavaString JavaString::from_array(java_handle_t *array, int32_t count, int32_t offset) {
	java_handle_t *handle = builtin_new(class_java_lang_String);

	if (handle == NULL)
		return NULL;

	// point the new string at the caller's array
	java_lang_String fields(handle);

	fields.set_value((java_handle_chararray_t*) array);
	fields.set_count(count);
	fields.set_offset(offset);

	return handle;
}

#endif
299 :
300 : /* JavaString::intern **********************************************************
301 :
302 : intern string in global intern table
303 :
304 : NOTE:
305 : because the intern table is allocated on the system heap the GC
306 : can't see it and thus interned strings must also be allocated on
307 : the system heap.
308 :
309 : *******************************************************************************/
310 :
311 : /// Used to lazily copy a java.lang.String into the intern table
312 : struct LazyStringCopy {
313 14907 : LazyStringCopy(JavaString str)
314 : : _hash(utf8::compute_hash(str.begin(), str.size())),
315 14907 : _string(str) {}
316 :
317 56777 : size_t hash() const { return _hash; }
318 14907 : size_t size() const { return _string.size(); }
319 :
320 14907 : const uint16_t *begin() const { return _string.begin(); }
321 477 : const uint16_t *end() const { return _string.end(); }
322 :
323 477 : JavaString get_string() const {
324 477 : JavaString jstr = allocate_on_system_heap(size());
325 : EXPENSIVE_ASSERT(jstr);
326 :
327 477 : std::copy(begin(), end(), const_cast<uint16_t*>(jstr.begin()));
328 :
329 477 : return jstr;
330 : }
331 : private:
332 : const size_t _hash;
333 : const JavaString _string;
334 : };
335 :
336 14907 : JavaString JavaString::intern() const {
337 14907 : return intern_table.intern(LazyStringCopy(*this)).get_string();
338 : }
339 :
340 : //****************************************************************************//
341 : //***** JAVA STRING ACCESSORS *****//
342 : //****************************************************************************//
343 :
344 : /* JavaString::begin ***********************************************************
345 :
346 : Get the utf-16 contents of string
347 :
348 : *******************************************************************************/
349 :
350 208693 : const uint16_t* JavaString::begin() const {
351 208693 : assert(str);
352 :
353 208693 : java_handle_chararray_t *array = java_lang_String::get_value(str);
354 :
355 208693 : if (array == NULL) {
356 : // this can only happen if the string has been allocated by java code
357 : // and <init> has not been called on it yet
358 0 : return NULL;
359 : }
360 :
361 208693 : CharArray ca(array);
362 :
363 208693 : int32_t offset = runtime_str_ops::get_string_offset(str);
364 208693 : uint16_t* ptr = ca.get_raw_data_ptr();
365 :
366 208693 : return ptr + offset;
367 : }
368 :
369 52569 : const uint16_t* JavaString::end() const {
370 52569 : const uint16_t *ptr = begin();
371 :
372 52569 : return ptr ? ptr + size() : NULL;
373 : }
374 :
375 :
376 : /* JavaString::size ************************************************************
377 :
378 : Get the number of utf-16 characters in string
379 :
380 : *******************************************************************************/
381 :
382 144494 : size_t JavaString::size() const {
383 144494 : assert(str);
384 :
385 144494 : return runtime_str_ops::get_string_count(str);
386 : }
387 :
388 : /* JavaString::utf8_size *******************************************************
389 :
390 : Get the number of bytes this string would need in utf-8 encoding
391 :
392 : *******************************************************************************/
393 :
394 0 : size_t JavaString::utf8_size() const {
395 0 : assert(str);
396 :
397 0 : return utf8::num_bytes(begin(), size());
398 : }
399 :
400 : //****************************************************************************//
401 : //***** JAVA STRING CONVERSIONS *****//
402 : //****************************************************************************//
403 :
404 : /* JavaString::to_chars ********************************************************
405 :
406 : Decodes java/lang/String into newly allocated string (debugging)
407 :
408 : NOTE:
409 : You must free the string allocated yourself with MFREE
410 :
411 : *******************************************************************************/
412 :
413 0 : char *JavaString::to_chars() const {
414 0 : if (str == NULL) return MNEW(char, 1); // memory is zero initialized
415 :
416 0 : size_t sz = size();
417 :
418 0 : const uint16_t *src = begin();
419 0 : const uint16_t *end = src + sz;
420 :
421 0 : char *buf = MNEW(char, sz + 1);
422 0 : char *dst = buf;
423 :
424 0 : while (src != end) *dst++ = *src++;
425 :
426 0 : *dst = '\0';
427 :
428 0 : return buf;
429 : }
430 :
431 : /* JavaString::to_utf8() *******************************************************
432 :
433 : make utf symbol from java.lang.String
434 :
435 : *******************************************************************************/
436 :
437 5504 : Utf8String JavaString::to_utf8() const {
438 5504 : if (str == NULL) return utf8::empty;
439 :
440 5504 : return Utf8String::from_utf16(begin(), size());
441 : }
442 :
443 : /* JavaString::to_utf8_dot_to_slash() ******************************************
444 :
445 : make utf symbol from java.lang.String
446 : replace '/' with '.'
447 :
448 : *******************************************************************************/
449 :
450 4474 : Utf8String JavaString::to_utf8_dot_to_slash() const {
451 4474 : if (str == NULL) return utf8::empty;
452 :
453 4474 : return Utf8String::from_utf16_dot_to_slash(begin(), size());
454 : }
455 :
456 : //****************************************************************************//
457 : //***** JAVA STRING IO *****//
458 : //****************************************************************************//
459 :
460 : /* JavaString::fprint **********************************************************
461 :
462 : Print the given Java string to the given stream.
463 :
464 : *******************************************************************************/
465 :
466 41 : void JavaString::fprint(FILE *stream) const
467 : {
468 41 : const uint16_t* cs = begin();
469 41 : size_t sz = size();
470 :
471 205 : for (size_t i = 0; i < sz; i++) {
472 164 : char c = cs[i];
473 :
474 164 : fputc(c, stream);
475 : }
476 41 : }
477 :
478 0 : void JavaString::fprint_printable_ascii(FILE *stream) const
479 : {
480 0 : const uint16_t* cs = begin();
481 0 : size_t sz = size();
482 :
483 0 : for (size_t i = 0; i < sz; i++) {
484 0 : char c = cs[i];
485 :
486 0 : c = (c >= 32 && (unsigned char)c <= 127) ? c : '?';
487 :
488 0 : fputc(c, stream);
489 : }
490 0 : }
491 :
492 0 : OStream& operator<<(OStream& os, JavaString js) {
493 0 : if (!js)
494 0 : return os << "<null string>";
495 :
496 0 : const u2 *cs = js.begin();
497 :
498 0 : if (cs == NULL) {
499 : // string has been allocated by java code
500 : // but <init> has not been called on it yet.
501 0 : return os << "<uninitialized string>";
502 : } else {
503 0 : os << '"';
504 :
505 0 : for (const u2 *end = js.end(); cs != end; ++cs) {
506 0 : os << ((char) *cs);
507 : }
508 :
509 0 : os << '"';
510 :
511 0 : return os;
512 : }
513 495 : }
514 :
515 :
516 : /*
517 : * These are local overrides for various environment variables in Emacs.
518 : * Please do not remove this and leave it at the end of the file, where
519 : * Emacs will automagically detect them.
520 : * ---------------------------------------------------------------------
521 : * Local variables:
522 : * mode: c++
523 : * indent-tabs-mode: t
524 : * c-basic-offset: 4
525 : * tab-width: 4
526 : * End:
527 : * vim:noexpandtab:sw=4:ts=4:
528 : */
|