Line data Source code
1 : /* src/toolbox/buffer.hpp - String buffer header
2 :
3 : Copyright (C) 1996-2013
4 : CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 :
6 : This file is part of CACAO.
7 :
8 : This program is free software; you can redistribute it and/or
9 : modify it under the terms of the GNU General Public License as
10 : published by the Free Software Foundation; either version 2, or (at
11 : your option) any later version.
12 :
13 : This program is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program; if not, write to the Free Software
20 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 : 02110-1301, USA.
22 :
23 : */
24 :
25 :
26 : #ifndef BUFFER_HPP_
27 : #define BUFFER_HPP_ 1
28 :
29 : #include <inttypes.h>
30 : #include <cstdarg>
31 : #include "mm/memory.hpp"
32 : #include "toolbox/utf_utils.hpp"
33 : #include "vm/global.hpp" // for MAX
34 : #include "vm/utf8.hpp"
35 : #include "vm/string.hpp"
36 :
37 : /* Buffer **********************************************************************
38 :
39 : An in memory buffer.
40 :
41 : The buffer automatically grows to fit it's input.
42 :
43 : When a buffer goes out of scope all memory associated with it will be freed
44 :
45 : Don't forget to zero terminate contents of buffer if you use them directly.
46 :
47 : TEMPLATE PARAMETERS:
48 : Allocator ... The type of allocator to use for allocating the buffer.
49 : Must support the non-standard reallocate function.
50 :
51 : *******************************************************************************/
52 :
53 : template<typename Allocator = MemoryAllocator<uint8_t> >
54 : class Buffer {
55 : public:
56 : // construct a buffer with size
57 : Buffer(size_t initial_size = 64);
58 :
59 : // free content of buffer
60 : ~Buffer();
61 :
62 : // *********************************************************************
63 : // ***** WRITE TO BUFFER
64 :
65 : // write to buffer byte-by-byte
66 : inline Buffer& write(char);
67 : inline Buffer& write(Utf8String);
68 : inline Buffer& write(JavaString);
69 : inline Buffer& write(const char*);
70 : inline Buffer& write(const char*, size_t);
71 : inline Buffer& write(const uint16_t*, size_t);
72 :
73 : /// write to buffer, replacing '/' by '.'
74 : inline Buffer& write_slash_to_dot(const char*);
75 : inline Buffer& write_slash_to_dot(Utf8String);
76 :
77 : /// write to buffer, replacing '.' by '/'
78 : inline Buffer& write_dot_to_slash(Utf8String);
79 :
80 : /// copy contents of buffer (calls c_str() on src)
81 : template<typename A>
82 35915 : inline Buffer& write(Buffer<A>& src) { return write(src.c_str()); }
83 :
84 : /// write address of pointer as hex to buffer
85 : inline Buffer& write_ptr(void*);
86 :
87 : /// write number to buffer as decimal
88 : inline Buffer& write_dec(s4);
89 : inline Buffer& write_dec(s8);
90 : inline Buffer& write_dec(float);
91 : inline Buffer& write_dec(double);
92 :
93 : /// write number to buffer as hexadecimal
94 : inline Buffer& write_hex(s4);
95 : inline Buffer& write_hex(s8);
96 : inline Buffer& write_hex(float);
97 : inline Buffer& write_hex(double);
98 :
99 : // like printf
100 : inline Buffer& writef(const char* fmt, ...);
101 : inline Buffer& writevf(const char* fmt, va_list ap);
102 :
103 : /// ensure string in buffer is zero terminated
104 : inline Buffer& zero_terminate();
105 :
106 : // *********************************************************************
107 : // ***** GET CONTENTS OF BUFFER
108 :
109 : /// get contents of buffer as zero-terminated c-style-string
110 : /// This strings lifetime is tied to it's buffer.
111 : inline const char* c_str();
112 :
113 : /// get copy contents of buffer as zero-terminated c-style-string
114 : /// You must free the returned string yourself (
115 : /// use a copy of the buffer's allocator via get_allocator())
116 : inline const char* c_str_copy();
117 :
118 : /// get utf-8 string contents of buffer as utf8-string
119 : inline Utf8String utf8_str();
120 :
121 : /// get raw contents of buffer (not necessarily zero terminated).
122 : inline uint8_t* data();
123 : inline const uint8_t* data() const;
124 :
125 : /// get size of buffer contents
126 : inline size_t size();
127 :
128 : // *********************************************************************
129 : // ***** CHANGE BUFFER POSITION
130 :
131 : /// Reset buffer position to start of buffer.
132 : /// This effectively clears the buffer.
133 : /// O(1)
134 : inline void reset();
135 :
136 : /// advance buffer position by n bytes
137 : /// O(1)
138 : inline void skip(size_t num_bytes);
139 :
140 : /// remove data from the back of this buffer.
141 : /// O(1)
142 : inline void rewind(size_t bytes_to_drop);
143 :
144 : // *********************************************************************
145 : // ***** MISC
146 :
147 : /// ensure buffer contains space for at least sz bytes
148 : void ensure_capacity(size_t sz);
149 : private:
150 : /// non-copyable
151 : Buffer(const Buffer&);
152 : ///non-assignable
153 : Buffer& operator=(const Buffer&);
154 :
155 : uint8_t *_start, *_end, *_pos;
156 : Allocator _alloc;
157 :
158 : // used to encode utf16 strings to utf8
159 : struct Encode : utf16::VisitorBase<void> {
160 : typedef void ReturnType;
161 :
162 0 : Encode(Buffer& dst) : _dst(dst) {}
163 :
164 0 : void utf8 (uint8_t c) { _dst.write(c); }
165 : private:
166 : Buffer& _dst;
167 : };
168 : };
169 :
170 : //****************************************************************************//
171 : //***** IMPLEMENTATION *****//
172 : //****************************************************************************//
173 :
174 : /* Buffer::Buffer **************************************************************
175 :
176 : Construct a new Buffer with a given size.
177 :
178 : IN:
179 : buf_size ....... The number of bytes that should be preallocated
180 : for input in the buffer.
181 :
182 : *******************************************************************************/
183 :
184 : template<typename Allocator>
185 176416 : Buffer<Allocator>::Buffer(size_t initial_size)
186 : {
187 176416 : _start = _alloc.allocate(initial_size);
188 176416 : _end = _start + initial_size;
189 176416 : _pos = _start;
190 176416 : }
191 :
192 : template<typename Allocator>
193 176416 : Buffer<Allocator>::~Buffer()
194 : {
195 176416 : _alloc.deallocate(_start, _end - _start);
196 176416 : _start = _end = _pos = 0;
197 176416 : }
198 :
199 : /* Buffer::write(Utf8String) ***************************************************
200 :
201 : Insert a utf-8 string into buffer byte by byte.
202 : Does NOT inserts a zero terminator.
203 :
204 : *******************************************************************************/
205 :
206 : template<typename Allocator>
207 226961 : Buffer<Allocator>& Buffer<Allocator>::write(Utf8String u)
208 : {
209 226961 : return write(u.begin(), u.size());
210 : }
211 :
212 : /* Buffer::write(JavaString) ***************************************************
213 :
214 : Decode a java lang string into buffer
215 : Does NOT inserts a zero terminator.
216 :
217 : *******************************************************************************/
218 :
219 : template<typename Allocator>
220 0 : Buffer<Allocator>& Buffer<Allocator>::write(JavaString js)
221 : {
222 0 : return write(js.begin(), js.size());
223 : }
224 :
225 : /* Buffer::write(const char*) **************************************************
226 :
227 : Insert a zero terminated string into buffer byte by byte
228 : Does NOT inserts a zero terminator.
229 :
230 : *******************************************************************************/
231 :
232 : template<typename Allocator>
233 126416 : Buffer<Allocator>& Buffer<Allocator>::write(const char *cs)
234 : {
235 126416 : return write(cs, strlen(cs));
236 : }
237 :
238 : /* Buffer::write(const char*, size_t) ********************************************
239 :
240 : Insert string with a given length into buffer byte by byte
241 : Does NOT inserts a zero terminator.
242 :
243 : *******************************************************************************/
244 :
245 : template<typename Allocator>
246 353377 : Buffer<Allocator>& Buffer<Allocator>::write(const char *cs, size_t sz)
247 : {
248 353377 : ensure_capacity(sz);
249 :
250 353377 : memcpy(_pos, cs, sizeof(char) * sz);
251 :
252 353377 : _pos += sz;
253 :
254 353377 : return *this;
255 : }
256 :
257 : /* Buffer::write(const uint16_t*, size_t) **************************************
258 :
259 : Encode utf-16 string with a given length into buffer
260 : Does NOT inserts a zero terminator.
261 :
262 : *******************************************************************************/
263 :
264 : template<typename Allocator>
265 0 : Buffer<Allocator>& Buffer<Allocator>::write(const uint16_t *cs, size_t sz)
266 : {
267 0 : utf16::transform(cs, cs + sz, Encode(*this));
268 :
269 0 : return *this;
270 : }
271 :
272 : /* Buffer::write(char) *********************************************************
273 :
274 : Write a char into buffer.
275 : Does NOT inserts a zero terminator.
276 :
277 : *******************************************************************************/
278 :
279 : template<typename Allocator>
280 382937 : Buffer<Allocator>& Buffer<Allocator>::write(char c)
281 : {
282 382937 : ensure_capacity(1);
283 :
284 382937 : *_pos++ = c;
285 :
286 382937 : return *this;
287 : }
288 :
289 : /* Buffer::write_slash_to_dot(Utf8String) **************************************
290 :
291 : Insert a utf-8 string into buffer byte by byte, replacing '/' by '.'.
292 : Does NOT inserts a zero terminator.
293 :
294 : *******************************************************************************/
295 :
296 : template<typename Allocator>
297 : Buffer<Allocator>& Buffer<Allocator>::write_slash_to_dot(const char *cs) {
298 : size_t sz = std::strlen(cs);
299 :
300 : ensure_capacity(sz);
301 :
302 : const char* src = cs;
303 : const char* end = cs + sz;
304 :
305 : for ( ; src != end; ++_pos, ++src ) {
306 : char c = *src;
307 :
308 : *_pos = (c == '/') ? '.' : c;
309 : }
310 :
311 : return *this;
312 : }
313 :
314 : template<typename Allocator>
315 92320 : Buffer<Allocator>& Buffer<Allocator>::write_slash_to_dot(Utf8String u) {
316 92320 : ensure_capacity(u.size());
317 :
318 92320 : const char* src = u.begin();
319 92320 : const char* end = u.end();
320 :
321 2297400 : for ( ; src != end; ++_pos, ++src ) {
322 2205080 : char c = *src;
323 :
324 2205080 : *_pos = (c == '/') ? '.' : c;
325 : }
326 :
327 92320 : return *this;
328 : }
329 :
330 : /* Buffer::write_dot_to_slash(Utf8String) **************************************
331 :
332 : Insert a utf-8 string into buffer byte by byte, replacing '.' by '/'.
333 : Does NOT inserts a zero terminator.
334 :
335 : *******************************************************************************/
336 :
337 : template<typename Allocator>
338 : Buffer<Allocator>& Buffer<Allocator>::write_dot_to_slash(Utf8String u) {
339 : ensure_capacity(u.size());
340 :
341 : const char* src = u.begin();
342 : const char* end = u.end();
343 :
344 : for ( ; src != end; ++_pos, ++src ) {
345 : char c = *src;
346 :
347 : *_pos = (c == '.') ? '/' : c;
348 : }
349 :
350 : return *this;
351 : }
352 :
353 : /* Buffer::write_ptr/dec/hex **************************************************
354 :
355 : Format a pointer or a number as text and append it to the buffer.
356 : All of these forward to writef().
357 :
358 : *******************************************************************************/
359 :
360 : template<typename Allocator>
361 0 : Buffer<Allocator>& Buffer<Allocator>::write_ptr(void *ptr) {
362 0 : return writef("0x%" PRIxPTR, (uintptr_t) ptr);
363 : }
364 :
365 : template<typename Allocator>
366 0 : Buffer<Allocator>& Buffer<Allocator>::write_dec(s4 n) {
367 0 : return writef("%d", n);
368 : }
369 :
370 : template<typename Allocator>
371 0 : Buffer<Allocator>& Buffer<Allocator>::write_dec(s8 n) {
372 0 : return writef("0x%" PRId64, n);
373 : }
374 :
375 : template<typename Allocator>
376 0 : Buffer<Allocator>& Buffer<Allocator>::write_dec(float n) {
377 0 : return writef("%g", n);
378 : }
379 :
380 : template<typename Allocator>
381 0 : Buffer<Allocator>& Buffer<Allocator>::write_dec(double n) {
382 0 : return writef("%g", n);
383 : }
384 :
385 : template<typename Allocator>
386 0 : Buffer<Allocator>& Buffer<Allocator>::write_hex(s4 n) {
387 0 : return writef("%08x", n);
388 : }
389 :
390 : template<typename Allocator>
391 0 : Buffer<Allocator>& Buffer<Allocator>::write_hex(s8 n) {
392 0 : return writef("0x%" PRIx64, n);
393 : }
394 :
395 : template<typename Allocator>
396 0 : Buffer<Allocator>& Buffer<Allocator>::write_hex(float n) {
397 : union {
398 : float f;
399 : s4 i;
400 : } u;
401 :
402 0 : u.f = n;
403 :
404 0 : return write_hex(u.i);
405 : }
406 :
407 : template<typename Allocator>
408 0 : Buffer<Allocator>& Buffer<Allocator>::write_hex(double n) {
409 : union {
410 : double d;
411 : s8 l;
412 : } u;
413 :
414 0 : u.d = n;
415 :
416 0 : return write_hex(u.l);
417 : }
418 :
419 : /* Buffer::writef/writevf ******************************************************
420 :
421 : Like (v)snprintf but for buffers.
422 :
423 : *******************************************************************************/
424 :
425 : template<typename Allocator>
426 0 : Buffer<Allocator>& Buffer<Allocator>::writef(const char *fmt, ...)
427 : {
428 : va_list ap;
429 :
430 0 : va_start(ap, fmt);
431 0 : writevf(fmt,ap);
432 0 : va_end(ap);
433 :
434 0 : return *this;
435 : }
436 :
437 : template<typename Allocator>
438 28 : Buffer<Allocator>& Buffer<Allocator>::writevf(const char *fmt, va_list ap)
439 : {
440 : va_list ap2;
441 28 : __va_copy(ap2, ap); // unfortunately va_copy is only exposed for C99/C++11 or later
442 :
443 28 : size_t size = _end - _pos;
444 28 : size_t written = vsnprintf((char*) _pos, size, fmt, ap);
445 :
446 28 : if (written > size) {
447 : // buffer was too small (+1 for zero byte)
448 10 : ensure_capacity(written + 1);
449 :
450 10 : size = _end - _pos;
451 10 : written = vsnprintf((char*) _pos, size, fmt, ap2);
452 10 : assert(written <= size);
453 : }
454 :
455 28 : _pos += written;
456 :
457 28 : va_end(ap2);
458 :
459 28 : return *this;
460 : }
461 :
462 : /* Buffer::zero_terminate ******************************************************
463 :
464 : Ensure content of buffer is a zero terminated string.
465 : Does not alter the buffers position.
466 :
467 : *******************************************************************************/
468 :
469 : template<typename Allocator>
470 178839 : Buffer<Allocator>& Buffer<Allocator>::zero_terminate()
471 : {
472 178839 : ensure_capacity(1);
473 :
474 178839 : *_pos = '\0';
475 :
476 178839 : return *this;
477 : }
478 :
479 : /* Buffer::c_str() *************************************************************
480 :
481 : Returns the buffers contents as read only c-style string.
482 : The string remains valid only as long as the buffer exists.
483 :
484 : Automatically ensures that this buffers contents are zero terminated
485 :
486 : *******************************************************************************/
487 :
488 : template<typename Allocator>
489 167126 : const char* Buffer<Allocator>::c_str()
490 : {
491 167126 : zero_terminate();
492 :
493 167125 : return (const char*) _start;
494 : }
495 :
496 : /* Buffer::utf8_str() **********************************************************
497 :
498 : Create a new Utf8String whose contents are equal to the contents of this
499 : buffer.
500 :
501 : Automatically ensures that this buffers contents are zero terminated
502 :
503 : *******************************************************************************/
504 :
505 : template<typename Allocator>
506 11713 : Utf8String Buffer<Allocator>::utf8_str()
507 : {
508 11713 : zero_terminate();
509 :
510 11713 : return Utf8String::from_utf8((char*) _start, _pos - _start);
511 : }
512 :
513 : /* Buffer::data() **************************************************************
514 :
515 : get raw contents of buffer (not necessarily zero terminated).
516 : The pointer remains valid only as long as the buffer exists.
517 :
518 : Returns NULL if buffer is empty
519 :
520 : *******************************************************************************/
521 :
522 : template<typename Allocator>
523 : uint8_t* Buffer<Allocator>::data()
524 : {
525 : return _start;
526 : }
527 :
528 : template<typename Allocator>
529 : const uint8_t* Buffer<Allocator>::data() const
530 : {
531 : return _start;
532 : }
533 :
534 : /* Buffer::size() **************************************************************
535 :
536 : get size of buffer contents
537 :
538 : *******************************************************************************/
539 :
540 : template<typename Allocator>
541 : size_t Buffer<Allocator>::size()
542 : {
543 : return _pos - _start;
544 : }
545 :
546 : /* Buffer::reset ***************************************************************
547 :
548 : Reset buffer position to start of buffer.
549 :
550 : *******************************************************************************/
551 :
552 : template<typename Allocator>
553 35915 : void Buffer<Allocator>::reset()
554 : {
555 35915 : _pos = _start;
556 35915 : }
557 :
558 : /* Buffer::rewind **************************************************************
559 :
560 : Undo write operations by removing data of the back of this buffer.
561 : Does not perform a bounds check.
562 :
563 : IN:
564 : bytes_to_drop ... how many bytes of content should be removed from the
565 : back of the buffer.
566 :
567 : NOTE:
568 : The content of the buffer is not necesserily valid utf-8 or
569 : null terminated after calling this.
570 :
571 : *******************************************************************************/
572 :
573 : template<typename Allocator>
574 : void Buffer<Allocator>::rewind(size_t bytes_to_drop)
575 : {
576 : _pos -= bytes_to_drop;
577 : }
578 :
579 : /* Buffer::ensure_capacity *****************************************************
580 :
581 : Automatically grows buffer if doesn't have enough space.
582 :
583 : IN:
584 : write_size ... the number of bytes that will be written by the next
585 : write operation. Buffer will be resized if it doesn't
586 : have enough space to satisfy that write.
587 :
588 : *******************************************************************************/
589 :
590 : template<typename Allocator>
591 1007483 : void Buffer<Allocator>::ensure_capacity(size_t write_size)
592 : {
593 1007483 : size_t free_space = _end - _pos;
594 :
595 1007483 : if (free_space < write_size) {
596 : // increase capacity
597 63873 : size_t old_size = _pos - _start;
598 63873 : size_t old_capacity = _end - _start;
599 :
600 63873 : size_t new_capacity = MAX(old_capacity, write_size) * 2 + 1;
601 63873 : assert(new_capacity > (old_capacity + write_size));
602 :
603 : // enlarge buffer
604 63873 : _start = _alloc.reallocate(_start, old_capacity, new_capacity);
605 63873 : _end = _start + new_capacity;
606 63873 : _pos = _start + old_size;
607 : }
608 1007483 : }
609 :
610 :
610 : #endif // BUFFER_HPP_
612 :
613 :
614 : /*
615 : * These are local overrides for various environment variables in Emacs.
616 : * Please do not remove this and leave it at the end of the file, where
617 : * Emacs will automagically detect them.
618 : * ---------------------------------------------------------------------
619 : * Local variables:
620 : * mode: c++
621 : * indent-tabs-mode: t
622 : * c-basic-offset: 4
623 : * tab-width: 4
624 : * End:
625 : * vim:noexpandtab:sw=4:ts=4:
626 : */
|