Line data Source code
1 : /* src/vm/zip.cpp - ZIP file handling for bootstrap classloader
2 :
3 : Copyright (C) 1996-2005, 2006, 2007, 2008
4 : CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 :
6 : This file is part of CACAO.
7 :
8 : This program is free software; you can redistribute it and/or
9 : modify it under the terms of the GNU General Public License as
10 : published by the Free Software Foundation; either version 2, or (at
11 : your option) any later version.
12 :
13 : This program is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program; if not, write to the Free Software
20 : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 : 02110-1301, USA.
22 :
23 : */
24 :
25 :
26 : #include "vm/zip.hpp"
27 : #include "config.h"
28 :
29 : #include <cassert>
30 : #include <cerrno>
31 : #include <unistd.h>
32 : #include <zlib.h>
33 :
34 : #include "mm/memory.hpp"
35 :
36 : #include "toolbox/endianess.hpp"
37 : #include "toolbox/hashtable.hpp"
38 :
39 : #include "mm/memory.hpp"
40 :
41 : #include "vm/os.hpp"
42 : #include "vm/suck.hpp"
43 : #include "vm/types.hpp"
44 : #include "vm/utf8.hpp"
45 : #include "vm/vm.hpp"
46 : #include "vm/zip.hpp"
47 :
48 : using namespace cacao;
49 :
50 :
51 : /* start size for classes hashtable *******************************************/
52 :
53 : #define HASHTABLE_CLASSES_SIZE (1 << 10) /* 1024; passed to the ZipFile constructor in ZipFile::open */
54 :
55 :
56 : /* info taken from:
57 : http://www.pkware.com/business_and_developers/developer/popups/appnote.txt
58 : */
59 :
60 : /* all signatures in the ZIP file have a length of 4 bytes ********************/
61 :
62 : #define SIGNATURE_LENGTH 4 /* sizes the buffer used to read the first local file header signature */
63 :
64 : /* Central directory structure *************************************************
65 :
66 : [file header 1]
67 : .
68 : .
69 : .
70 : [file header n]
71 : [digital signature]
72 :
73 : File header:
74 :
75 : central file header signature 4 bytes (0x02014b50)
76 : version made by 2 bytes
77 : version needed to extract 2 bytes
78 : general purpose bit flag 2 bytes
79 : compression method 2 bytes
80 : last mod file time 2 bytes
81 : last mod file date 2 bytes
82 : crc-32 4 bytes
83 : compressed size 4 bytes
84 : uncompressed size 4 bytes
85 : file name length 2 bytes
86 : extra field length 2 bytes
87 : file comment length 2 bytes
88 : disk number start 2 bytes
89 : internal file attributes 2 bytes
90 : external file attributes 4 bytes
91 : relative offset of local header 4 bytes
92 :
93 : file name (variable size)
94 : extra field (variable size)
95 : file comment (variable size)
96 :
97 : Digital signature:
98 :
99 : header signature 4 bytes (0x05054b50)
100 : size of data 2 bytes
101 : signature data (variable size)
102 :
103 : *******************************************************************************/
104 :
105 : #define CDSFH_HEADER_SIZE 46 /* size in bytes of the fixed-length part of a file header */
106 :
107 : #define CDSFH_SIGNATURE 0x02014b50 /* central file header signature, read little-endian */
108 : #define CDSFH_COMPRESSION_METHOD 10 /* byte offset of "compression method" (2 bytes) */
109 : #define CDSFH_COMPRESSED_SIZE 20 /* byte offset of "compressed size" (4 bytes) */
110 : #define CDSFH_UNCOMPRESSED_SIZE 24 /* byte offset of "uncompressed size" (4 bytes) */
111 : #define CDSFH_FILE_NAME_LENGTH 28 /* byte offset of "file name length" (2 bytes) */
112 : #define CDSFH_EXTRA_FIELD_LENGTH 30 /* byte offset of "extra field length" (2 bytes) */
113 : #define CDSFH_FILE_COMMENT_LENGTH 32 /* byte offset of "file comment length" (2 bytes) */
114 : #define CDSFH_RELATIVE_OFFSET 42 /* byte offset of "relative offset of local header" (4 bytes) */
115 : #define CDSFH_FILENAME 46 /* byte offset of the variable-size file name, right after the fixed part */
116 :
117 : struct cdsfh { /* the central directory file header fields that ZipFile::open extracts */
118 : u2 compressionmethod; /* read from CDSFH_COMPRESSION_METHOD */
119 : u4 compressedsize; /* read from CDSFH_COMPRESSED_SIZE */
120 : u4 uncompressedsize; /* read from CDSFH_UNCOMPRESSED_SIZE */
121 : u2 filenamelength; /* read from CDSFH_FILE_NAME_LENGTH */
122 : u2 extrafieldlength; /* read from CDSFH_EXTRA_FIELD_LENGTH */
123 : u2 filecommentlength; /* read from CDSFH_FILE_COMMENT_LENGTH */
124 : u4 relativeoffset; /* read from CDSFH_RELATIVE_OFFSET; added to the file start to locate the entry */
125 : };
126 :
127 :
128 : /* End of central directory record *********************************************
129 :
130 : end of central dir signature 4 bytes (0x06054b50)
131 : number of this disk 2 bytes
132 : number of the disk with the
133 : start of the central directory 2 bytes
134 : total number of entries in the
135 : central directory on this disk 2 bytes
136 : total number of entries in
137 : the central directory 2 bytes
138 : size of the central directory 4 bytes
139 : offset of start of central
140 : directory with respect to
141 : the starting disk number 4 bytes
142 : .ZIP file comment length 2 bytes
143 : .ZIP file comment (variable size)
144 :
145 : *******************************************************************************/
146 :
147 : #define EOCDR_SIGNATURE 0x06054b50 /* end of central dir signature, read little-endian */
148 : #define EOCDR_ENTRIES 10 /* byte offset of "total number of entries in the central directory" (2 bytes) */
149 : #define EOCDR_OFFSET 16 /* byte offset of "offset of start of central directory" (4 bytes) */
150 :
151 : struct eocdr { /* the end of central directory fields that ZipFile::open extracts */
152 : u2 entries; /* read from EOCDR_ENTRIES; number of file headers to walk */
153 : u4 offset; /* read from EOCDR_OFFSET; added to the file start to find the first file header */
154 : };
155 :
156 :
157 : /***
158 : * Load zip file into memory
159 : */
160 164 : ZipFile *ZipFile::open(const char *path) {
161 : int fd;
162 : u1 lfh_signature[SIGNATURE_LENGTH];
163 : off_t len;
164 : u1 *p;
165 : eocdr eocdr;
166 : cdsfh cdsfh;
167 :
168 : // first of all, open the file
169 :
170 164 : if ((fd = ::open(path, O_RDONLY)) == -1)
171 0 : return NULL;
172 :
173 : // check for signature in first local file header
174 :
175 164 : if (read(fd, lfh_signature, SIGNATURE_LENGTH) != SIGNATURE_LENGTH)
176 0 : return NULL;
177 :
178 164 : if (read_u4_le(lfh_signature) != LFH_SIGNATURE)
179 0 : return NULL;
180 :
181 : // get the file length
182 :
183 164 : if ((len = lseek(fd, 0, SEEK_END)) == -1)
184 0 : return NULL;
185 :
186 : // we better mmap the file
187 :
188 164 : u1 *filep = (u1*) mmap(0, len, PROT_READ, MAP_PRIVATE, fd, 0);
189 :
190 : // some older compilers, like DEC OSF cc, don't like comparisons
191 : // on void* type
192 :
193 164 : if ((ptrint) filep == (ptrint) MAP_FAILED)
194 0 : return NULL;
195 :
196 : // find end of central directory record
197 :
198 3772 : for (p = filep + len; p >= filep; p--)
199 3772 : if (read_u4_le(p) == EOCDR_SIGNATURE)
200 164 : break;
201 :
202 : // get number of entries in central directory
203 :
204 164 : eocdr.entries = read_u2_le(p + EOCDR_ENTRIES);
205 164 : eocdr.offset = read_u4_le(p + EOCDR_OFFSET);
206 :
207 : // create hashtable for filenames
208 :
209 164 : ZipFile *file = new ZipFile(HASHTABLE_CLASSES_SIZE);
210 :
211 : // add all file entries into the hashtable
212 :
213 164 : p = filep + eocdr.offset;
214 :
215 1320468 : for (s4 i = 0; i < eocdr.entries; i++) {
216 : // check file header signature
217 :
218 1320304 : if (read_u4_le(p) != CDSFH_SIGNATURE)
219 0 : return NULL;
220 :
221 : // we found an entry
222 :
223 1320304 : cdsfh.compressionmethod = read_u2_le(p + CDSFH_COMPRESSION_METHOD);
224 1320304 : cdsfh.compressedsize = read_u4_le(p + CDSFH_COMPRESSED_SIZE);
225 1320304 : cdsfh.uncompressedsize = read_u4_le(p + CDSFH_UNCOMPRESSED_SIZE);
226 1320304 : cdsfh.filenamelength = read_u2_le(p + CDSFH_FILE_NAME_LENGTH);
227 1320304 : cdsfh.extrafieldlength = read_u2_le(p + CDSFH_EXTRA_FIELD_LENGTH);
228 1320304 : cdsfh.filecommentlength = read_u2_le(p + CDSFH_FILE_COMMENT_LENGTH);
229 1320304 : cdsfh.relativeoffset = read_u4_le(p + CDSFH_RELATIVE_OFFSET);
230 :
231 : // create utf8 string of filename, strip .class from classes
232 :
233 1320304 : const char *filename = (const char *) (p + CDSFH_FILENAME);
234 1320304 : const char *classext = filename + cdsfh.filenamelength - strlen(".class");
235 :
236 : // skip directory entries
237 :
238 1320304 : if (filename[cdsfh.filenamelength - 1] != '/') {
239 1253961 : Utf8String u;
240 :
241 1253961 : if (strncmp(classext, ".class", strlen(".class")) == 0)
242 1195117 : u = Utf8String::from_utf8(filename, cdsfh.filenamelength - strlen(".class"));
243 : else
244 58844 : u = Utf8String::from_utf8(filename, cdsfh.filenamelength);
245 :
246 : // create zip entry
247 :
248 1253961 : ZipFileEntry entry;
249 :
250 1253961 : entry.filename = u;
251 1253961 : entry.compressionmethod = cdsfh.compressionmethod;
252 1253961 : entry.compressedsize = cdsfh.compressedsize;
253 1253961 : entry.uncompressedsize = cdsfh.uncompressedsize;
254 1253961 : entry.data = filep + cdsfh.relativeoffset;
255 :
256 : // insert into hashtable
257 :
258 1253961 : file->table.insert(entry);
259 : }
260 :
261 : // move to next central directory structure file header
262 :
263 : p = p
264 : + CDSFH_HEADER_SIZE
265 : + cdsfh.filenamelength
266 : + cdsfh.extrafieldlength
267 1320304 : + cdsfh.filecommentlength;
268 : }
269 :
270 : // return pointer to hashtable
271 :
272 164 : return file;
273 : }
274 :
275 :
276 : /***
277 : * Load file from zip archive into memory
278 : */
279 32723 : void ZipFileEntry::get(uint8_t *dst) const {
280 : lfh lfh;
281 : z_stream zs;
282 : int err;
283 :
284 : // read stuff from local file header
285 :
286 32723 : lfh.filenamelength = read_u2_le(data + LFH_FILE_NAME_LENGTH);
287 32723 : lfh.extrafieldlength = read_u2_le(data + LFH_EXTRA_FIELD_LENGTH);
288 :
289 : u1 *indata = data
290 : + LFH_HEADER_SIZE
291 : + lfh.filenamelength
292 32723 : + lfh.extrafieldlength;
293 :
294 : // how is the file stored?
295 :
296 32723 : switch (compressionmethod) {
297 : case Z_DEFLATED:
298 : // fill z_stream structure
299 :
300 32723 : zs.next_in = indata;
301 32723 : zs.avail_in = compressedsize;
302 32723 : zs.next_out = dst;
303 32723 : zs.avail_out = uncompressedsize;
304 :
305 32723 : zs.zalloc = Z_NULL;
306 32723 : zs.zfree = Z_NULL;
307 32723 : zs.opaque = Z_NULL;
308 :
309 : // initialize this inflate run
310 :
311 32723 : if (inflateInit2(&zs, -MAX_WBITS) != Z_OK)
312 0 : vm_abort("zip_get: inflateInit2 failed: %s", strerror(errno));
313 :
314 : // decompress the file into buffer
315 :
316 32723 : err = inflate(&zs, Z_SYNC_FLUSH);
317 :
318 32723 : if ((err != Z_STREAM_END) && (err != Z_OK))
319 0 : vm_abort("zip_get: inflate failed: %s", strerror(errno));
320 :
321 : // finish this inflate run
322 :
323 32723 : if (inflateEnd(&zs) != Z_OK)
324 0 : vm_abort("zip_get: inflateEnd failed: %s", strerror(errno));
325 32723 : break;
326 :
327 : case 0:
328 : // uncompressed file, just copy the data
329 0 : MCOPY(dst, indata, u1, compressedsize);
330 0 : break;
331 :
332 : default:
333 0 : vm_abort("zip_get: unknown compression method %d", compressionmethod);
334 : break;
335 : }
336 32723 : }
337 :
338 : /*
339 : * These are local overrides for various environment variables in Emacs.
340 : * Please do not remove this and leave it at the end of the file, where
341 : * Emacs will automagically detect them.
342 : * ---------------------------------------------------------------------
343 : * Local variables:
344 : * mode: c++
345 : * indent-tabs-mode: t
346 : * c-basic-offset: 4
347 : * tab-width: 4
348 : * End:
349 : * vim:noexpandtab:sw=4:ts=4:
350 : */
|