CACAO
zip.cpp
Go to the documentation of this file.
1 /* src/vm/zip.cpp - ZIP file handling for bootstrap classloader
2 
3  Copyright (C) 1996-2005, 2006, 2007, 2008
4  CACAOVM - Verein zur Foerderung der freien virtuellen Maschine CACAO
5 
6  This file is part of CACAO.
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2, or (at
11  your option) any later version.
12 
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21  02110-1301, USA.
22 
23 */
24 
25 
26 #include "vm/zip.hpp"
27 #include "config.h"
28 
29 #include <cassert>
30 #include <cerrno>
31 #include <unistd.h>
32 #include <zlib.h>
33 
34 #include "mm/memory.hpp"
35 
36 #include "toolbox/endianess.hpp"
37 #include "toolbox/hashtable.hpp"
38 
39 #include "mm/memory.hpp"
40 
41 #include "vm/os.hpp"
42 #include "vm/suck.hpp"
43 #include "vm/types.hpp"
44 #include "vm/utf8.hpp"
45 #include "vm/vm.hpp"
46 #include "vm/zip.hpp"
47 
48 using namespace cacao;
49 
50 
51 /* start size for classes hashtable *******************************************/
52 
53 #define HASHTABLE_CLASSES_SIZE (1 << 10)
54 
55 
56 /* info taken from:
57  http://www.pkware.com/business_and_developers/developer/popups/appnote.txt
58 */
59 
60 /* all signatures in the ZIP file have a length of 4 bytes ********************/
61 
62 #define SIGNATURE_LENGTH 4
63 
64 /* Central directory structure *************************************************
65 
66  [file header 1]
67  .
68  .
69  .
70  [file header n]
71  [digital signature]
72 
73  File header:
74 
75  central file header signature 4 bytes (0x02014b50)
76  version made by 2 bytes
77  version needed to extract 2 bytes
78  general purpose bit flag 2 bytes
79  compression method 2 bytes
80  last mod file time 2 bytes
81  last mod file date 2 bytes
82  crc-32 4 bytes
83  compressed size 4 bytes
84  uncompressed size 4 bytes
85  file name length 2 bytes
86  extra field length 2 bytes
87  file comment length 2 bytes
88  disk number start 2 bytes
89  internal file attributes 2 bytes
90  external file attributes 4 bytes
91  relative offset of local header 4 bytes
92 
93  file name (variable size)
94  extra field (variable size)
95  file comment (variable size)
96 
97  Digital signature:
98 
99  header signature 4 bytes (0x05054b50)
100  size of data 2 bytes
101  signature data (variable size)
102 
103 *******************************************************************************/
104 
105 #define CDSFH_HEADER_SIZE 46
106 
/* CDSFH_SIGNATURE is the magic value that starts every central directory
   file header. The remaining constants are byte offsets from the start of
   such a header; each one is the sum of the sizes of the fields that
   precede it in the layout table above (e.g. compression method follows
   4 + 2 + 2 + 2 = 10 bytes). CDSFH_FILENAME equals the fixed header size,
   because the file name directly follows the fixed-size part. */
107 #define CDSFH_SIGNATURE 0x02014b50
108 #define CDSFH_COMPRESSION_METHOD 10
109 #define CDSFH_COMPRESSED_SIZE 20
110 #define CDSFH_UNCOMPRESSED_SIZE 24
111 #define CDSFH_FILE_NAME_LENGTH 28
112 #define CDSFH_EXTRA_FIELD_LENGTH 30
113 #define CDSFH_FILE_COMMENT_LENGTH 32
114 #define CDSFH_RELATIVE_OFFSET 42
115 #define CDSFH_FILENAME 46
116 
117 struct cdsfh {
125 };
126 
127 
128 /* End of central directory record *********************************************
129 
130  end of central dir signature 4 bytes (0x06054b50)
131  number of this disk 2 bytes
132  number of the disk with the
133  start of the central directory 2 bytes
134  total number of entries in the
135  central directory on this disk 2 bytes
136  total number of entries in
137  the central directory 2 bytes
138  size of the central directory 4 bytes
139  offset of start of central
140  directory with respect to
141  the starting disk number 4 bytes
142  .ZIP file comment length 2 bytes
143  .ZIP file comment (variable size)
144 
145 *******************************************************************************/
146 
/* EOCDR_SIGNATURE is the magic value that starts the end of central
   directory record. EOCDR_ENTRIES and EOCDR_OFFSET are byte offsets from
   the start of that record, derived from the layout table above:
   "total number of entries in the central directory" follows
   4 + 2 + 2 + 2 = 10 bytes, and "offset of start of central directory"
   follows 16 bytes. */
147 #define EOCDR_SIGNATURE 0x06054b50
148 #define EOCDR_ENTRIES 10
149 #define EOCDR_OFFSET 16
150 
151 struct eocdr {
154 };
155 
156 
157 /***
158  * Load zip file into memory
159  */
160 ZipFile *ZipFile::open(const char *path) {
161  int fd;
162  u1 lfh_signature[SIGNATURE_LENGTH];
163  off_t len;
164  u1 *p;
165  eocdr eocdr;
166  cdsfh cdsfh;
167 
168  // first of all, open the file
169 
170  if ((fd = ::open(path, O_RDONLY)) == -1)
171  return NULL;
172 
173  // check for signature in first local file header
174 
175  if (read(fd, lfh_signature, SIGNATURE_LENGTH) != SIGNATURE_LENGTH)
176  return NULL;
177 
178  if (read_u4_le(lfh_signature) != LFH_SIGNATURE)
179  return NULL;
180 
181  // get the file length
182 
183  if ((len = lseek(fd, 0, SEEK_END)) == -1)
184  return NULL;
185 
186  // we better mmap the file
187 
188  u1 *filep = (u1*) mmap(0, len, PROT_READ, MAP_PRIVATE, fd, 0);
189 
190  // some older compilers, like DEC OSF cc, don't like comparisons
191  // on void* type
192 
193  if ((ptrint) filep == (ptrint) MAP_FAILED)
194  return NULL;
195 
196  // find end of central directory record
197 
198  for (p = filep + len; p >= filep; p--)
199  if (read_u4_le(p) == EOCDR_SIGNATURE)
200  break;
201 
202  // get number of entries in central directory
203 
204  eocdr.entries = read_u2_le(p + EOCDR_ENTRIES);
205  eocdr.offset = read_u4_le(p + EOCDR_OFFSET);
206 
207  // create hashtable for filenames
208 
210 
211  // add all file entries into the hashtable
212 
213  p = filep + eocdr.offset;
214 
215  for (s4 i = 0; i < eocdr.entries; i++) {
216  // check file header signature
217 
218  if (read_u4_le(p) != CDSFH_SIGNATURE)
219  return NULL;
220 
221  // we found an entry
222 
230 
231  // create utf8 string of filename, strip .class from classes
232 
233  const char *filename = (const char *) (p + CDSFH_FILENAME);
234  const char *classext = filename + cdsfh.filenamelength - strlen(".class");
235 
236  // skip directory entries
237 
238  if (filename[cdsfh.filenamelength - 1] != '/') {
239  Utf8String u;
240 
241  if (strncmp(classext, ".class", strlen(".class")) == 0)
242  u = Utf8String::from_utf8(filename, cdsfh.filenamelength - strlen(".class"));
243  else
244  u = Utf8String::from_utf8(filename, cdsfh.filenamelength);
245 
246  // create zip entry
247 
248  ZipFileEntry entry;
249 
250  entry.filename = u;
251  entry.compressionmethod = cdsfh.compressionmethod;
252  entry.compressedsize = cdsfh.compressedsize;
253  entry.uncompressedsize = cdsfh.uncompressedsize;
254  entry.data = filep + cdsfh.relativeoffset;
255 
256  // insert into hashtable
257 
258  file->table.insert(entry);
259  }
260 
261  // move to next central directory structure file header
262 
263  p = p
265  + cdsfh.filenamelength
266  + cdsfh.extrafieldlength
267  + cdsfh.filecommentlength;
268  }
269 
270  // return pointer to hashtable
271 
272  return file;
273 }
274 
275 
276 /***
277  * Load file from zip archive into memory
278  */
279 void ZipFileEntry::get(uint8_t *dst) const {
280  lfh lfh;
281  z_stream zs;
282  int err;
283 
284  // read stuff from local file header
285 
288 
289  u1 *indata = data
291  + lfh.filenamelength
292  + lfh.extrafieldlength;
293 
294  // how is the file stored?
295 
296  switch (compressionmethod) {
297  case Z_DEFLATED:
298  // fill z_stream structure
299 
300  zs.next_in = indata;
301  zs.avail_in = compressedsize;
302  zs.next_out = dst;
303  zs.avail_out = uncompressedsize;
304 
305  zs.zalloc = Z_NULL;
306  zs.zfree = Z_NULL;
307  zs.opaque = Z_NULL;
308 
309  // initialize this inflate run
310 
311  if (inflateInit2(&zs, -MAX_WBITS) != Z_OK)
312  vm_abort("zip_get: inflateInit2 failed: %s", strerror(errno));
313 
314  // decompress the file into buffer
315 
316  err = inflate(&zs, Z_SYNC_FLUSH);
317 
318  if ((err != Z_STREAM_END) && (err != Z_OK))
319  vm_abort("zip_get: inflate failed: %s", strerror(errno));
320 
321  // finish this inflate run
322 
323  if (inflateEnd(&zs) != Z_OK)
324  vm_abort("zip_get: inflateEnd failed: %s", strerror(errno));
325  break;
326 
327  case 0:
328  // uncompressed file, just copy the data
329  MCOPY(dst, indata, u1, compressedsize);
330  break;
331 
332  default:
333  vm_abort("zip_get: unknown compression method %d", compressionmethod);
334  break;
335  }
336 }
337 
338 /*
339  * These are local overrides for various environment variables in Emacs.
340  * Please do not remove this and leave it at the end of the file, where
341  * Emacs will automagically detect them.
342  * ---------------------------------------------------------------------
343  * Local variables:
344  * mode: c++
345  * indent-tabs-mode: t
346  * c-basic-offset: 4
347  * tab-width: 4
348  * End:
349  * vim:noexpandtab:sw=4:ts=4:
350  */
u2 filecommentlength
Definition: zip.cpp:123
#define CDSFH_RELATIVE_OFFSET
Definition: zip.cpp:114
u2 extrafieldlength
Definition: zip.hpp:72
#define SIGNATURE_LENGTH
Definition: zip.cpp:62
u2 filenamelength
Definition: zip.cpp:121
u4 uncompressedsize
Definition: zip.hpp:81
u2 compressionmethod
Definition: zip.cpp:118
#define CDSFH_EXTRA_FIELD_LENGTH
Definition: zip.cpp:112
u4 compressedsize
Definition: zip.cpp:119
u2 entries
Definition: zip.cpp:152
Entry & insert(const T &t)
Definition: hashtable.hpp:273
Utf8String filename
Definition: zip.hpp:78
uint8_t u1
Definition: types.hpp:40
Definition: zip.hpp:98
Table table
Definition: zip.hpp:116
#define CDSFH_UNCOMPRESSED_SIZE
Definition: zip.cpp:110
#define CDSFH_HEADER_SIZE
Definition: zip.cpp:105
#define CDSFH_SIGNATURE
Definition: zip.cpp:107
void vm_abort(const char *text,...)
Definition: vm.cpp:2586
#define LFH_SIGNATURE
Definition: zip.hpp:63
u4 uncompressedsize
Definition: zip.cpp:120
#define CDSFH_COMPRESSION_METHOD
Definition: zip.cpp:108
#define LFH_FILE_NAME_LENGTH
Definition: zip.hpp:64
void get(uint8_t *dst) const
Definition: zip.cpp:279
u1 * data
Definition: zip.hpp:82
uint16_t u2
Definition: types.hpp:43
u2 compressionmethod
Definition: zip.hpp:79
#define HASHTABLE_CLASSES_SIZE
Definition: zip.cpp:53
#define EOCDR_SIGNATURE
Definition: zip.cpp:147
static Utf8String from_utf8(const char *, size_t)
Definition: utf8.cpp:335
u4 offset
Definition: zip.cpp:153
MIIterator i
u4 compressedsize
Definition: zip.hpp:80
int32_t s4
Definition: types.hpp:45
Definition: zip.hpp:67
#define LFH_HEADER_SIZE
Definition: zip.hpp:61
OStream & err()
Definition: OStream.cpp:33
#define CDSFH_FILE_NAME_LENGTH
Definition: zip.cpp:111
uint32_t u4
Definition: types.hpp:46
Definition: zip.cpp:117
static uint16_t read_u2_le(const uint8_t *src)
Definition: endianess.hpp:51
#define EOCDR_OFFSET
Definition: zip.cpp:149
u2 filenamelength
Definition: zip.hpp:71
Definition: zip.cpp:151
static ZipFile * open(const char *path)
Load zip archive.
Definition: zip.cpp:160
#define MCOPY(dest, src, type, num)
Definition: memory.hpp:103
#define EOCDR_ENTRIES
Definition: zip.cpp:148
uintptr_t ptrint
Definition: types.hpp:54
#define LFH_EXTRA_FIELD_LENGTH
Definition: zip.hpp:65
u4 relativeoffset
Definition: zip.cpp:124
static uint32_t read_u4_le(const uint8_t *src)
Definition: endianess.hpp:56
#define CDSFH_FILENAME
Definition: zip.cpp:115
u2 extrafieldlength
Definition: zip.cpp:122
#define CDSFH_FILE_COMMENT_LENGTH
Definition: zip.cpp:113
#define CDSFH_COMPRESSED_SIZE
Definition: zip.cpp:109