libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit b4ef30267b385b51949df757c5ae5a85d764cba3
parent d491e2066f59c74df2b3b8ac4d5450210826618f
Author: Christian Grothoff <christian@grothoff.org>
Date:   Tue, 14 Aug 2012 16:53:21 +0000

implemented zip, sid, nsf, nsfe and odf extractors

Diffstat:
Msrc/common/unzip.c | 977+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/common/unzip.h | 20++++++++++++++++----
Msrc/plugins/Makefile.am | 71++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/plugins/deb_extractor.c | 466+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Msrc/plugins/nsf_extractor.c | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
Msrc/plugins/nsfe_extractor.c | 330++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Msrc/plugins/odf_extractor.c | 85++++++++++++++++++++++++++++---------------------------------------------------
Msrc/plugins/sid_extractor.c | 198+++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
Asrc/plugins/test_deb.c | 150+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/test_odf.c | 100+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/test_zip.c | 108+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/testdata/deb_bzip2.deb | 0
Asrc/plugins/testdata/odf_cg.odt | 0
Asrc/plugins/testdata/zip_test.zip | 0
Msrc/plugins/zip_extractor.c | 469++++++++++++++++---------------------------------------------------------------
15 files changed, 1881 insertions(+), 1253 deletions(-)

diff --git a/src/common/unzip.c b/src/common/unzip.c @@ -25,6 +25,39 @@ * This code is based in part on * unzip 1.00 Copyright 1998-2003 Gilles Vollant * http://www.winimage.com/zLibDll" + * + * + * The filenames for each file in a zipfile are stored in two locations. + * There is one at the start of each entry, just before the compressed data, + * and another at the end in a 'central directory structure'. + * + * In order to catch self-extracting executables, we scan backwards from the end + * of the file looking for the central directory structure. The previous version + * of this went forewards through the local headers, but that only works for plain + * vanilla zip's and I don't feel like writing a special case for each of the dozen + * self-extracting executable stubs. + * + * This assumes that the zip file is considered to be non-corrupt/non-truncated. + * If it is truncated then it's not considered to be a zip and skipped. + * + * ZIP format description from appnote.iz and appnote.txt (more or less): + * + * (this is why you always need to put in the last floppy if you span disks) + * + * 0- 3 end of central dir signature 4 bytes (0x06054b50) P K ^E ^F + * 4- 5 number of this disk 2 bytes + * 6- 7 number of the disk with the + * start of the central directory 2 bytes + * 8- 9 total number of entries in + * the central dir on this disk 2 bytes + * 10-11 total number of entries in + * the central dir 2 bytes + * 12-15 size of the central directory 4 bytes + * 16-19 offset of start of central + * directory with respect to + * the starting disk number 4 bytes + * 20-21 zipfile comment length 2 bytes + * 22-?? zipfile comment (variable size) max length 65536 bytes */ #include "platform.h" #include <ctype.h> @@ -49,27 +82,27 @@ /** * IO callbacks for access to the ZIP data. */ -struct EXTRACTOR_UnzipFileFuncDefs +struct FileFuncDefs { /** * Callback for reading 'size' bytes from the ZIP archive into buf. 
*/ - uLong ( *zread_file) (voidpf opaque, void* buf, uLong size); + uLong (*zread_file) (voidpf opaque, void* buf, uLong size); /** * Callback to obtain the current read offset in the ZIP archive. */ - long ( *ztell_file) (voidpf opaque); + long (*ztell_file) (voidpf opaque); /** * Callback for seeking to a different position in the ZIP archive. */ - long ( *zseek_file) (voidpf opaque, uLong offset, int origin); + long (*zseek_file) (voidpf opaque, uLong offset, int origin); /** * Opaque argument to pass to all IO functions. */ - voidpf opaque; + voidpf opaque; }; @@ -119,6 +152,11 @@ struct GlobalInfo * size of the global comment of the zipfile */ uLong size_comment; + + /** + * offset of the global comment in the zipfile + */ + uLong offset_comment; }; @@ -198,9 +236,9 @@ struct FileInZipReadInfo uLong rest_read_uncompressed; /** - * IO functions. (FIXME: where is this assigned?) + * IO functions. */ - struct EXTRACTOR_UnzipFileFuncDefs z_filefunc; + struct FileFuncDefs z_filefunc; /** * compression method (0==store) @@ -223,7 +261,7 @@ struct EXTRACTOR_UnzipFile /** * io structore of the zipfile */ - struct EXTRACTOR_UnzipFileFuncDefs z_filefunc; + struct FileFuncDefs z_filefunc; /** * public global information @@ -279,7 +317,7 @@ struct EXTRACTOR_UnzipFile /** * structure about the current file if we are decompressing it */ - struct FileInZipReadInfo* pfile_in_zip_read; + struct FileInZipReadInfo *pfile_in_zip_read; /** * Is the file encrypted? @@ -292,33 +330,46 @@ struct EXTRACTOR_UnzipFile * Read a byte from a gz_stream; update next_in and avail_in. Return EOF * for end of file. * IN assertion: the stream s has been sucessfully opened for reading. 
+ * + * @param ffd functions for performing IO operations + * @param pi where to store the byte that was read + * @return EXTRACTOR_UNZIP_OK on success, or EXTRACTOR_UNZIP_EOF */ static int -unzlocal_getByte (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def, - int *pi) +read_byte_from_ffd (const struct FileFuncDefs *ffd, + int *pi) { unsigned char c; - if (1 != ZREAD (*pzlib_filefunc_def, &c, 1)) + if (1 != ZREAD (*ffd, &c, 1)) return EXTRACTOR_UNZIP_EOF; *pi = (int) c; return EXTRACTOR_UNZIP_OK; } +/** + * Read a short (2 bytes) from a gz_stream; update next_in and avail_in. Return EOF + * for end of file. + * IN assertion: the stream s has been sucessfully opened for reading. + * + * @param ffd functions for performing IO operations + * @param pi where to store the short that was read + * @return EXTRACTOR_UNZIP_OK on success, or EXTRACTOR_UNZIP_EOF + */ static int -unzlocal_getShort (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def, - uLong *pX) +read_short_from_ffd (const struct FileFuncDefs *ffd, + uLong *pX) { uLong x; int i; int err; *pX = 0; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x = (uLong) i; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x += ((uLong) i) << 8; *pX = x; @@ -326,25 +377,34 @@ unzlocal_getShort (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def, } +/** + * Read a 'long' (4 bytes) from a gz_stream; update next_in and avail_in. Return EOF + * for end of file. + * IN assertion: the stream s has been sucessfully opened for reading. 
+ * + * @param ffd functions for performing IO operations + * @param pi where to store the long that was read + * @return EXTRACTOR_UNZIP_OK on success, or EXTRACTOR_UNZIP_EOF + */ static int -unzlocal_getLong (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def, - uLong *pX) +read_long_from_ffd (const struct FileFuncDefs *ffd, + uLong *pX) { uLong x; int i; int err; *pX = 0; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x = (uLong) i; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x += ((uLong) i) << 8; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x += ((uLong) i) << 16; - if (EXTRACTOR_UNZIP_OK != (err = unzlocal_getByte (pzlib_filefunc_def, &i))) + if (EXTRACTOR_UNZIP_OK != (err = read_byte_from_ffd (ffd, &i))) return err; x += ((uLong) i) << 24; *pX = x; @@ -366,7 +426,7 @@ unzlocal_getLong (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def, /** - * Compare two filename (fileName1,fileName2). + * Compare two filenames (fileName1, fileName2). * * @param filename1 name of first file * @param filename2 name of second file @@ -394,27 +454,24 @@ EXTRACTOR_common_unzip_string_file_name_compare (const char* fileName1, /** + * Locate the central directory in the ZIP file. 
* + * @param ffd IO functions + * @return offset of central directory, 0 on error */ static uLong -unzlocal_SearchCentralDir (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filefunc_def) +locate_central_directory (const struct FileFuncDefs *ffd) { - unsigned char *buf; + unsigned char buf[BUFREADCOMMENT + 4]; uLong uSizeFile; uLong uBackRead; uLong uMaxBack = 0xffff; /* maximum size of global comment */ - uLong uPosFound = 0; - if (0 != ZSEEK (*pzlib_filefunc_def, 0, SEEK_END)) + if (0 != ZSEEK (*ffd, 0, SEEK_END)) return 0; - uSizeFile = ZTELL (*pzlib_filefunc_def); - + uSizeFile = ZTELL (*ffd); if (uMaxBack > uSizeFile) uMaxBack = uSizeFile; - - if (NULL == (buf = malloc(BUFREADCOMMENT + 4))) - return 0; - uBackRead = 4; while (uBackRead < uMaxBack) { @@ -427,156 +484,153 @@ unzlocal_SearchCentralDir (const struct EXTRACTOR_UnzipFileFuncDefs* pzlib_filef else uBackRead += BUFREADCOMMENT; uReadPos = uSizeFile - uBackRead; - uReadSize = ((BUFREADCOMMENT + 4) < (uSizeFile - uReadPos)) ? 
(BUFREADCOMMENT + 4) : (uSizeFile - uReadPos); - if (0 != ZSEEK (*pzlib_filefunc_def, uReadPos, SEEK_SET)) + if (0 != ZSEEK (*ffd, uReadPos, SEEK_SET)) break; - - if (ZREAD (*pzlib_filefunc_def, buf, uReadSize) != uReadSize) + if (ZREAD (*ffd, buf, uReadSize) != uReadSize) break; - i = (int) uReadSize - 3; while (i-- > 0) if ( (0x50 == (*(buf+i))) && (0x4b == (*(buf+i+1))) && (0x05 == (*(buf+i+2))) && (0x06 == (*(buf+i+3))) ) - { - uPosFound = uReadPos + i; - break; - } - if (0 != uPosFound) - break; + return uReadPos + i; } - free (buf); - return uPosFound; + return 0; } /** * Translate date/time from Dos format to struct * EXTRACTOR_UnzipDateTimeInfo (readable more easilty) + * + * @param ulDosDate time in DOS format (input) + * @param ptm where to write time in readable format */ static void -unzlocal_DosDateToTmuDate (uLong ulDosDate, +dos_date_to_tmu_date (uLong ulDosDate, struct EXTRACTOR_UnzipDateTimeInfo* ptm) { uLong uDate; uDate = (uLong) (ulDosDate >> 16); ptm->tm_mday = (uInt) (uDate & 0x1f); - ptm->tm_mon = (uInt) ((((uDate) & 0x1E0) / 0x20) - 1); + ptm->tm_mon = (uInt) ((((uDate) & 0x1E0) / 0x20) - 1); ptm->tm_year = (uInt) (((uDate & 0x0FE00) / 0x0200) + 1980); ptm->tm_hour = (uInt) ((ulDosDate & 0xF800) / 0x800); - ptm->tm_min = (uInt) ((ulDosDate & 0x7E0) / 0x20); - ptm->tm_sec = (uInt) (2 * (ulDosDate & 0x1f)); + ptm->tm_min = (uInt) ((ulDosDate & 0x7E0) / 0x20); + ptm->tm_sec = (uInt) (2 * (ulDosDate & 0x1f)); } +/** + * Write info about the ZipFile in the *pglobal_info structure. + * No preparation of the structure is needed. 
+ * + * @param file zipfile to manipulate + * @param pfile_info file information to initialize + * @param pfile_info_internal internal file information to initialize + * @param szFileName where to write the name of the current file + * @param fileNameBufferSize number of bytes available in szFileName + * @param extraField where to write extra data + * @param extraFieldBufferSize number of bytes available in extraField + * @param szComment where to write the comment on the current file + * @param commentBufferSize number of bytes available in szComment + * @return EXTRACTOR_UNZIP_OK if there is no problem. + */ static int -unzlocal_GetCurrentFileInfoInternal (struct EXTRACTOR_UnzipFile *s, - struct EXTRACTOR_UnzipFileInfo *pfile_info, - struct UnzipFileInfoInternal *pfile_info_internal, - char *szFileName, - uLong fileNameBufferSize, - void *extraField, - uLong extraFieldBufferSize, - char *szComment, - uLong commentBufferSize) +get_current_file_info (struct EXTRACTOR_UnzipFile *file, + struct EXTRACTOR_UnzipFileInfo *pfile_info, + struct UnzipFileInfoInternal *pfile_info_internal, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize) { struct EXTRACTOR_UnzipFileInfo file_info; struct UnzipFileInfoInternal file_info_internal; - int err = EXTRACTOR_UNZIP_OK; uLong uMagic; long lSeek = 0; - if (s == NULL) + if (NULL == file) return EXTRACTOR_UNZIP_PARAMERROR; - if (0 != ZSEEK (s->z_filefunc, - s->pos_in_central_dir + s->byte_before_the_zipfile, + if (0 != ZSEEK (file->z_filefunc, + file->pos_in_central_dir + file->byte_before_the_zipfile, SEEK_SET)) - err = EXTRACTOR_UNZIP_ERRNO; + return EXTRACTOR_UNZIP_ERRNO; /* we check the magic */ - if (EXTRACTOR_UNZIP_OK == err) - { - if (unzlocal_getLong(&s->z_filefunc, &uMagic) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - else if (uMagic!=0x02014b50) - err=EXTRACTOR_UNZIP_BADZIPFILE; - } - if (unzlocal_getShort (&s->z_filefunc, 
&file_info.version) != EXTRACTOR_UNZIP_OK) - err = EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.version_needed) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.flag) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.compression_method) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getLong(&s->z_filefunc, &file_info.dosDate) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - unzlocal_DosDateToTmuDate(file_info.dosDate,&file_info.tmu_date); - - if (unzlocal_getLong(&s->z_filefunc, &file_info.crc) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getLong(&s->z_filefunc, &file_info.compressed_size) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getLong(&s->z_filefunc, &file_info.uncompressed_size) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.size_filename) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.size_file_extra) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.size_file_comment) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.disk_num_start) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getShort(&s->z_filefunc, &file_info.internal_fa) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if (unzlocal_getLong(&s->z_filefunc, &file_info.external_fa) != EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_long_from_ffd(&file->z_filefunc, &uMagic)) + return EXTRACTOR_UNZIP_ERRNO; + if (0x02014b50 != uMagic) + return EXTRACTOR_UNZIP_BADZIPFILE; - if (unzlocal_getLong (&s->z_filefunc, - &file_info_internal.offset_curfile) != EXTRACTOR_UNZIP_OK) - err = EXTRACTOR_UNZIP_ERRNO; + 
if ( (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&file->z_filefunc, &file_info.version)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&file->z_filefunc, &file_info.version_needed)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&file->z_filefunc, &file_info.flag)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&file->z_filefunc, &file_info.compression_method)) || + (EXTRACTOR_UNZIP_OK != + read_long_from_ffd (&file->z_filefunc, &file_info.dosDate)) ) + return EXTRACTOR_UNZIP_ERRNO; + dos_date_to_tmu_date (file_info.dosDate, + &file_info.tmu_date); + if ( (EXTRACTOR_UNZIP_OK != + read_long_from_ffd(&file->z_filefunc, &file_info.crc)) || + (EXTRACTOR_UNZIP_OK != + read_long_from_ffd(&file->z_filefunc, &file_info.compressed_size)) || + (EXTRACTOR_UNZIP_OK != + read_long_from_ffd(&file->z_filefunc, &file_info.uncompressed_size)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd(&file->z_filefunc, &file_info.size_filename)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd(&file->z_filefunc, &file_info.size_file_extra)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd(&file->z_filefunc, &file_info.size_file_comment)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd(&file->z_filefunc, &file_info.disk_num_start)) || + (EXTRACTOR_UNZIP_OK != + read_short_from_ffd(&file->z_filefunc, &file_info.internal_fa)) || + (EXTRACTOR_UNZIP_OK != + read_long_from_ffd(&file->z_filefunc, &file_info.external_fa)) || + (EXTRACTOR_UNZIP_OK != + read_long_from_ffd (&file->z_filefunc, + &file_info_internal.offset_curfile)) ) + return EXTRACTOR_UNZIP_ERRNO; lSeek += file_info.size_filename; - if ((err==EXTRACTOR_UNZIP_OK) && (szFileName!=NULL)) + if (NULL != szFileName) { uLong uSizeRead; + if (file_info.size_filename < fileNameBufferSize) { - *(szFileName+file_info.size_filename) = '\0'; + *(szFileName + file_info.size_filename) = '\0'; uSizeRead = file_info.size_filename; } else uSizeRead = fileNameBufferSize; - if ((file_info.size_filename > 0) && (fileNameBufferSize > 0)) 
- if (ZREAD(s->z_filefunc, szFileName, uSizeRead) != uSizeRead) - err = EXTRACTOR_UNZIP_ERRNO; + if ( (file_info.size_filename > 0) && + (fileNameBufferSize > 0) ) + if (ZREAD(file->z_filefunc, szFileName, uSizeRead) != uSizeRead) + return EXTRACTOR_UNZIP_ERRNO; lSeek -= uSizeRead; } - - if ((err==EXTRACTOR_UNZIP_OK) && (extraField!=NULL)) + if (NULL != extraField) { uLong uSizeRead; + if (file_info.size_file_extra<extraFieldBufferSize) uSizeRead = file_info.size_file_extra; else @@ -584,53 +638,55 @@ unzlocal_GetCurrentFileInfoInternal (struct EXTRACTOR_UnzipFile *s, if (0 != lSeek) { - if (0 == ZSEEK (s->z_filefunc, lSeek, SEEK_CUR)) + if (0 == ZSEEK (file->z_filefunc, lSeek, SEEK_CUR)) lSeek = 0; else - err = EXTRACTOR_UNZIP_ERRNO; + return EXTRACTOR_UNZIP_ERRNO; } - if ((file_info.size_file_extra>0) && (extraFieldBufferSize>0)) - if (ZREAD(s->z_filefunc, extraField,uSizeRead)!=uSizeRead) - err=EXTRACTOR_UNZIP_ERRNO; + if ( (file_info.size_file_extra > 0) && + (extraFieldBufferSize > 0) && + (ZREAD (file->z_filefunc, + extraField, + uSizeRead) != uSizeRead) ) + return EXTRACTOR_UNZIP_ERRNO; lSeek += file_info.size_file_extra - uSizeRead; } else - lSeek+=file_info.size_file_extra; - + lSeek += file_info.size_file_extra; - if ((err==EXTRACTOR_UNZIP_OK) && (szComment!=NULL)) + if (NULL != szComment) { - uLong uSizeRead ; - if (file_info.size_file_comment<commentBufferSize) + uLong uSizeRead; + + if (file_info.size_file_comment < commentBufferSize) { - *(szComment+file_info.size_file_comment)='\0'; + *(szComment+file_info.size_file_comment) = '\0'; uSizeRead = file_info.size_file_comment; } else uSizeRead = commentBufferSize; - - if (lSeek!=0) + + if (0 != lSeek) { - if (ZSEEK(s->z_filefunc, lSeek, SEEK_CUR)==0) - lSeek=0; + if (0 == ZSEEK (file->z_filefunc, lSeek, SEEK_CUR)) + lSeek = 0; else - err=EXTRACTOR_UNZIP_ERRNO; + return EXTRACTOR_UNZIP_ERRNO; } - if ((file_info.size_file_comment>0) && (commentBufferSize>0)) - if (ZREAD(s->z_filefunc, 
szComment,uSizeRead)!=uSizeRead) - err=EXTRACTOR_UNZIP_ERRNO; - lSeek+=file_info.size_file_comment - uSizeRead; + if ( (file_info.size_file_comment > 0) && + (commentBufferSize > 0) && + (ZREAD (file->z_filefunc, szComment, uSizeRead) != uSizeRead) ) + return EXTRACTOR_UNZIP_ERRNO; + lSeek += file_info.size_file_comment - uSizeRead; } else - lSeek+=file_info.size_file_comment; + lSeek += file_info.size_file_comment; - if ((err==EXTRACTOR_UNZIP_OK) && (pfile_info!=NULL)) - *pfile_info=file_info; - - if ((err==EXTRACTOR_UNZIP_OK) && (pfile_info_internal!=NULL)) - *pfile_info_internal=file_info_internal; - - return err; + if (NULL != pfile_info) + *pfile_info = file_info; + if (NULL != pfile_info_internal) + *pfile_info_internal = file_info_internal; + return EXTRACTOR_UNZIP_OK; } @@ -649,11 +705,11 @@ EXTRACTOR_common_unzip_go_to_first_file (struct EXTRACTOR_UnzipFile *file) return EXTRACTOR_UNZIP_PARAMERROR; file->pos_in_central_dir = file->offset_central_dir; file->num_file = 0; - err = unzlocal_GetCurrentFileInfoInternal (file, - &file->cur_file_info, - &file->cur_file_info_internal, - NULL, 0, NULL, 0, NULL, 0); - file->current_file_ok = (err == EXTRACTOR_UNZIP_OK); + err = get_current_file_info (file, + &file->cur_file_info, + &file->cur_file_info_internal, + NULL, 0, NULL, 0, NULL, 0); + file->current_file_ok = (EXTRACTOR_UNZIP_OK == err); return err; } @@ -661,14 +717,14 @@ EXTRACTOR_common_unzip_go_to_first_file (struct EXTRACTOR_UnzipFile *file) /** * Open a Zip file. 
* - * @param pzlib_filefunc_def IO functions + * @param ffd IO functions * @return NULL on error */ static struct EXTRACTOR_UnzipFile * -EXTRACTOR_common_unzip_open2 (struct EXTRACTOR_UnzipFileFuncDefs *pzlib_filefunc_def) +unzip_open_using_ffd (struct FileFuncDefs *ffd) { struct EXTRACTOR_UnzipFile us; - struct EXTRACTOR_UnzipFile *s; + struct EXTRACTOR_UnzipFile *file; uLong central_pos; uLong uL; uLong number_disk; /* number of the current dist, used for @@ -678,78 +734,76 @@ EXTRACTOR_common_unzip_open2 (struct EXTRACTOR_UnzipFileFuncDefs *pzlib_filefunc uLong number_entry_CD; /* total number of entries in the central dir (same than number_entry on nospan) */ - - int err = EXTRACTOR_UNZIP_OK; memset (&us, 0, sizeof(us)); - us.z_filefunc = *pzlib_filefunc_def; - - central_pos = unzlocal_SearchCentralDir (&us.z_filefunc); - if (central_pos==0) - err=EXTRACTOR_UNZIP_ERRNO; - - if (ZSEEK(us.z_filefunc, - central_pos, SEEK_SET)!=0) - err=EXTRACTOR_UNZIP_ERRNO; + us.z_filefunc = *ffd; + central_pos = locate_central_directory (&us.z_filefunc); + if (0 == central_pos) + return NULL; + if (0 != ZSEEK (us.z_filefunc, + central_pos, SEEK_SET)) + return NULL; /* the signature, already checked */ - if (unzlocal_getLong(&us.z_filefunc, &uL)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_long_from_ffd (&us.z_filefunc, &uL)) + return NULL; /* number of this disk */ - if (unzlocal_getShort(&us.z_filefunc, &number_disk)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&us.z_filefunc, &number_disk)) + return NULL; /* number of the disk with the start of the central directory */ - if (unzlocal_getShort(&us.z_filefunc, &number_disk_with_CD)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&us.z_filefunc, &number_disk_with_CD)) + return NULL; /* total number of entries in the central dir on this disk */ - if (unzlocal_getShort(&us.z_filefunc, 
&us.gi.number_entry)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&us.z_filefunc, &us.gi.number_entry)) + return NULL; /* total number of entries in the central dir */ - if (unzlocal_getShort(&us.z_filefunc, &number_entry_CD)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&us.z_filefunc, &number_entry_CD)) + return NULL; - if ((number_entry_CD!=us.gi.number_entry) || - (number_disk_with_CD!=0) || - (number_disk!=0)) - err=EXTRACTOR_UNZIP_BADZIPFILE; + if ( (number_entry_CD != us.gi.number_entry) || + (0 != number_disk_with_CD) || + (0 != number_disk) ) + return NULL; /* size of the central directory */ - if (unzlocal_getLong(&us.z_filefunc, &us.size_central_dir)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_long_from_ffd (&us.z_filefunc, &us.size_central_dir)) + return NULL; /* offset of start of central directory with respect to the starting disk number */ - if (unzlocal_getLong(&us.z_filefunc, &us.offset_central_dir)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != + read_long_from_ffd (&us.z_filefunc, &us.offset_central_dir)) + return NULL; /* zipfile comment length */ - if (unzlocal_getShort(&us.z_filefunc, &us.gi.size_comment)!=EXTRACTOR_UNZIP_OK) - err=EXTRACTOR_UNZIP_ERRNO; - - if ((central_pos<us.offset_central_dir+us.size_central_dir) && - (err==EXTRACTOR_UNZIP_OK)) - err=EXTRACTOR_UNZIP_BADZIPFILE; + if (EXTRACTOR_UNZIP_OK != + read_short_from_ffd (&us.z_filefunc, &us.gi.size_comment)) + return NULL; + us.gi.offset_comment = ZTELL (us.z_filefunc); + if ((central_pos < us.offset_central_dir + us.size_central_dir)) + return NULL; - if (err!=EXTRACTOR_UNZIP_OK) - { - return NULL; - } - us.byte_before_the_zipfile = central_pos - - (us.offset_central_dir+us.size_central_dir); + (us.offset_central_dir + us.size_central_dir); us.central_pos = central_pos; us.pfile_in_zip_read = NULL; 
us.encrypted = 0; - if (NULL == (s = malloc (sizeof(struct EXTRACTOR_UnzipFile)))) + if (NULL == (file = malloc (sizeof(struct EXTRACTOR_UnzipFile)))) return NULL; - *s=us; - EXTRACTOR_common_unzip_go_to_first_file(s); - return s; + *file = us; + EXTRACTOR_common_unzip_go_to_first_file (file); + return file; } @@ -759,48 +813,32 @@ EXTRACTOR_common_unzip_open2 (struct EXTRACTOR_UnzipFileFuncDefs *pzlib_filefunc * @return EXTRACTOR_UNZIP_CRCERROR if all the file was read but the CRC is not good */ int -EXTRACTOR_common_unzip_close_current_file (struct EXTRACTOR_UnzipFile * file) +EXTRACTOR_common_unzip_close_current_file (struct EXTRACTOR_UnzipFile *file) { - int err=EXTRACTOR_UNZIP_OK; - - struct EXTRACTOR_UnzipFile* s; - struct FileInZipReadInfo* pfile_in_zip_read_info; - if (file==NULL) - return EXTRACTOR_UNZIP_PARAMERROR; - s=(struct EXTRACTOR_UnzipFile*)file; - pfile_in_zip_read_info=s->pfile_in_zip_read; - - if (pfile_in_zip_read_info==NULL) - return EXTRACTOR_UNZIP_PARAMERROR; - - - if (pfile_in_zip_read_info->rest_read_uncompressed == 0) - { - if (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait) - err=EXTRACTOR_UNZIP_CRCERROR; - } - - - if (NULL != pfile_in_zip_read_info->read_buffer) - free(pfile_in_zip_read_info->read_buffer); - pfile_in_zip_read_info->read_buffer = NULL; - if (pfile_in_zip_read_info->stream_initialised) - inflateEnd(&pfile_in_zip_read_info->stream); - - pfile_in_zip_read_info->stream_initialised = 0; - free(pfile_in_zip_read_info); - - s->pfile_in_zip_read=NULL; - - return err; + struct FileInZipReadInfo* pfile_in_zip_read_info; + int err = EXTRACTOR_UNZIP_OK; + + if (NULL == file) + return EXTRACTOR_UNZIP_PARAMERROR; + if (NULL == (pfile_in_zip_read_info = file->pfile_in_zip_read)) + return EXTRACTOR_UNZIP_PARAMERROR; + if ( (0 == pfile_in_zip_read_info->rest_read_uncompressed) && + (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait) ) + err = EXTRACTOR_UNZIP_CRCERROR; + if (NULL != 
pfile_in_zip_read_info->read_buffer) + free (pfile_in_zip_read_info->read_buffer); + pfile_in_zip_read_info->read_buffer = NULL; + if (pfile_in_zip_read_info->stream_initialised) + inflateEnd (&pfile_in_zip_read_info->stream); + pfile_in_zip_read_info->stream_initialised = 0; + free (pfile_in_zip_read_info); + file->pfile_in_zip_read = NULL; + return err; } /** - * Close a ZipFile. If there is files inside the .Zip opened with - * EXTRACTOR_common_unzip_open_current_file, these files MUST be - * closed with EXTRACTOR_common_unzip_close_current_file before - * calling EXTRACTOR_common_unzip_close. + * Close a ZipFile. * * @param file zip file to close * @return EXTRACTOR_UNZIP_OK if there is no problem. @@ -808,17 +846,40 @@ EXTRACTOR_common_unzip_close_current_file (struct EXTRACTOR_UnzipFile * file) int EXTRACTOR_common_unzip_close (struct EXTRACTOR_UnzipFile *file) { - struct EXTRACTOR_UnzipFile* s; - - if (file==NULL) + if (NULL == file) return EXTRACTOR_UNZIP_PARAMERROR; - s=(struct EXTRACTOR_UnzipFile*)file; - - if (s->pfile_in_zip_read!=NULL) - EXTRACTOR_common_unzip_close_current_file(file); + if (NULL != file->pfile_in_zip_read) + EXTRACTOR_common_unzip_close_current_file (file); + free (file); + return EXTRACTOR_UNZIP_OK; +} + - free(s); - return EXTRACTOR_UNZIP_OK; +/** + * Obtain the global comment from a ZIP file. 
+ * + * @param file unzip file to inspect + * @param comment where to copy the comment + * @param comment_len maximum number of bytes available in comment + * @return EXTRACTOR_UNZIP_OK on success + */ +int +EXTRACTOR_common_unzip_get_global_comment (struct EXTRACTOR_UnzipFile *file, + char *comment, + size_t comment_len) +{ + if (NULL == file) + return EXTRACTOR_UNZIP_PARAMERROR; + if (comment_len > file->gi.size_comment) + comment_len = file->gi.size_comment + 1; + if (0 != + ZSEEK (file->z_filefunc, file->gi.offset_comment, SEEK_SET)) + return EXTRACTOR_UNZIP_ERRNO; + if (comment_len - 1 != + ZREAD (file->z_filefunc, comment, comment_len - 1)) + return EXTRACTOR_UNZIP_ERRNO; + comment[comment_len - 1] = '\0'; + return EXTRACTOR_UNZIP_OK; } @@ -827,6 +888,7 @@ EXTRACTOR_common_unzip_close (struct EXTRACTOR_UnzipFile *file) * No preparation of the structure is needed. * * @param file zipfile to manipulate + * @param pfile_info file information to initialize * @param szFileName where to write the name of the current file * @param fileNameBufferSize number of bytes available in szFileName * @param extraField where to write extra data @@ -845,10 +907,10 @@ EXTRACTOR_common_unzip_get_current_file_info (struct EXTRACTOR_UnzipFile * file, char *szComment, uLong commentBufferSize) { - return unzlocal_GetCurrentFileInfoInternal(file,pfile_info,NULL, - szFileName,fileNameBufferSize, - extraField,extraFieldBufferSize, - szComment,commentBufferSize); + return get_current_file_info (file, pfile_info, NULL, + szFileName, fileNameBufferSize, + extraField, extraFieldBufferSize, + szComment, commentBufferSize); } @@ -860,27 +922,25 @@ EXTRACTOR_common_unzip_get_current_file_info (struct EXTRACTOR_UnzipFile * file, * EXTRACTOR_UNZIP_END_OF_LIST_OF_FILE if the actual file was the latest. 
*/ int -EXTRACTOR_common_unzip_go_to_next_file (struct EXTRACTOR_UnzipFile * file) +EXTRACTOR_common_unzip_go_to_next_file (struct EXTRACTOR_UnzipFile *file) { - struct EXTRACTOR_UnzipFile* s; - int err; - - if (file==NULL) - return EXTRACTOR_UNZIP_PARAMERROR; - s=(struct EXTRACTOR_UnzipFile*)file; - if (!s->current_file_ok) - return EXTRACTOR_UNZIP_END_OF_LIST_OF_FILE; - if (s->num_file+1==s->gi.number_entry) - return EXTRACTOR_UNZIP_END_OF_LIST_OF_FILE; - - s->pos_in_central_dir += SIZECENTRALDIRITEM + s->cur_file_info.size_filename + - s->cur_file_info.size_file_extra + s->cur_file_info.size_file_comment ; - s->num_file++; - err = unzlocal_GetCurrentFileInfoInternal(file,&s->cur_file_info, - &s->cur_file_info_internal, - NULL,0,NULL,0,NULL,0); - s->current_file_ok = (err == EXTRACTOR_UNZIP_OK); - return err; + int err; + + if (NULL == file) + return EXTRACTOR_UNZIP_PARAMERROR; + if (! file->current_file_ok) + return EXTRACTOR_UNZIP_END_OF_LIST_OF_FILE; + if (file->num_file + 1 == file->gi.number_entry) + return EXTRACTOR_UNZIP_END_OF_LIST_OF_FILE; + file->pos_in_central_dir += SIZECENTRALDIRITEM + file->cur_file_info.size_filename + + file->cur_file_info.size_file_extra + file->cur_file_info.size_file_comment; + file->num_file++; + err = get_current_file_info (file, + &file->cur_file_info, + &file->cur_file_info_internal, + NULL, 0, NULL, 0, NULL, 0); + file->current_file_ok = (EXTRACTOR_UNZIP_OK == err); + return err; } @@ -925,20 +985,20 @@ EXTRACTOR_common_unzip_go_find_local_file (struct EXTRACTOR_UnzipFile *file, while (EXTRACTOR_UNZIP_OK == err) { - char szCurrentFileName[UNZ_MAXFILENAMEINZIP+1]; + char szCurrentFileName[UNZ_MAXFILENAMEINZIP + 1]; - err = EXTRACTOR_common_unzip_get_current_file_info (file, NULL, - szCurrentFileName, sizeof (szCurrentFileName) - 1, - NULL, 0, NULL, 0); - if (EXTRACTOR_UNZIP_OK == err) - { - if (0 == - EXTRACTOR_common_unzip_string_file_name_compare (szCurrentFileName, - szFileName, - iCaseSensitivity)) - return 
EXTRACTOR_UNZIP_OK; - err = EXTRACTOR_common_unzip_go_to_next_file (file); - } + if (EXTRACTOR_UNZIP_OK != + (err = EXTRACTOR_common_unzip_get_current_file_info (file, NULL, + szCurrentFileName, + sizeof (szCurrentFileName) - 1, + NULL, 0, NULL, 0))) + break; + if (0 == + EXTRACTOR_common_unzip_string_file_name_compare (szCurrentFileName, + szFileName, + iCaseSensitivity)) + return EXTRACTOR_UNZIP_OK; + err = EXTRACTOR_common_unzip_go_to_next_file (file); } /* We failed, so restore the state of the 'current file' to where we @@ -969,7 +1029,7 @@ EXTRACTOR_common_unzip_read_current_file (struct EXTRACTOR_UnzipFile *file, { int err = EXTRACTOR_UNZIP_OK; uInt iRead = 0; - struct FileInZipReadInfo* pfile_in_zip_read_info; + struct FileInZipReadInfo *pfile_in_zip_read_info; if (NULL == file) return EXTRACTOR_UNZIP_PARAMERROR; @@ -980,7 +1040,7 @@ EXTRACTOR_common_unzip_read_current_file (struct EXTRACTOR_UnzipFile *file, if (0 == len) return 0; - pfile_in_zip_read_info->stream.next_out = (Bytef*) buf; + pfile_in_zip_read_info->stream.next_out = (Bytef *) buf; pfile_in_zip_read_info->stream.avail_out = (uInt) len; if (len > pfile_in_zip_read_info->rest_read_uncompressed) pfile_in_zip_read_info->stream.avail_out = @@ -989,14 +1049,14 @@ EXTRACTOR_common_unzip_read_current_file (struct EXTRACTOR_UnzipFile *file, while (pfile_in_zip_read_info->stream.avail_out > 0) { if ( (0 == pfile_in_zip_read_info->stream.avail_in) && - (pfile_in_zip_read_info->rest_read_compressed>0) ) + (pfile_in_zip_read_info->rest_read_compressed > 0) ) { uInt uReadThis = UNZ_BUFSIZE; if (pfile_in_zip_read_info->rest_read_compressed<uReadThis) uReadThis = (uInt) pfile_in_zip_read_info->rest_read_compressed; if (0 == uReadThis) return EXTRACTOR_UNZIP_EOF; - if (0 != + if (0 != ZSEEK (pfile_in_zip_read_info->z_filefunc, pfile_in_zip_read_info->pos_in_zipfile + pfile_in_zip_read_info->byte_before_the_zipfile, @@ -1008,78 +1068,75 @@ EXTRACTOR_common_unzip_read_current_file (struct EXTRACTOR_UnzipFile 
*file, return EXTRACTOR_UNZIP_ERRNO; pfile_in_zip_read_info->pos_in_zipfile += uReadThis; - pfile_in_zip_read_info->rest_read_compressed-=uReadThis; + pfile_in_zip_read_info->rest_read_compressed -= uReadThis; pfile_in_zip_read_info->stream.next_in = - (Bytef*)pfile_in_zip_read_info->read_buffer; - pfile_in_zip_read_info->stream.avail_in = (uInt)uReadThis; + (Bytef *) pfile_in_zip_read_info->read_buffer; + pfile_in_zip_read_info->stream.avail_in = (uInt) uReadThis; } - if (pfile_in_zip_read_info->compression_method==0) - { - uInt uDoCopy; - uInt i; - - if ((pfile_in_zip_read_info->stream.avail_in == 0) && - (pfile_in_zip_read_info->rest_read_compressed == 0)) - return (iRead==0) ? EXTRACTOR_UNZIP_EOF : iRead; - - if (pfile_in_zip_read_info->stream.avail_out < - pfile_in_zip_read_info->stream.avail_in) - uDoCopy = pfile_in_zip_read_info->stream.avail_out ; - else - uDoCopy = pfile_in_zip_read_info->stream.avail_in ; - - for (i=0;i<uDoCopy;i++) - *(pfile_in_zip_read_info->stream.next_out+i) = - *(pfile_in_zip_read_info->stream.next_in+i); - - pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32, - pfile_in_zip_read_info->stream.next_out, - uDoCopy); - pfile_in_zip_read_info->rest_read_uncompressed-=uDoCopy; - pfile_in_zip_read_info->stream.avail_in -= uDoCopy; - pfile_in_zip_read_info->stream.avail_out -= uDoCopy; - pfile_in_zip_read_info->stream.next_out += uDoCopy; - pfile_in_zip_read_info->stream.next_in += uDoCopy; - pfile_in_zip_read_info->stream.total_out += uDoCopy; - iRead += uDoCopy; - } - else - { - uLong uTotalOutBefore; - uLong uTotalOutAfter; - const Bytef *bufBefore; - uLong uOutThis; - int flush = Z_SYNC_FLUSH; - - uTotalOutBefore = pfile_in_zip_read_info->stream.total_out; - bufBefore = pfile_in_zip_read_info->stream.next_out; - - /* - if ((pfile_in_zip_read_info->rest_read_uncompressed == - pfile_in_zip_read_info->stream.avail_out) && - (pfile_in_zip_read_info->rest_read_compressed == 0)) - flush = Z_FINISH; - */ - err = 
inflate(&pfile_in_zip_read_info->stream,flush); - - uTotalOutAfter = pfile_in_zip_read_info->stream.total_out; - uOutThis = uTotalOutAfter-uTotalOutBefore; - - pfile_in_zip_read_info->crc32 = - crc32(pfile_in_zip_read_info->crc32,bufBefore, - (uInt)(uOutThis)); - - pfile_in_zip_read_info->rest_read_uncompressed -= - uOutThis; - - iRead += (uInt)(uTotalOutAfter - uTotalOutBefore); - - if (Z_STREAM_END == err) - return (0 == iRead) ? EXTRACTOR_UNZIP_EOF : iRead; - if (Z_OK != err) - break; - } + if (0 == pfile_in_zip_read_info->compression_method) + { + uInt uDoCopy; + + if ( (0 == pfile_in_zip_read_info->stream.avail_in) && + (0 == pfile_in_zip_read_info->rest_read_compressed) ) + return (0 == iRead) ? EXTRACTOR_UNZIP_EOF : iRead; + + if (pfile_in_zip_read_info->stream.avail_out < + pfile_in_zip_read_info->stream.avail_in) + uDoCopy = pfile_in_zip_read_info->stream.avail_out; + else + uDoCopy = pfile_in_zip_read_info->stream.avail_in; + memcpy (pfile_in_zip_read_info->stream.next_out, + pfile_in_zip_read_info->stream.next_in, + uDoCopy); + pfile_in_zip_read_info->crc32 = crc32 (pfile_in_zip_read_info->crc32, + pfile_in_zip_read_info->stream.next_out, + uDoCopy); + pfile_in_zip_read_info->rest_read_uncompressed -= uDoCopy; + pfile_in_zip_read_info->stream.avail_in -= uDoCopy; + pfile_in_zip_read_info->stream.avail_out -= uDoCopy; + pfile_in_zip_read_info->stream.next_out += uDoCopy; + pfile_in_zip_read_info->stream.next_in += uDoCopy; + pfile_in_zip_read_info->stream.total_out += uDoCopy; + iRead += uDoCopy; + } + else + { + uLong uTotalOutBefore; + uLong uTotalOutAfter; + const Bytef *bufBefore; + uLong uOutThis; + int flush = Z_SYNC_FLUSH; + + uTotalOutBefore = pfile_in_zip_read_info->stream.total_out; + bufBefore = pfile_in_zip_read_info->stream.next_out; + + /* + if ((pfile_in_zip_read_info->rest_read_uncompressed == + pfile_in_zip_read_info->stream.avail_out) && + (pfile_in_zip_read_info->rest_read_compressed == 0)) + flush = Z_FINISH; + */ + err = inflate 
(&pfile_in_zip_read_info->stream, flush); + + uTotalOutAfter = pfile_in_zip_read_info->stream.total_out; + uOutThis = uTotalOutAfter-uTotalOutBefore; + + pfile_in_zip_read_info->crc32 = + crc32 (pfile_in_zip_read_info->crc32, bufBefore, + (uInt) (uOutThis)); + + pfile_in_zip_read_info->rest_read_uncompressed -= + uOutThis; + + iRead += (uInt) (uTotalOutAfter - uTotalOutBefore); + + if (Z_STREAM_END == err) + return (0 == iRead) ? EXTRACTOR_UNZIP_EOF : iRead; + if (Z_OK != err) + break; + } } if (Z_OK == err) @@ -1089,24 +1146,28 @@ EXTRACTOR_common_unzip_read_current_file (struct EXTRACTOR_UnzipFile *file, /** - * Read the local header of the current zipfile - * Check the coherency of the local header and info in the end of central - * directory about this file - * store in *piSizeVar the size of extra info in local header - * (filename and size of extra field data) + * Read the local header of the current zipfile. Check the coherency of + * the local header and info in the end of central directory about + * this file. 
Store in *piSizeVar the size of extra info in local + * header (filename and size of extra field data) + * + * @param file zipfile to process + * @param piSizeVar where to store the size of the extra info + * @param poffset_local_extrafield where to store the offset of the local extrafield + * @param psoze_local_extrafield where to store the size of the local extrafield + * @return EXTRACTOR_UNZIP_OK on success */ static int -unzlocal_CheckCurrentFileCoherencyHeader (struct EXTRACTOR_UnzipFile *file, - uInt *piSizeVar, - uLong *poffset_local_extrafield, - uInt *psize_local_extrafield) +parse_current_file_coherency_header (struct EXTRACTOR_UnzipFile *file, + uInt *piSizeVar, + uLong *poffset_local_extrafield, + uInt *psize_local_extrafield) { uLong uMagic; uLong uData; uLong uFlags; uLong size_filename; uLong size_extra_field; - int err = EXTRACTOR_UNZIP_OK; *piSizeVar = 0; *poffset_local_extrafield = 0; @@ -1118,76 +1179,61 @@ unzlocal_CheckCurrentFileCoherencyHeader (struct EXTRACTOR_UnzipFile *file, SEEK_SET)) return EXTRACTOR_UNZIP_ERRNO; if (EXTRACTOR_UNZIP_OK != - unzlocal_getLong (&file->z_filefunc, - &uMagic)) - err = EXTRACTOR_UNZIP_ERRNO; - else if (0x04034b50 != uMagic) - err = EXTRACTOR_UNZIP_BADZIPFILE; + read_long_from_ffd (&file->z_filefunc, + &uMagic)) + return EXTRACTOR_UNZIP_ERRNO; + if (0x04034b50 != uMagic) + return EXTRACTOR_UNZIP_BADZIPFILE; if ( (EXTRACTOR_UNZIP_OK != - unzlocal_getShort (&file->z_filefunc, &uData)) || + read_short_from_ffd (&file->z_filefunc, &uData)) || (EXTRACTOR_UNZIP_OK != - unzlocal_getShort (&file->z_filefunc, &uFlags)) ) - err = EXTRACTOR_UNZIP_ERRNO; - - if (EXTRACTOR_UNZIP_OK != unzlocal_getShort (&file->z_filefunc, &uData)) - err = EXTRACTOR_UNZIP_ERRNO; - else if ((EXTRACTOR_UNZIP_OK == err) && - (uData != file->cur_file_info.compression_method)) - err = EXTRACTOR_UNZIP_BADZIPFILE; - - if ( (EXTRACTOR_UNZIP_OK == err) && - (0 != file->cur_file_info.compression_method) && + read_short_from_ffd (&file->z_filefunc, 
&uFlags)) ) + return EXTRACTOR_UNZIP_ERRNO; + if (EXTRACTOR_UNZIP_OK != read_short_from_ffd (&file->z_filefunc, &uData)) + return EXTRACTOR_UNZIP_ERRNO; + if (uData != file->cur_file_info.compression_method) + return EXTRACTOR_UNZIP_BADZIPFILE; + if ( (0 != file->cur_file_info.compression_method) && (Z_DEFLATED != file->cur_file_info.compression_method) ) - err = EXTRACTOR_UNZIP_BADZIPFILE; - + return EXTRACTOR_UNZIP_BADZIPFILE; if (EXTRACTOR_UNZIP_OK != - unzlocal_getLong (&file->z_filefunc, &uData)) /* date/time */ - err = EXTRACTOR_UNZIP_ERRNO; - + read_long_from_ffd (&file->z_filefunc, &uData)) /* date/time */ + return EXTRACTOR_UNZIP_ERRNO; if (EXTRACTOR_UNZIP_OK != - unzlocal_getLong (&file->z_filefunc, &uData)) /* crc */ - err = EXTRACTOR_UNZIP_ERRNO; - else if ( (EXTRACTOR_UNZIP_OK == err) && - (uData != file->cur_file_info.crc) && - (0 == (uFlags & 8)) ) - err = EXTRACTOR_UNZIP_BADZIPFILE; - + read_long_from_ffd (&file->z_filefunc, &uData)) /* crc */ + return EXTRACTOR_UNZIP_ERRNO; + if ( (uData != file->cur_file_info.crc) && + (0 == (uFlags & 8)) ) + return EXTRACTOR_UNZIP_BADZIPFILE; if (EXTRACTOR_UNZIP_OK != - unzlocal_getLong(&file->z_filefunc, &uData)) /* size compr */ - err = EXTRACTOR_UNZIP_ERRNO; - else if ( (EXTRACTOR_UNZIP_OK == err) && - (uData != file->cur_file_info.compressed_size) && - (0 == (uFlags & 8)) ) - err = EXTRACTOR_UNZIP_BADZIPFILE; - + read_long_from_ffd(&file->z_filefunc, &uData)) /* size compr */ + return EXTRACTOR_UNZIP_ERRNO; + if ( (uData != file->cur_file_info.compressed_size) && + (0 == (uFlags & 8)) ) + return EXTRACTOR_UNZIP_BADZIPFILE; if (EXTRACTOR_UNZIP_OK != - unzlocal_getLong (&file->z_filefunc, - &uData)) /* size uncompr */ - err = EXTRACTOR_UNZIP_ERRNO; - else if ( (EXTRACTOR_UNZIP_OK == err) && - (uData != file->cur_file_info.uncompressed_size) && - (0 == (uFlags & 8))) - err = EXTRACTOR_UNZIP_BADZIPFILE; - + read_long_from_ffd (&file->z_filefunc, + &uData)) /* size uncompr */ + return EXTRACTOR_UNZIP_ERRNO; + if ( 
(uData != file->cur_file_info.uncompressed_size) && + (0 == (uFlags & 8))) + return EXTRACTOR_UNZIP_BADZIPFILE; if (EXTRACTOR_UNZIP_OK != - unzlocal_getShort (&file->z_filefunc, &size_filename)) - err = EXTRACTOR_UNZIP_ERRNO; - else if ( (EXTRACTOR_UNZIP_OK == err) && - (size_filename != file->cur_file_info.size_filename) ) - err = EXTRACTOR_UNZIP_BADZIPFILE; - + read_short_from_ffd (&file->z_filefunc, &size_filename)) + return EXTRACTOR_UNZIP_ERRNO; + if (size_filename != file->cur_file_info.size_filename) + return EXTRACTOR_UNZIP_BADZIPFILE; *piSizeVar += (uInt) size_filename; - if (EXTRACTOR_UNZIP_OK != - unzlocal_getShort (&file->z_filefunc, - &size_extra_field)) - err = EXTRACTOR_UNZIP_ERRNO; + read_short_from_ffd (&file->z_filefunc, + &size_extra_field)) + return EXTRACTOR_UNZIP_ERRNO; *poffset_local_extrafield = file->cur_file_info_internal.offset_curfile + SIZEZIPLOCALHEADER + size_filename; *psize_local_extrafield = (uInt) size_extra_field; *piSizeVar += (uInt)size_extra_field; - return err; + return EXTRACTOR_UNZIP_OK; } @@ -1200,7 +1246,7 @@ unzlocal_CheckCurrentFileCoherencyHeader (struct EXTRACTOR_UnzipFile *file, int EXTRACTOR_common_unzip_open_current_file (struct EXTRACTOR_UnzipFile *file) { - int err = EXTRACTOR_UNZIP_OK; + int err; uInt iSizeVar; struct FileInZipReadInfo *pfile_in_zip_read_info; uLong offset_local_extrafield; /* offset of the local extra field */ @@ -1213,14 +1259,14 @@ EXTRACTOR_common_unzip_open_current_file (struct EXTRACTOR_UnzipFile *file) if (NULL != file->pfile_in_zip_read) EXTRACTOR_common_unzip_close_current_file (file); if (EXTRACTOR_UNZIP_OK != - unzlocal_CheckCurrentFileCoherencyHeader (file, - &iSizeVar, - &offset_local_extrafield, - &size_local_extrafield)) + parse_current_file_coherency_header (file, + &iSizeVar, + &offset_local_extrafield, + &size_local_extrafield)) return EXTRACTOR_UNZIP_BADZIPFILE; - if (NULL == (pfile_in_zip_read_info = malloc(sizeof(struct FileInZipReadInfo)))) + if (NULL == 
(pfile_in_zip_read_info = malloc (sizeof(struct FileInZipReadInfo)))) return EXTRACTOR_UNZIP_INTERNALERROR; - if (NULL == (pfile_in_zip_read_info->read_buffer = malloc(UNZ_BUFSIZE))) + if (NULL == (pfile_in_zip_read_info->read_buffer = malloc (UNZ_BUFSIZE))) { free (pfile_in_zip_read_info); return EXTRACTOR_UNZIP_INTERNALERROR; @@ -1232,7 +1278,11 @@ EXTRACTOR_common_unzip_open_current_file (struct EXTRACTOR_UnzipFile *file) if ( (0 != file->cur_file_info.compression_method) && (Z_DEFLATED != file->cur_file_info.compression_method) ) - err = EXTRACTOR_UNZIP_BADZIPFILE; + { + // err = EXTRACTOR_UNZIP_BADZIPFILE; + // FIXME: we don't do anything with this 'err' code. + // Can this happen? Should we abort in this case? + } pfile_in_zip_read_info->crc32_wait = file->cur_file_info.crc; pfile_in_zip_read_info->crc32 = 0; @@ -1240,8 +1290,7 @@ EXTRACTOR_common_unzip_open_current_file (struct EXTRACTOR_UnzipFile *file) pfile_in_zip_read_info->z_filefunc = file->z_filefunc; pfile_in_zip_read_info->byte_before_the_zipfile = file->byte_before_the_zipfile; pfile_in_zip_read_info->stream.total_out = 0; - - if (file->cur_file_info.compression_method==Z_DEFLATED) + if (Z_DEFLATED == file->cur_file_info.compression_method) { pfile_in_zip_read_info->stream.zalloc = (alloc_func) NULL; pfile_in_zip_read_info->stream.zfree = (free_func) NULL; @@ -1270,11 +1319,19 @@ EXTRACTOR_common_unzip_open_current_file (struct EXTRACTOR_UnzipFile *file) iSizeVar; pfile_in_zip_read_info->stream.avail_in = 0; file->pfile_in_zip_read = pfile_in_zip_read_info; - return EXTRACTOR_UNZIP_OK; } +/** + * Callback to perform read operation using LE API. + * Note that partial reads are not allowed. 
+ * + * @param opaque the 'struct EXTRACTOR_ExtractContext' + * @param buf where to write bytes read + * @param size number of bytes desired + * @return number of bytes copied to buf + */ static uLong ec_read_file_func (voidpf opaque, void* buf, @@ -1283,17 +1340,29 @@ ec_read_file_func (voidpf opaque, struct EXTRACTOR_ExtractContext *ec = opaque; void *ptr; ssize_t ret; + uLong done; - ret = ec->read (ec->cls, - &ptr, - size); - if (ret > 0) - memcpy (buf, ptr, ret); - // FIXME: partial reads are not allowed, need to possibly read more - return ret; + done = 0; + while (done < size) + { + ret = ec->read (ec->cls, + &ptr, + size); + if (ret <= 0) + return done; + memcpy (buf + done, ptr, ret); + done += ret; + } + return done; } +/** + * Callback to obtain current offset in file using LE API. + * + * @param opaque the 'struct EXTRACTOR_ExtractContext' + * @return current offset in file, -1 on error + */ static long ec_tell_file_func (voidpf opaque) { @@ -1303,6 +1372,14 @@ ec_tell_file_func (voidpf opaque) } +/** + * Callback to perform seek operation using LE API. 
+ * + * @param opaque the 'struct EXTRACTOR_ExtractContext' + * @param offset where to seek + * @param origin relative to where should we seek + * @return EXTRACTOR_UNZIP_OK on success + */ static long ec_seek_file_func (voidpf opaque, uLong offset, @@ -1326,14 +1403,14 @@ ec_seek_file_func (voidpf opaque, struct EXTRACTOR_UnzipFile * EXTRACTOR_common_unzip_open (struct EXTRACTOR_ExtractContext *ec) { - struct EXTRACTOR_UnzipFileFuncDefs io; + struct FileFuncDefs ffd; - io.zread_file = &ec_read_file_func; - io.ztell_file = &ec_tell_file_func; - io.zseek_file = &ec_seek_file_func; - io.opaque = ec; + ffd.zread_file = &ec_read_file_func; + ffd.ztell_file = &ec_tell_file_func; + ffd.zseek_file = &ec_seek_file_func; + ffd.opaque = ec; - return EXTRACTOR_common_unzip_open2 (&io); + return unzip_open_using_ffd (&ffd); } /* end of unzip.c */ diff --git a/src/common/unzip.h b/src/common/unzip.h @@ -208,10 +208,21 @@ EXTRACTOR_common_unzip_open (struct EXTRACTOR_ExtractContext *ec); /** - * Close a ZipFile. If there is files inside the .Zip opened with - * EXTRACTOR_common_unzip_open_current_file, these files MUST be - * closed with EXTRACTOR_common_unzip_close_current_file before - * calling EXTRACTOR_common_unzip_close. + * Obtain the global comment from a ZIP file. + * + * @param file unzip file to inspect + * @param comment where to copy the comment + * @param comment_len maximum number of bytes available in comment + * @return EXTRACTOR_UNZIP_OK on success + */ +int +EXTRACTOR_common_unzip_get_global_comment (struct EXTRACTOR_UnzipFile *file, + char *comment, + size_t comment_len); + + +/** + * Close a ZipFile. * * @param file zip file to close * @return EXTRACTOR_UNZIP_OK if there is no problem. @@ -263,6 +274,7 @@ EXTRACTOR_common_unzip_go_find_local_file (struct EXTRACTOR_UnzipFile *file, * No preparation of the structure is needed. 
* * @param file zipfile to manipulate + * @param pfile_info file information to initialize * @param szFileName where to write the name of the current file * @param fileNameBufferSize number of bytes available in szFileName * @param extraField where to write extra data diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -30,7 +30,9 @@ EXTRA_DIST = template_extractor.c \ testdata/ole2_starwriter40.sdw \ testdata/ole2_blair.doc \ testdata/ole2_excel.xls \ - testdata/png_image.png + testdata/png_image.png \ + testdata/odf_cg.odt \ + testdata/deb_bzip2.deb if HAVE_VORBISFILE PLUGIN_OGG=libextractor_ogg.la @@ -77,13 +79,23 @@ PLUGIN_GSF=libextractor_ole2.la TEST_GSF=test_ole2 endif +if HAVE_ZLIB +PLUGIN_ZLIB=libextractor_deb.la +TEST_ZLIB=test_deb +endif plugin_LTLIBRARIES = \ libextractor_it.la \ + libextractor_nsf.la \ + libextractor_nsfe.la \ + libextractor_odf.la \ libextractor_png.la \ libextractor_xm.la \ libextractor_s3m.la \ + libextractor_sid.la \ libextractor_wav.la \ + libextractor_zip.la \ + $(PLUGIN_ZLIB) \ $(PLUGIN_OGG) \ $(PLUGIN_MIME) \ $(PLUGIN_GIF) \ @@ -103,6 +115,9 @@ check_PROGRAMS = \ test_it \ test_s3m \ test_png \ + test_odf \ + test_zip \ + $(TEST_ZLIB) \ $(TEST_OGG) \ $(TEST_MIME) \ $(TEST_GIF) \ @@ -133,6 +148,55 @@ libextractor_xm_la_LDFLAGS = \ $(PLUGINFLAGS) +libextractor_deb_la_SOURCES = \ + deb_extractor.c +libextractor_deb_la_LDFLAGS = \ + $(PLUGINFLAGS) -lz + +test_deb_SOURCES = \ + test_deb.c +test_deb_LDADD = \ + $(top_builddir)/src/plugins/libtest.la + + +libextractor_nsf_la_SOURCES = \ + nsf_extractor.c +libextractor_nsf_la_LDFLAGS = \ + $(PLUGINFLAGS) + + +libextractor_nsfe_la_SOURCES = \ + nsfe_extractor.c +libextractor_nsfe_la_LDFLAGS = \ + $(PLUGINFLAGS) + + +libextractor_odf_la_SOURCES = \ + odf_extractor.c +libextractor_odf_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_odf_la_LIBADD = \ + $(top_builddir)/src/common/libextractor_common.la + +test_odf_SOURCES = \ + test_odf.c +test_odf_LDADD = \ + 
$(top_builddir)/src/plugins/libtest.la + + +libextractor_zip_la_SOURCES = \ + zip_extractor.c +libextractor_zip_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_zip_la_LIBADD = \ + $(top_builddir)/src/common/libextractor_common.la + +test_zip_SOURCES = \ + test_zip.c +test_zip_LDADD = \ + $(top_builddir)/src/plugins/libtest.la + + libextractor_png_la_SOURCES = \ png_extractor.c libextractor_png_la_LDFLAGS = \ @@ -157,6 +221,11 @@ test_it_LDADD = \ $(top_builddir)/src/plugins/libtest.la +libextractor_sid_la_SOURCES = \ + sid_extractor.c +libextractor_sid_la_LDFLAGS = \ + $(PLUGINFLAGS) + libextractor_s3m_la_SOURCES = \ s3m_extractor.c libextractor_s3m_la_LDFLAGS = \ diff --git a/src/plugins/deb_extractor.c b/src/plugins/deb_extractor.c @@ -1,10 +1,10 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -17,12 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include "platform.h" -#include "extractor.h" -#include <zlib.h> - -/* +/** + * @file plugins/deb_extractor.c + * @brief plugin to support Debian archives + * @author Christian Grothoff + * * The .deb is an ar-chive file. It contains a tar.gz file * named "control.tar.gz" which then contains a file 'control' * that has the meta-data. 
And which variant of the various @@ -33,14 +32,33 @@ * http://lists.debian.org/debian-policy/2003/12/msg00000.html * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html */ +#include "platform.h" +#include "extractor.h" +#include <zlib.h> + + +/** + * Maximum file size we allow for control.tar.gz files. + * This is a sanity check to avoid allocating huge amounts + * of memory. + */ +#define MAX_CONTROL_SIZE (1024 * 1024) +/** + * Re-implementation of 'strndup'. + * + * @param str string to duplicate + * @param n maximum number of bytes to copy + * @return NULL on error, otherwise 0-terminated copy of 'str' + * with at most n characters + */ static char * stndup (const char *str, size_t n) { char *tmp; - tmp = malloc (n + 1); - if (tmp == NULL) + + if (NULL == (tmp = malloc (n + 1))) return NULL; tmp[n] = '\0'; memcpy (tmp, str, n); @@ -48,15 +66,29 @@ stndup (const char *str, size_t n) } - -typedef struct +/** + * Entry in the mapping from control data to LE types. + */ +struct Matches { + /** + * Key in the Debian control file. + */ const char *text; + + /** + * Corresponding type in LE. + */ enum EXTRACTOR_MetaType type; -} Matches; +}; + -/* see also: "man 5 deb-control" */ -static Matches tmap[] = { +/** + * Map from deb-control entries to LE types. + * + * see also: "man 5 deb-control" + */ +static struct Matches tmap[] = { {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME}, {"Version: ", EXTRACTOR_METATYPE_PACKAGE_VERSION}, {"Section: ", EXTRACTOR_METATYPE_SECTION}, @@ -79,7 +111,13 @@ static Matches tmap[] = { /** - * Process the control file. 
+ * Process the "control" file from the control.tar.gz + * + * @param data decompressed control data + * @param size number of bytes in data + * @param proc function to call with meta data + * @param proc_cls closure for 'proc' + * @return 0 to continue extracting, 1 if we are done */ static int processControl (const char *data, @@ -90,62 +128,52 @@ processControl (const char *data, size_t pos; char *key; char *val; - + size_t colon; + size_t eol; + unsigned int i; + pos = 0; while (pos < size) { - size_t colon; - size_t eol; - int i; - - colon = pos; - while (data[colon] != ':') - { - if ((colon > size) || (data[colon] == '\n')) - return 0; - colon++; - } + for (colon = pos; ':' != data[colon]; colon++) + if ((colon > size) || ('\n' == data[colon])) + return 0; colon++; while ((colon < size) && (isspace ((unsigned char) data[colon]))) colon++; eol = colon; while ((eol < size) && - ((data[eol] != '\n') || - ((eol + 1 < size) && (data[eol + 1] == ' ')))) + (('\n' != data[eol]) || + ((eol + 1 < size) && (' ' == data[eol + 1])))) eol++; if ((eol == colon) || (eol > size)) return 0; - key = stndup (&data[pos], colon - pos); - if (key == NULL) + if (NULL == (key = stndup (&data[pos], colon - pos))) return 0; - i = 0; - while (tmap[i].text != NULL) + for (i = 0; NULL != tmap[i].text; i++) { - if (0 == strcmp (key, tmap[i].text)) - { - val = stndup (&data[colon], eol - colon); - if (val == NULL) - { - free (key); - return 0; - } - if (0 != proc (proc_cls, - "deb", - tmap[i].type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - val, - strlen(val) + 1)) - { - free (val); - free (key); - return 1; - } + if (0 != strcmp (key, tmap[i].text)) + continue; + if (NULL == (val = stndup (&data[colon], eol - colon))) + { + free (key); + return 0; + } + if (0 != proc (proc_cls, + "deb", + tmap[i].type, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + val, + strlen(val) + 1)) + { free (val); - break; - } - i++; - } + free (key); + return 1; + } + free (val); + break; + } free (key); pos = 
eol + 1; } @@ -153,62 +181,142 @@ processControl (const char *data, } -typedef struct +/** + * Header of an entry in a TAR file. + */ +struct TarHeader { + /** + * Filename. + */ char name[100]; + + /** + * File access modes. + */ char mode[8]; + + /** + * Owner of the file. + */ char userId[8]; + + /** + * Group of the file. + */ char groupId[8]; + + /** + * Size of the file, in octal. + */ char filesize[12]; + + /** + * Last modification time. + */ char lastModTime[12]; + + /** + * Checksum of the file. + */ char chksum[8]; + + /** + * Is the file a link? + */ char link; + + /** + * Destination of the link. + */ char linkName[100]; -} TarHeader; +}; + -typedef struct +/** + * Extended TAR header for USTar format. + */ +struct USTarHeader { - TarHeader tar; + /** + * Original TAR header. + */ + struct TarHeader tar; + + /** + * Additinal magic for USTar. + */ char magic[6]; + + /** + * Format version. + */ char version[2]; + + /** + * User name. + */ char uname[32]; + + /** + * Group name. + */ char gname[32]; + + /** + * Device major number. + */ char devmajor[8]; + + /** + * Device minor number. + */ char devminor[8]; + + /** + * Unknown (padding?). + */ char prefix[155]; -} USTarHeader; +}; + /** * Process the control.tar file. 
+ * + * @param data the deflated control.tar file data + * @param size number of bytes in data + * @param proc function to call with meta data + * @param proc_cls closure for 'proc' + * @return 0 to continue extracting, 1 if we are done */ static int processControlTar (const char *data, - const size_t size, + size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - TarHeader *tar; - USTarHeader *ustar; + struct TarHeader *tar; + struct USTarHeader *ustar; size_t pos; pos = 0; - while (pos + sizeof (TarHeader) < size) + while (pos + sizeof (struct TarHeader) < size) { unsigned long long fsize; char buf[13]; - tar = (TarHeader *) & data[pos]; - if (pos + sizeof (USTarHeader) < size) + tar = (struct TarHeader *) & data[pos]; + if (pos + sizeof (struct USTarHeader) < size) { - ustar = (USTarHeader *) & data[pos]; + ustar = (struct USTarHeader *) & data[pos]; if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar"))) - pos += 512; /* sizeof(USTarHeader); */ + pos += 512; /* sizeof (struct USTarHeader); */ else - pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ + pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */ } else { - pos += 257; /* sizeof(TarHeader); minus gcc alignment... */ + pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */ } memcpy (buf, &tar->filesize[0], 12); @@ -220,9 +328,10 @@ processControlTar (const char *data, if (0 == strncmp (&tar->name[0], "./control", strlen ("./control"))) { + /* found the 'control' file we were looking for */ return processControl (&data[pos], fsize, proc, proc_cls); } - if ((fsize & 511) != 0) + if (0 != (fsize & 511)) fsize = (fsize | 511) + 1; /* round up! 
*/ if (pos + fsize < pos) return 0; @@ -231,137 +340,184 @@ processControlTar (const char *data, return 0; } -#define MAX_CONTROL_SIZE (1024 * 1024) - -static voidpf -Emalloc (voidpf opaque, uInt items, uInt size) -{ - if (SIZE_MAX / size <= items) - return NULL; - return malloc (size * items); -} - -static void -Efree (voidpf opaque, voidpf ptr) -{ - free (ptr); -} /** * Process the control.tar.gz file. + * + * @param ec extractor context with control.tar.gz at current read position + * @param size number of bytes in the control file + * @return 0 to continue extracting, 1 if we are done */ static int -processControlTGZ (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processControlTGZ (struct EXTRACTOR_ExtractContext *ec, + unsigned long long size) { uint32_t bufSize; char *buf; + void *data; + unsigned char *cdata; z_stream strm; int ret; + ssize_t sret; + unsigned long long off; - bufSize = data[size - 4] + (data[size - 3] << 8) + (data[size - 2] << 16) + (data[size - 1] << 24); - if (bufSize > MAX_CONTROL_SIZE) + if (size > MAX_CONTROL_SIZE) + return 0; + if (NULL == (cdata = malloc (size))) return 0; + off = 0; + while (off < size) + { + if (0 >= (sret = ec->read (ec->cls, &data, size - off))) + { + free (cdata); + return 0; + } + memcpy (&cdata[off], data, sret); + off += sret; + } + bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16) + (cdata[size - 1] << 24); + if (bufSize > MAX_CONTROL_SIZE) + { + free (cdata); + return 0; + } + if (NULL == (buf = malloc (bufSize))) + { + free (cdata); + return 0; + } + ret = 0; memset (&strm, 0, sizeof (z_stream)); strm.next_in = (Bytef *) data; strm.avail_in = size; - strm.total_in = 0; - strm.zalloc = &Emalloc; - strm.zfree = &Efree; - strm.opaque = NULL; - if (Z_OK == inflateInit2 (&strm, 15 + 32)) - { - buf = malloc (bufSize); - if (buf == NULL) - { - inflateEnd (&strm); - return 0; - } + { strm.next_out = (Bytef *) buf; strm.avail_out = 
bufSize; inflate (&strm, Z_FINISH); if (strm.total_out > 0) - { - ret = processControlTar (buf, strm.total_out, proc, proc_cls); - inflateEnd (&strm); - free (buf); - return ret; - } - free (buf); + ret = processControlTar (buf, strm.total_out, + ec->proc, ec->cls); inflateEnd (&strm); } - return 0; + free (buf); + free (cdata); + return ret; } -typedef struct + +/** + * Header of an object in an "AR"chive file. + */ +struct ObjectHeader { + /** + * Name of the file. + */ char name[16]; + + /** + * Last modification time for the file. + */ char lastModTime[12]; + + /** + * User ID of the owner. + */ char userId[6]; + + /** + * Group ID of the owner. + */ char groupId[6]; + + /** + * File access modes. + */ char modeInOctal[8]; + + /** + * Size of the file (as decimal string) + */ char filesize[10]; + + /** + * Tailer of the object header ("`\n") + */ char trailer[2]; -} ObjectHeader; +}; -int -EXTRACTOR_deb_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +/** + * Main entry method for the DEB extraction plugin. 
+ * + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec) { - size_t pos; + uint64_t pos; int done = 0; - ObjectHeader *hdr; - unsigned long long fsize; + const struct ObjectHeader *hdr; + uint64_t fsize; + unsigned long long csize; char buf[11]; - - if (size < 128) - return 0; - if (0 != strncmp ("!<arch>\n", data, strlen ("!<arch>\n"))) - return 0; - pos = strlen ("!<arch>\n"); - while (pos + sizeof (ObjectHeader) < size) + void *data; + + fsize = ec->get_size (ec->cls); + if (fsize < 128) + return; + if (8 != + ec->read (ec->cls, &data, 8)) + return; + if (0 != strncmp ("!<arch>\n", data, 8)) + return; + pos = 8; + while (pos + sizeof (struct ObjectHeader) < fsize) { - hdr = (ObjectHeader *) & data[pos]; + if (pos != + ec->seek (ec->cls, pos, SEEK_SET)) + return; + if (sizeof (struct ObjectHeader) != + ec->read (ec->cls, &data, sizeof (struct ObjectHeader))) + return; + hdr = data; if (0 != strncmp (&hdr->trailer[0], "`\n", 2)) - return 0; + return; memcpy (buf, &hdr->filesize[0], 10); buf[10] = '\0'; - if (1 != sscanf (buf, "%10llu", &fsize)) - return 0; - pos += sizeof (ObjectHeader); - if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos)) - return 0; + if (1 != sscanf (buf, "%10llu", &csize)) + return; + pos += sizeof (struct ObjectHeader); + if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos)) + return; if (0 == strncmp (&hdr->name[0], - "control.tar.gz", strlen ("control.tar.gz"))) + "control.tar.gz", + strlen ("control.tar.gz"))) { - if (0 != processControlTGZ ((const unsigned char *) &data[pos], - fsize, proc, proc_cls)) - return 1; + if (0 != processControlTGZ (ec, + csize)) + return; done++; } if (0 == strncmp (&hdr->name[0], "debian-binary", strlen ("debian-binary"))) { - if (0 != proc (proc_cls, - "deb", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - "application/x-debian-package", - strlen 
("application/x-debian-package")+1)) - return 1; + if (0 != ec->proc (ec->cls, + "deb", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "application/x-debian-package", + strlen ("application/x-debian-package")+1)) + return; done++; } - pos += fsize; - if (done == 2) + pos += csize; + if (2 == done) break; /* no need to process the rest of the archive */ } - return 0; } + +/* end of deb_extractor.c */ diff --git a/src/plugins/nsf_extractor.c b/src/plugins/nsf_extractor.c @@ -1,10 +1,10 @@ /* * This file is part of libextractor. - * (C) 2006, 2009 Toni Ruottu + * (C) 2006, 2009, 2012 Toni Ruottu and Christian Grothoff * * libextractor is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2, or (at your + * by the Free Software Foundation; either version 3, or (at your * option) any later version. * * libextractor is distributed in the hope that it will be useful, but @@ -18,21 +18,22 @@ * Boston, MA 02111-1307, USA. * */ - +/** + * @file plugins/nsf_extractor.c + * @brief plugin to support Nes Sound Format files + * @author Toni Ruottu + * @author Christian Grothoff + */ #include "platform.h" #include "extractor.h" -#include "convert.h" -#define HEADER_SIZE 0x80 /* television system flags */ - #define PAL_FLAG 0x01 #define DUAL_FLAG 0x02 /* sound chip flags */ - #define VRCVI_FLAG 0x01 #define VRCVII_FLAG 0x02 #define FDS_FLAG 0x04 @@ -40,43 +41,108 @@ #define NAMCO_FLAG 0x10 #define SUNSOFT_FLAG 0x20 -#define UINT16 unsigned short +/** + * Header of an NSF file. + */ struct header { + /** + * Magic code. + */ char magicid[5]; + + /** + * NSF version number. + */ char nsfversion; - char songs; - char firstsong; - UINT16 loadaddr; - UINT16 initaddr; - UINT16 playaddr; + + /** + * Number of songs. + */ + unsigned char songs; + + /** + * Starting song. + */ + unsigned char firstsong; + + /** + * Unknown. 
+ */ + uint16_t loadaddr; + + /** + * Unknown. + */ + uint16_t initaddr; + + /** + * Unknown. + */ + uint16_t playaddr; + + /** + * Album title. + */ char title[32]; + + /** + * Artist name. + */ char artist[32]; + + /** + * Copyright information. + */ char copyright[32]; - UINT16 ntscspeed; + + /** + * Unknown. + */ + uint16_t ntscspeed; + + /** + * Unknown. + */ char bankswitch[8]; - UINT16 palspeed; + + /** + * Unknown. + */ + uint16_t palspeed; + + /** + * Flags for TV encoding. + */ char tvflags; + + /** + * Flags about the decoder chip. + */ char chipflags; }; -#define ADD(s,t) do { if (0 != proc (proc_cls, "nsf", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) + +/** + * Give metadata to LE; return if 'proc' returns non-zero. + * + * @param s metadata value as UTF8 + * @param t metadata type to use + */ +#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "nsf", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return; } while (0) -/* "extract" keyword from a Nes Sound Format file +/** + * "extract" meta data from a Nes Sound Format file * - * NSF specification version 1.61 was used, - * while this piece of software was originally - * written. + * NSF specification version 1.61 was used, while this piece of + * software was originally written. 
* + * @param ec extraction context */ -int -EXTRACTOR_nsf_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +void +EXTRACTOR_nsf_extract_method (struct EXTRACTOR_ExtractContext *ec) { char album[33]; char artist[33]; @@ -85,12 +151,18 @@ EXTRACTOR_nsf_extract (const unsigned char *data, char startingsong[32]; char nsfversion[32]; const struct header *head; - - if (size < HEADER_SIZE) - return 0; - head = (const struct header *) data; + void *data; + + if (sizeof (struct header) > + ec->read (ec->cls, + &data, + sizeof (struct header))) + return; + head = data; + + /* Check "magic" id bytes */ if (memcmp (head->magicid, "NESM\x1a", 5)) - return 0; + return; ADD ("audio/x-nsf", EXTRACTOR_METATYPE_MIMETYPE); snprintf (nsfversion, sizeof(nsfversion), @@ -100,50 +172,48 @@ EXTRACTOR_nsf_extract (const unsigned char *data, snprintf (songs, sizeof(songs), "%d", - head->songs); + (int) head->songs); ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); snprintf (startingsong, sizeof(startingsong), "%d", - head->firstsong); + (int) head->firstsong); ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG); - memcpy (&album, head->title, 32); album[32] = '\0'; ADD (album, EXTRACTOR_METATYPE_ALBUM); - memcpy (&artist, head->artist, 32); artist[32] = '\0'; ADD (artist, EXTRACTOR_METATYPE_ARTIST); - memcpy (&copyright, head->copyright, 32); copyright[32] = '\0'; ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT); - if (head->tvflags & DUAL_FLAG) + if (0 != (head->tvflags & DUAL_FLAG)) { ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); } else { - if (head->tvflags & PAL_FLAG) + if (0 != (head->tvflags & PAL_FLAG)) ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); else ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); } /* Detect Extra Sound Chips needed to play the files */ - if (head->chipflags & VRCVI_FLAG) + if (0 != (head->chipflags & VRCVI_FLAG)) ADD ("VRCVI", 
EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (head->chipflags & VRCVII_FLAG) + if (0 != (head->chipflags & VRCVII_FLAG)) ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (head->chipflags & FDS_FLAG) + if (0 != (head->chipflags & FDS_FLAG)) ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (head->chipflags & MMC5_FLAG) + if (0 != (head->chipflags & MMC5_FLAG)) ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (head->chipflags & NAMCO_FLAG) + if (0 != (head->chipflags & NAMCO_FLAG)) ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (head->chipflags & SUNSOFT_FLAG) + if (0 != (head->chipflags & SUNSOFT_FLAG)) ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - return 0; } + +/* end of nsf_extractor.c */ diff --git a/src/plugins/nsfe_extractor.c b/src/plugins/nsfe_extractor.c @@ -1,10 +1,10 @@ /* * This file is part of libextractor. - * (C) 2007, 2009 Toni Ruottu + * (C) 2007, 2009, 2012 Toni Ruottu and Christian Grothoff * * libextractor is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2, or (at your + * by the Free Software Foundation; either version 3, or (at your * option) any later version. * * libextractor is distributed in the hope that it will be useful, but @@ -18,20 +18,22 @@ * Boston, MA 02111-1307, USA. * */ - +/** + * @file plugins/nsfe_extractor.c + * @brief plugin to support Nes Sound Format files + * @author Toni Ruottu + * @author Christian Grothoff + */ #include "platform.h" #include "extractor.h" #include "convert.h" -#define HEADER_SIZE 0x04 /* television system flags */ - #define PAL_FLAG 0x01 #define DUAL_FLAG 0x02 /* sound chip flags */ - #define VRCVI_FLAG 0x01 #define VRCVII_FLAG 0x02 #define FDS_FLAG 0x04 @@ -39,29 +41,26 @@ #define NAMCO_FLAG 0x10 #define SUNSOFT_FLAG 0x20 -#define UINT16 unsigned short - +/** + * "Header" of an NSFE file. 
+ */ struct header { char magicid[4]; }; -struct infochunk -{ - UINT16 loadaddr; - UINT16 initaddr; - UINT16 playaddr; - char tvflags; - char chipflags; - char songs; - char firstsong; -}; -static int +/** + * Read an unsigned integer at the current offset. + * + * @param data input data to parse + * @return parsed integer + */ +static uint32_t nsfeuint (const char *data) { int i; - int value = 0; + uint32_t value = 0; for (i = 3; i > 0; i--) { @@ -73,8 +72,17 @@ nsfeuint (const char *data) } +/** + * Copy string starting at 'data' with at most + * 'size' bytes. (strndup). + * + * @param data input data to copy + * @param size number of bytes in 'data' + * @return copy of the string at data + */ static char * -nsfestring (const char *data, size_t size) +nsfestring (const char *data, + size_t size) { char *s; size_t length; @@ -83,105 +91,210 @@ nsfestring (const char *data, size_t size) while ( (length < size) && (data[length] != '\0') ) length++; - s = malloc (length + 1); - if (s == NULL) + if (NULL == (s = malloc (length + 1))) return NULL; - strncpy (s, data, length); - s[strlen (data)] = '\0'; + memcpy (s, data, length); + s[length] = '\0'; return s; } -#define ADD(s,t) do { if (0 != proc (proc_cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) -#define ADDF(s,t) do { if (0 != proc (proc_cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) { free(s); return 1; } free (s); } while (0) +/** + * Give metadata to LE; return if 'proc' returns non-zero. + * + * @param s metadata value as UTF8 + * @param t metadata type to use + */ +#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return 1; } while (0) + + +/** + * Give metadata to LE; return if 'proc' returns non-zero. 
+ * + * @param s metadata value as UTF8, free at the end + * @param t metadata type to use + */ +#define ADDF(s,t) do { if (0 != ec->proc (ec->cls, "nsfe", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) { free (s); return 1; } free (s); } while (0) + + +/** + * Format of an 'INFO' chunk. Last two bytes are optional. + */ +struct infochunk +{ + /** + * Unknown. + */ + uint16_t loadaddr; + + /** + * Unknown. + */ + uint16_t initaddr; + + /** + * Unknown. + */ + uint16_t playaddr; + + /** + * TV encoding flags. + */ + char tvflags; + /** + * Chipset encoding flags. + */ + char chipflags; + + /** + * Number of songs. + */ + unsigned char songs; + + /** + * Starting song. + */ + unsigned char firstsong; +}; + + +/** + * Extract data from the INFO chunk. + * + * @param ec extraction context + * @param size number of bytes in INFO chunk + * @return 0 to continue extrating + */ static int -libextractor_nsfe_info_extract(const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +info_extract (struct EXTRACTOR_ExtractContext *ec, + uint32_t size) { + void *data; const struct infochunk *ichunk; char songs[32]; - if (size < 8) + if (size < 8) return 0; - ichunk = (const struct infochunk *) data; - if (ichunk->tvflags & DUAL_FLAG) + if (size > + ec->read (ec->cls, + &data, + size)) + return 1; + ichunk = data; + + if (0 != (ichunk->tvflags & DUAL_FLAG)) { ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); } else { - if (ichunk->tvflags & PAL_FLAG) + if (0 != (ichunk->tvflags & PAL_FLAG)) ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); else ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); } - if (ichunk->chipflags & VRCVI_FLAG) + if (0 != (ichunk->chipflags & VRCVI_FLAG)) ADD ("VRCVI", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (ichunk->chipflags & VRCVII_FLAG) + if (0 != (ichunk->chipflags & VRCVII_FLAG)) ADD ("VRCVII", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (ichunk->chipflags & 
FDS_FLAG) + if (0 != (ichunk->chipflags & FDS_FLAG)) ADD ("FDS Sound", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (ichunk->chipflags & MMC5_FLAG) + if (0 != (ichunk->chipflags & MMC5_FLAG)) ADD ("MMC5 audio", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (ichunk->chipflags & NAMCO_FLAG) + if (0 != (ichunk->chipflags & NAMCO_FLAG)) ADD ("Namco 106", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (ichunk->chipflags & SUNSOFT_FLAG) + if (0 != (ichunk->chipflags & SUNSOFT_FLAG)) ADD ("Sunsoft FME-07", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - if (size < 9) + + if (size < sizeof (struct infochunk)) { ADD ("1", EXTRACTOR_METATYPE_SONG_COUNT); return 0; } snprintf (songs, - sizeof(songs), + sizeof (songs), "%d", ichunk->songs); ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); + snprintf (songs, + sizeof (songs), + "%d", + ichunk->firstsong); + ADD (songs, EXTRACTOR_METATYPE_STARTING_SONG); return 0; } +/** + * Extract data from the TLBL chunk. + * + * @param ec extraction context + * @param size number of bytes in TLBL chunk + * @return 0 to continue extrating + */ static int -libextractor_nsfe_tlbl_extract(const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) - +tlbl_extract (struct EXTRACTOR_ExtractContext *ec, + uint32_t size) { char *title; ssize_t left; size_t length; - - for (left = size; left > 0; left -= length) + void *data; + const char *cdata; + + if (size > + ec->read (ec->cls, + &data, + size)) + return 1; + cdata = data; + + left = size; + while (left > 0) { - title = nsfestring (&data[size - left], left); - if (title == NULL) - return 0; + title = nsfestring (&cdata[size - left], left); + if (NULL == title) + return 0; length = strlen (title) + 1; ADDF (title, EXTRACTOR_METATYPE_TITLE); + left -= length; } return 0; } + +/** + * Extract data from the AUTH chunk. 
+ * + * @param ec extraction context + * @param size number of bytes in AUTH chunk + * @return 0 to continue extrating + */ static int -libextractor_nsfe_auth_extract (const char *data, size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +auth_extract (struct EXTRACTOR_ExtractContext *ec, + uint32_t size) { char *album; char *artist; char *copyright; char *ripper; - int left = size; + uint32_t left = size; + void *data; + const char *cdata; if (left < 1) return 0; - album = nsfestring (&data[size - left], left); - if (album != NULL) + if (size > + ec->read (ec->cls, + &data, + size)) + return 1; + cdata = data; + + album = nsfestring (&cdata[size - left], left); + if (NULL != album) { left -= (strlen (album) + 1); ADDF (album, EXTRACTOR_METATYPE_ALBUM); @@ -189,8 +302,8 @@ libextractor_nsfe_auth_extract (const char *data, size_t size, return 0; } - artist = nsfestring (&data[size - left], left); - if (artist != NULL) + artist = nsfestring (&cdata[size - left], left); + if (NULL != artist) { left -= (strlen (artist) + 1); ADDF (artist, EXTRACTOR_METATYPE_ARTIST); @@ -198,67 +311,78 @@ libextractor_nsfe_auth_extract (const char *data, size_t size, return 0; } - copyright = nsfestring (&data[size - left], left); - if (copyright != NULL) + copyright = nsfestring (&cdata[size - left], left); + if (NULL != copyright) { left -= (strlen (copyright) + 1); ADDF (copyright, EXTRACTOR_METATYPE_COPYRIGHT); if (left < 1) return 0; } - ripper = nsfestring (&data[size - left], left); - if (ripper != NULL) + ripper = nsfestring (&cdata[size - left], left); + if (NULL != ripper) ADDF (ripper, EXTRACTOR_METATYPE_RIPPER); return 0; } -/* "extract" keyword from an Extended Nintendo Sound Format file +/** + * "extract" meta data from an Extended Nintendo Sound Format file * - * NSFE specification revision 2 (Sep. 3, 2003) - * was used, while this piece of software was - * originally written. + * NSFE specification revision 2 (Sep. 
3, 2003) was used, while this + * piece of software was originally written. * + * @param ec extraction context */ -int -EXTRACTOR_nsfe_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +void +EXTRACTOR_nsfe_extract_method (struct EXTRACTOR_ExtractContext *ec) { const struct header *head; - int i; - char chunkid[5] = " "; + void *data; + uint64_t off; + uint32_t chunksize; int ret; - - if (size < HEADER_SIZE) - return 0; - head = (const struct header *) data; - if (memcmp (head->magicid, "NSFE", 4)) - return 0; - ADD ("audio/x-nsfe", EXTRACTOR_METATYPE_MIMETYPE); - i = 4; /* Jump over magic id */ + + if (sizeof (struct header) > + ec->read (ec->cls, + &data, + sizeof (struct header))) + return; + head = data; + if (0 != memcmp (head->magicid, "NSFE", 4)) + return; + + if (0 != ec->proc (ec->cls, + "nsfe", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "audio/x-nsfe", + strlen ("audio/x-nsfe") + 1)) + return; + off = sizeof (struct header); ret = 0; - while (i + 7 < size && strncmp (chunkid, "NEND", 4)) /* CHECK */ + while (0 == ret) { - unsigned int chunksize = nsfeuint (&data[i]); - - i += 4; /* Jump over chunk size */ - memcpy (&chunkid, data + i, 4); - chunkid[4] = '\0'; - - i += 4; /* Jump over chunk id */ - if (!strncmp (chunkid, "INFO", 4)) - ret = libextractor_nsfe_info_extract (data + i, chunksize, proc, proc_cls); - else if (!strncmp (chunkid, "auth", 4)) - ret = libextractor_nsfe_auth_extract (data + i, chunksize, proc, proc_cls); - else if (!strncmp (chunkid, "tlbl", 4)) - ret = libextractor_nsfe_tlbl_extract (data + i, chunksize, proc, proc_cls); - /* Ignored chunks: DATA, NEND, plst, time, fade, BANK */ - i += chunksize; - if (ret != 0) + if (off != ec->seek (ec->cls, + off, + SEEK_SET)) break; + if (8 > + ec->read (ec->cls, + &data, + sizeof (struct header))) + break; + chunksize = nsfeuint (data); + off += 4 + chunksize; + if (0 == memcmp (data + 4, 
"INFO", 4)) + ret = info_extract (ec, chunksize); + else if (0 == memcmp (data + 4, "auth", 4)) + ret = auth_extract (ec, chunksize); + else if (0 == memcmp (data + 4, "tlbl", 4)) + ret = tlbl_extract (ec, chunksize); + /* Ignored chunks: DATA, NEND, plst, time, fade, BANK */ } - return ret; } + +/* end of nsfe_extractor.c */ diff --git a/src/plugins/odf_extractor.c b/src/plugins/odf_extractor.c @@ -28,11 +28,6 @@ #include "unzip.h" /** - * Should filenames be treated as case sensitive? - */ -#define CASESENSITIVITY 0 - -/** * Maximum length of a filename allowed inside the ZIP archive. */ #define MAXFILENAME 256 @@ -90,17 +85,17 @@ static struct Matches tmap[] = { * @return NULL if no mimetype could be found, otherwise the mime type */ static char * -libextractor_oo_getmimetype (EXTRACTOR_unzip_file uf) +libextractor_oo_getmimetype (struct EXTRACTOR_UnzipFile * uf) { char filename_inzip[MAXFILENAME]; - EXTRACTOR_unzip_file_info file_info; + struct EXTRACTOR_UnzipFileInfo file_info; char *buf; size_t buf_size; if (EXTRACTOR_UNZIP_OK != - EXTRACTOR_common_unzip_local_file (uf, - "mimetype", - CASESENSITIVITY)) + EXTRACTOR_common_unzip_go_find_local_file (uf, + "mimetype", + 2)) return NULL; if (EXTRACTOR_UNZIP_OK != EXTRACTOR_common_unzip_get_current_file_info (uf, @@ -113,11 +108,8 @@ libextractor_oo_getmimetype (EXTRACTOR_unzip_file uf) 0)) return NULL; if (EXTRACTOR_UNZIP_OK != - EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) - { - EXTRACTOR_common_unzip_close_current_file (uf); - return NULL; - } + EXTRACTOR_common_unzip_open_current_file (uf)) + return NULL; buf_size = file_info.uncompressed_size; if (buf_size > 1024) { @@ -164,40 +156,25 @@ void EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) { char filename_inzip[MAXFILENAME]; - EXTRACTOR_unzip_file uf; - EXTRACTOR_unzip_file_info file_info; + struct EXTRACTOR_UnzipFile *uf; + struct EXTRACTOR_UnzipFileInfo file_info; char *buf; char *pbuf; size_t buf_size; unsigned int i; - 
EXTRACTOR_unzip_filefunc_def io; char *mimetype; - if (size < 100) - return 0; - if ( !( ('P'==data[0]) && ('K'==data[1]) && (0x03==data[2]) && (0x04==data[3])) ) - return 0; - - io.zopen_file = &EXTRACTOR_common_unzip_zlib_open_file_func; - io.zread_file = &EXTRACTOR_common_unzip_zlib_read_file_func; - io.zwrite_file = NULL; - io.ztell_file = &EXTRACTOR_common_unzip_zlib_tell_file_func; - io.zseek_file = &EXTRACTOR_common_unzip_zlib_seek_file_func; - io.zclose_file = &EXTRACTOR_common_unzip_zlib_close_file_func; - io.zerror_file = &EXTRACTOR_common_unzip_zlib_testerror_file_func; - io.opaque = ec; - - if (NULL == (uf = EXTRACTOR_common_unzip_open2 ("ERROR", &io))) + if (NULL == (uf = EXTRACTOR_common_unzip_open (ec))) return; if (NULL != (mimetype = libextractor_oo_getmimetype (uf))) { - if (0 != proc (proc_cls, - "deb", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - mimetype, - strlen (mimetype) + 1)) + if (0 != ec->proc (ec->cls, + "odf", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + mimetype, + strlen (mimetype) + 1)) { EXTRACTOR_common_unzip_close (uf); free (mimetype); @@ -206,9 +183,9 @@ EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) free (mimetype); } if (EXTRACTOR_UNZIP_OK != - EXTRACTOR_common_unzip_local_file (uf, - METAFILE, - CASESENSITIVITY)) + EXTRACTOR_common_unzip_go_find_local_file (uf, + METAFILE, + 2)) { /* metafile not found */ EXTRACTOR_common_unzip_close (uf); @@ -219,14 +196,14 @@ EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) &file_info, filename_inzip, sizeof (filename_inzip), - NULL,0,NULL,0)) + NULL, 0, NULL, 0)) { /* problems accessing metafile */ EXTRACTOR_common_unzip_close (uf); return; } if (EXTRACTOR_UNZIP_OK != - EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) + EXTRACTOR_common_unzip_open_current_file (uf)) { /* problems with unzip */ EXTRACTOR_common_unzip_close (uf); @@ -320,16 +297,14 @@ 
EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) memcpy(key, spos, epos-spos); key[epos-spos] = '\0'; - if (0 != proc (proc_cls, - "odf", - tmap[i].type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - key, - epos - spos + 1)) - { - goto CLEANUP; - } + if (0 != ec->proc (ec->cls, + "odf", + tmap[i].type, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + key, + epos - spos + 1)) + goto CLEANUP; pbuf = epos; } else diff --git a/src/plugins/sid_extractor.c b/src/plugins/sid_extractor.c @@ -1,10 +1,10 @@ /* * This file is part of libextractor. - * (C) 2006, 2007 Toni Ruottu + * (C) 2006, 2007, 2012 Vidyut Samanta and Christian Grothoff * * libextractor is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2, or (at your + * by the Free Software Foundation; either version 3, or (at your * option) any later version. * * libextractor is distributed in the hope that it will be useful, but @@ -18,15 +18,17 @@ * Boston, MA 02111-1307, USA. * */ +/** + * @file plugins/sid_extractor.c + * @brief plugin to support Scream Tracker (S3M) files + * @author Toni Ruottu + * @author Christian Grothoff + */ #include "platform.h" #include "extractor.h" -#define SID1_HEADER_SIZE 0x76 -#define SID2_HEADER_SIZE 0x7c - -/* flags */ - +/* SID flags */ #define MUSPLAYER_FLAG 0x01 #define PLAYSID_FLAG 0x02 #define PAL_FLAG 0x04 @@ -34,51 +36,134 @@ #define MOS6581_FLAG 0x10 #define MOS8580_FLAG 0x20 +/** + * A "SID word". + */ typedef char sidwrd[2]; + +/** + * A "SID long". + */ typedef char sidlongwrd[4]; +/** + * Header of a SID file. + */ struct header { + /** + * Magic string. + */ char magicid[4]; + + /** + * Version number. + */ sidwrd sidversion; + + /** + * Unknown. + */ sidwrd dataoffset; + + /** + * Unknown. + */ sidwrd loadaddr; + + /** + * Unknown. + */ sidwrd initaddr; + + /** + * Unknown. 
+ */ sidwrd playaddr; + + /** + * Number of songs in file. + */ sidwrd songs; + + /** + * Starting song. + */ sidwrd firstsong; + + /** + * Unknown. + */ sidlongwrd speed; + + /** + * Title of the album. + */ char title[32]; + + /** + * Name of the artist. + */ char artist[32]; + + /** + * Copyright information. + */ char copyright[32]; - sidwrd flags; /* version 2 specific fields start */ + + /* version 2 specific fields start */ + + /** + * Flags + */ + sidwrd flags; + + /** + * Unknown. + */ char startpage; + + /** + * Unknown. + */ char pagelength; + + /** + * Unknown. + */ sidwrd reserved; }; + +/** + * Convert a 'sidword' to an integer. + * + * @param data the sidword + * @return corresponding integer value + */ static int sidword (const sidwrd data) { - int value = (unsigned char) data[0] * 0x100 + (unsigned char) data[1]; - return value; - + return (unsigned char) data[0] * 0x100 + (unsigned char) data[1]; } -#define ADD(s,t) do { if (0 != proc (proc_cls, "sid", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1)) return 1; } while (0) -/* "extract" keyword from a SID file - * - * This plugin is based on the nsf extractor +/** + * Give metadata to LE; return if 'proc' returns non-zero. * + * @param s metadata value as UTF8 + * @param t metadata type to use */ +#define ADD(s,t) do { if (0 != ec->proc (ec->cls, "sid", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1)) return; } while (0) + -int -EXTRACTOR_sid_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +/** + * Extract metadata from SID files. 
+ * + * @param ec extraction context + */ +void +EXTRACTOR_sid_extract_method (struct EXTRACTOR_ExtractContext *ec) { unsigned int flags; int version; @@ -89,17 +174,19 @@ EXTRACTOR_sid_extract (const char *data, char startingsong[32]; char sidversion[32]; const struct header *head; + void *data; - /* Check header size */ - - if (size < SID1_HEADER_SIZE) - return 0; - head = (const struct header *) data; + if (sizeof (struct header) > + ec->read (ec->cls, + &data, + sizeof (struct header))) + return; + head = data; /* Check "magic" id bytes */ - if (memcmp (head->magicid, "PSID", 4) && - memcmp (head->magicid, "RSID", 4)) - return 0; + if ( (0 != memcmp (head->magicid, "PSID", 4)) && + (0 != memcmp (head->magicid, "RSID", 4)) ) + return; /* Mime-type */ ADD ("audio/prs.sid", EXTRACTOR_METATYPE_MIMETYPE); @@ -107,25 +194,24 @@ EXTRACTOR_sid_extract (const char *data, /* Version of SID format */ version = sidword (head->sidversion); snprintf (sidversion, - sizeof(sidversion), + sizeof (sidversion), "%d", version); ADD (sidversion, EXTRACTOR_METATYPE_FORMAT_VERSION); /* Get song count */ snprintf (songs, - sizeof(songs), + sizeof (songs), "%d", sidword (head->songs)); ADD (songs, EXTRACTOR_METATYPE_SONG_COUNT); /* Get number of the first song to be played */ snprintf (startingsong, - sizeof(startingsong), + sizeof (startingsong), "%d", sidword (head->firstsong)); ADD (startingsong, EXTRACTOR_METATYPE_STARTING_SONG); - /* name, artist, copyright fields */ memcpy (&album, head->title, 32); album[32] = '\0'; @@ -139,9 +225,8 @@ EXTRACTOR_sid_extract (const char *data, copyright[32] = '\0'; ADD (copyright, EXTRACTOR_METATYPE_COPYRIGHT); - - if ( (version < 2) || (size < SID2_HEADER_SIZE)) - return 0; + if (version < 2) + return; /* Version 2 specific options follow * @@ -150,42 +235,25 @@ EXTRACTOR_sid_extract (const char *data, */ flags = sidword (head->flags); /* MUS data */ - if (flags & MUSPLAYER_FLAG) + if (0 != (flags & MUSPLAYER_FLAG)) ADD ("Compute!'s Sidplayer", 
EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE); /* PlaySID data */ - if (flags & PLAYSID_FLAG) + if (0 != (flags & PLAYSID_FLAG)) ADD ("PlaySID", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE); /* PAL or NTSC */ - - if (flags & PAL_FLAG) - { - if (flags & NTSC_FLAG) - ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); - else - ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); - } - else - { - if (flags & NTSC_FLAG) - ADD ("NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); - } + if (0 != (flags & NTSC_FLAG)) + ADD ("PAL/NTSC", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); + else if (0 != (flags & PAL_FLAG)) + ADD ("PAL", EXTRACTOR_METATYPE_BROADCAST_TELEVISION_SYSTEM); /* Detect SID Chips suitable for play the files */ - if (flags & MOS6581_FLAG) - { - if (flags & MOS8580_FLAG) - ADD ("MOS6581/MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - else - ADD ("MOS6581", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - } - else - { - if (flags & MOS8580_FLAG) - ADD ("MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); - } - - return 0; + if (0 != (flags & MOS8580_FLAG)) + ADD ("MOS6581/MOS8580", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); + else if (0 != (flags & MOS6581_FLAG)) + ADD ("MOS6581", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE); } + +/* end of sid_extractor.c */ diff --git a/src/plugins/test_deb.c b/src/plugins/test_deb.c @@ -0,0 +1,150 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file plugins/test_deb.c + * @brief testcase for deb plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + +/** + * Main function for the DEB testcase. + * + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return 0 on success + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData deb_bzip2_sol[] = + { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "application/x-debian-package", + strlen ("application/x-debian-package") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_NAME, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "bzip2", + strlen ("bzip2") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_VERSION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "1.0.6-4", + strlen ("1.0.6-4") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_TARGET_ARCHITECTURE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "i386", + strlen ("i386") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_MAINTAINER, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Anibal Monsalve Salazar <anibal@debian.org>", + strlen ("Anibal Monsalve Salazar <anibal@debian.org>") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "113", /* FIXME: should this be 'kb'? 
*/ + strlen ("113") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "libbz2-1.0 (= 1.0.6-4), libc6 (>= 2.4)", + strlen ("libbz2-1.0 (= 1.0.6-4), libc6 (>= 2.4)") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_SUGGESTS, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "bzip2-doc", + strlen ("bzip2-doc") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PACKAGE_REPLACES, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "libbz2 (<< 0.9.5d-3)", + strlen ("libbz2 (<< 0.9.5d-3)") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_SECTION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "utils", + strlen ("utils") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_UPLOAD_PRIORITY, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "standard", + strlen ("standard") + 1, + 0 + }, +#if 0 + { + EXTRACTOR_METATYPE_DESCRIPTION, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "", + strlen ("") + 1, + 0 + }, +#endif + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = + { + { "testdata/deb_bzip2.deb", + deb_bzip2_sol }, + { NULL, NULL } + }; + return ET_main ("deb", ps); +} + +/* end of test_deb.c */ diff --git a/src/plugins/test_odf.c b/src/plugins/test_odf.c @@ -0,0 +1,100 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. 
If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file plugins/test_odf.c + * @brief testcase for odf plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + +/** + * Main function for the ODF testcase. + * + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return 0 on success + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData odf_cg_sol[] = + { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "application/vnd.oasis.opendocument.text", + strlen ("application/vnd.oasis.opendocument.text") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "OpenOffice.org/3.2$Unix OpenOffice.org_project/320m12$Build-9483", + strlen ("OpenOffice.org/3.2$Unix OpenOffice.org_project/320m12$Build-9483") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_PAGE_COUNT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "1", + strlen ("1") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CREATION_DATE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "2005-11-22T11:44:00", + strlen ("2005-11-22T11:44:00") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_UNKNOWN_DATE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "2010-06-09T13:09:34", + strlen ("2010-06-09T13:09:34") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_TITLE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Anhang 1: Profile der beteiligten Wissenschaftler", + strlen ("Anhang 1: Profile der beteiligten Wissenschaftler") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = + { + { "testdata/odf_cg.odt", + odf_cg_sol }, + { NULL, NULL } + }; + return ET_main ("odf", ps); +} + +/* end of test_odf.c */ diff --git a/src/plugins/test_zip.c b/src/plugins/test_zip.c @@ -0,0 +1,108 @@ +/* + This file is part of libextractor. 
+ (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file plugins/test_zip.c + * @brief testcase for zip plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + +/** + * Main function for the ZIP testcase. + * + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return 0 on success + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData zip_test_sol[] = + { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "application/zip", + strlen ("application/zip") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "global zipfile comment", + strlen ("global zipfile comment") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "ChangeLog", + strlen ("ChangeLog") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "test.png", + strlen ("test.png") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "comment for test.png", + strlen ("comment for test.png") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "test.jpg", + 
strlen ("test.jpg") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + "comment for test.jpg", + strlen ("comment for test.jpg") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = + { + { "testdata/zip_test.zip", + zip_test_sol }, + { NULL, NULL } + }; + return ET_main ("zip", ps); +} + +/* end of test_zip.c */ diff --git a/src/plugins/testdata/deb_bzip2.deb b/src/plugins/testdata/deb_bzip2.deb Binary files differ. diff --git a/src/plugins/testdata/odf_cg.odt b/src/plugins/testdata/odf_cg.odt Binary files differ. diff --git a/src/plugins/testdata/zip_test.zip b/src/plugins/testdata/zip_test.zip Binary files differ. diff --git a/src/plugins/zip_extractor.c b/src/plugins/zip_extractor.c @@ -1,50 +1,10 @@ -/** - zipextractor.c version 0.0.2 - - Changes from 0.0.1 to 0.0.2 - -> Searches for central dir struct from end of file if this is a self-extracting executable - - - This file was based on mp3extractor.c (0.1.2) - - Currently, this only returns a list of the filenames within a zipfile - and any comments on each file or the whole file itself. File sizes, - modification times, and crc's are currently ignored. - - TODO: Break the comments up into small, atomically, searchable chunks (keywords) - - might need some knowledge of English? - - It returns: - - one EXTRACTOR_MIMETYPE - multiple EXTRACTOR_FILENAME - multiple EXTRACTOR_COMMENT - - ... from a .ZIP file - - TODO: EXTRACTOR_DATE, EXTRACTOR_DESCRIPTION, EXTRACTOR_KEYWORDS, others? - - Does NOT test data integrity (CRCs etc.) - - This version is not recursive (i.e. doesn't look inside zip - files within zip files) - - TODO: Run extract on files inside of archive (?) (i.e. gif, mp3, etc.) - - The current .ZIP format description: - ftp://ftp.pkware.com/appnote.zip - - No Copyright 2003 Julia Wolf - - */ - /* * This file is part of libextractor. 
- * (C) 2002, 2003, 2009 Vidyut Samanta and Christian Grothoff + * (C) 2012 Vidyut Samanta and Christian Grothoff * * libextractor is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published - * by the Free Software Foundation; either version 2, or (at your + * by the Free Software Foundation; either version 3, or (at your * option) any later version. * * libextractor is distributed in the hope that it will be useful, but @@ -57,354 +17,113 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ - +/** + * @file plugins/zip_extractor.c + * @brief plugin to support ZIP files + * @author Christian Grothoff + */ #include "platform.h" +#include <ctype.h> #include "extractor.h" - -#define DEBUG_EXTRACT_ZIP 0 - -/* In a zipfile there are two kinds of comments. One is a big one for the - entire .zip, it's usually a BBS ad. The other is a small comment on each - individual file; most people don't use this. - */ - -/* TODO: zip_entry linked list is handeled kinda messily, should clean up (maybe) */ - typedef struct -{ - char *filename; - char *comment; - void *next; - } zip_entry; +#include "unzip.h" -/* mimetype = application/zip */ -int -EXTRACTOR_zip_extract (const unsigned char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - int ret; - void *tmp; - zip_entry * info; - zip_entry * start; - char *filecomment; - const unsigned char *pos; - unsigned int offset, stop; - unsigned int name_length; - unsigned int extra_length; - unsigned int comment_length; - unsigned int filecomment_length; - unsigned int entry_count; -#if DEBUG_EXTRACT_ZIP - unsigned int entry_total; -#endif - - /* I think the smallest zipfile you can have is about 120 bytes */ - if ((NULL == data) || (size < 100)) - return 0; - if (! 
(('P' == data[0]) && ('K' == data[1]) && (0x03 == data[2]) - && (0x04 == data[3]))) - return 0; - - /* The filenames for each file in a zipfile are stored in two locations. - * There is one at the start of each entry, just before the compressed data, - * and another at the end in a 'central directory structure'. - * - * In order to catch self-extracting executables, we scan backwards from the end - * of the file looking for the central directory structure. The previous version - * of this went forewards through the local headers, but that only works for plain - * vanilla zip's and I don't feel like writing a special case for each of the dozen - * self-extracting executable stubs. - * - * This assumes that the zip file is considered to be non-corrupt/non-truncated. - * If it is truncated then it's not considered to be a zip and skipped. - * - */ - - /* From appnote.iz and appnote.txt (more or less) - * - * (this is why you always need to put in the last floppy if you span disks) - * - * 0- 3 end of central dir signature 4 bytes (0x06054b50) P K ^E ^F - * 4- 5 number of this disk 2 bytes - * 6- 7 number of the disk with the - * start of the central directory 2 bytes - * 8- 9 total number of entries in - * the central dir on this disk 2 bytes - * 10-11 total number of entries in - * the central dir 2 bytes - * 12-15 size of the central directory 4 bytes - * 16-19 offset of start of central - * directory with respect to - * the starting disk number 4 bytes - * 20-21 zipfile comment length 2 bytes - * 22-?? zipfile comment (variable size) max length 65536 bytes - */ - - /* the signature can't be more than 22 bytes from the end */ - offset = size - 22; - pos = &data[offset]; - stop = 0; - if (((signed int) size - 65556) > 0) - stop = size - 65556; - - /* not using int 0x06054b50 so that we don't have to deal with endianess issues. - break out if we go more than 64K backwards and havn't found it, or if we hit the - begining of the file. 
*/ - while ((!(('P' == pos[0]) && ('K' == pos[1]) && (0x05 == pos[2]) - && (0x06 == pos[3]))) && (offset > stop)) - pos = &data[offset--]; - if (offset == stop) - { -#if DEBUG_EXTRACT_ZIP - fprintf (stderr, - "Did not find end of central directory structure signature. offset: %i\n", - offset); - -#endif - return 0; - } - /* offset should now point to the start of the end-of-central directory structure */ - /* and pos[0] should be pointing there too */ - /* so slurp down filecomment while here... */ - filecomment_length = pos[20] + (pos[21] << 8); - if (filecomment_length + offset + 22 > size) - { - return 0; /* invalid zip file format! */ - } - filecomment = NULL; - if (filecomment_length > 0) +/** + * Main entry method for the 'application/zip' extraction plugin. + * + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_zip_extract_method (struct EXTRACTOR_ExtractContext *ec) +{ + struct EXTRACTOR_UnzipFile *uf; + struct EXTRACTOR_UnzipFileInfo fi; + char fname[256]; + char fcomment[256]; + + if (NULL == (uf = EXTRACTOR_common_unzip_open (ec))) + return; + if ( (EXTRACTOR_UNZIP_OK == + EXTRACTOR_common_unzip_go_find_local_file (uf, + "meta.xml", + 2)) || + (EXTRACTOR_UNZIP_OK == + EXTRACTOR_common_unzip_go_find_local_file (uf, + "META-INF/MANIFEST.MF", + 2)) ) { - filecomment = malloc (filecomment_length + 1); - if (filecomment != NULL) - { - memcpy (filecomment, &pos[22], filecomment_length); - filecomment[filecomment_length] = '\0'; - } + /* not a normal zip, might be odf, jar, etc. 
*/ + goto CLEANUP; } - -#if DEBUG_EXTRACT_ZIP - if ((0 != pos[4]) && (0 != pos[5])) - fprintf (stderr, - "WARNING: This seems to be the last disk in a multi-volume" - " ZIP archive, and so this might not work.\n"); -#endif - -#if DEBUG_EXTRACT_ZIP - if ((pos[8] != pos[10]) && (pos[9] != pos[11])) - fprintf (stderr, - "WARNING: May not be able to find all the files in this" - " ZIP archive (no multi-volume support right now).\n"); - entry_total = pos[10] + (pos[11] << 8); -#endif - entry_count = 0; - - /* jump to start of central directory, ASSUMING that the starting disk that it's on is disk 0 */ - /* starting disk would otherwise be pos[6]+pos[7]<<8 */ - offset = pos[16] + (pos[17] << 8) + (pos[18] << 16) + (pos[19] << 24); /* offset of cent-dir from start of disk 0 */ - - /* stop = pos[12] + (pos[13]<<8) + (pos[14]<<16) + (pos[15]<<24); *//* length of central dir */ - if (offset + 46 > size) + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_go_to_first_file (uf)) + { + /* zip malformed? 
*/ + goto CLEANUP; + } + if (0 != + ec->proc (ec->cls, + "zip", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "application/zip", + strlen ("application/zip") + 1)) + goto CLEANUP; + if (EXTRACTOR_UNZIP_OK == + EXTRACTOR_common_unzip_get_global_comment (uf, + fcomment, + sizeof (fcomment))) { - - /* not a zip */ - if (filecomment != NULL) - free (filecomment); - return 0; + if ( (0 != strlen (fcomment)) && + (0 != + ec->proc (ec->cls, + "zip", + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, + "text/plain", + fcomment, + strlen (fcomment) + 1))) + goto CLEANUP; } - pos = &data[offset]; /* jump */ - - /* we should now be at the begining of the central directory structure */ - - /* from appnote.txt and appnote.iz (mostly) - * - * 0- 3 central file header signature 4 bytes (0x02014b50) - * 4- 5 version made by 2 bytes - * 6- 7 version needed to extract 2 bytes - * 8- 9 general purpose bit flag 2 bytes - * 10-11 compression method 2 bytes - * 12-13 last mod file time 2 bytes - * 14-15 last mod file date 2 bytes - * 16-19 crc-32 4 bytes - * 20-23 compressed size 4 bytes - * 24-27 uncompressed size 4 bytes - * 28-29 filename length 2 bytes - * 30-31 extra field length 2 bytes - * 32-33 file comment length 2 bytes - * 34-35 disk number start 2 bytes - * 36-37 internal file attributes 2 bytes - * 38-41 external file attributes 4 bytes - * 42-45 relative offset of local header 4 bytes - * - * 46-?? filename (variable size) - * ?- ? extra field (variable size) - * ?- ? file comment (variable size) - */ - if (!(('P' == pos[0]) && ('K' == pos[1]) && (0x01 == pos[2]) - && (0x02 == pos[3]))) - { -#if DEBUG_EXTRACT_ZIP - fprintf (stderr, - "Did not find central directory structure signature. 
offset: %i\n", - offset); - -#endif - if (filecomment != NULL) - free (filecomment); - return 0; - } - start = NULL; - info = NULL; - do - { /* while ( (0x01==pos[2])&&(0x02==pos[3]) ) */ - entry_count++; /* check to make sure we found everything at the end */ - name_length = pos[28] + (pos[29] << 8); - extra_length = pos[30] + (pos[31] << 8); - comment_length = pos[32] + (pos[33] << 8); - if (name_length + extra_length + comment_length + offset + 46 > size) - { - - /* ok, invalid, abort! */ - break; - } - -#if DEBUG_EXTRACT_ZIP - fprintf (stderr, "Found filename length %i Comment length: %i\n", - name_length, comment_length); - -#endif - /* yay, finally get filenames */ - if (start == NULL) - { - start = malloc (sizeof (zip_entry)); - if (start == NULL) - break; - start->next = NULL; - info = start; - } - else - { - info->next = malloc (sizeof (zip_entry)); - if (info->next == NULL) - break; - info = info->next; - info->next = NULL; - } - info->filename = malloc (name_length + 1); - info->comment = malloc (comment_length + 1); - - /* (strings in zip files are not null terminated) */ - if (info->filename != NULL) - { - memcpy (info->filename, &pos[46], name_length); - info->filename[name_length] = '\0'; - } - if (info->comment != NULL) - { - memcpy (info->comment, &pos[46 + name_length + extra_length], - comment_length); - info->comment[comment_length] = '\0'; - } - offset += 46 + name_length + extra_length + comment_length; - pos = &data[offset]; - /* check for next header entry (0x02014b50) or (0x06054b50) if at end */ - if (('P' != pos[0]) && ('K' != pos[1])) - { -#if DEBUG_EXTRACT_ZIP - fprintf (stderr, - "Did not find next header in central directory.\n"); - -#endif - info = start; - while (info != NULL) - { - start = info->next; - if (info->filename != NULL) - free (info->filename); - if (info->comment != NULL) - free (info->comment); - free (info); - info = start; - } - if (filecomment != NULL) - free (filecomment); - return 0; - } - } - while ((0x01 == 
pos[2]) && (0x02 == pos[3])); - - /* end list */ - - /* TODO: should this return an error? indicates corrupt zipfile (or - disk missing in middle of multi-disk)? */ -#if DEBUG_EXTRACT_ZIP - if (entry_count != entry_total) - fprintf (stderr, - "WARNING: Did not find all of the zipfile entries that we should have.\n"); -#endif - - ret = proc (proc_cls, - "zip", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - "application/zip", - strlen ("application/zip")+1); - if ( (filecomment != NULL) && (ret != 0) ) - { - ret = proc (proc_cls, - "zip", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - filecomment, - strlen (filecomment)+1); - } - if (filecomment != NULL) - free (filecomment); - - - /* if we've gotten to here then there is at least one zip entry (see get_zipinfo call above) */ - /* note: this free()'s the info list as it goes */ - info = start; - while (NULL != info) { - if (info->filename != NULL) - { - if ( (ret == 0) && (strlen (info->filename)) ) - { - ret = proc (proc_cls, + if (EXTRACTOR_UNZIP_OK == + EXTRACTOR_common_unzip_get_current_file_info (uf, + &fi, + fname, + sizeof (fname), + NULL, 0, + fcomment, + sizeof (fcomment))) + { + if ( (0 != strlen (fname)) && + (0 != + ec->proc (ec->cls, "zip", EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", - info->filename, - strlen (info->filename)+1); - } - free (info->filename); - } - if (info->comment != NULL) - { - if ( (ret == 0) && (strlen (info->comment) > 0) ) - { - ret = proc (proc_cls, + fname, + strlen (fname) + 1))) + goto CLEANUP; + if ( (0 != strlen (fcomment)) && + (0 != + ec->proc (ec->cls, "zip", - EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_UTF8, + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", - info->comment, - strlen (info->comment)+1); - } - free (info->comment); - } - tmp = info; - info = info->next; - free (tmp); + fcomment, + strlen (fcomment) + 
1))) + goto CLEANUP; + } } - return ret; + while (EXTRACTOR_UNZIP_OK == + EXTRACTOR_common_unzip_go_to_next_file (uf)); + +CLEANUP: + (void) EXTRACTOR_common_unzip_close (uf); } - +/* end of zip_extractor.c */