libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 5f10e5b8eadbd862aeb78ebf3b6aa2a425372392
parent 50ee542a691d498bf546661702e90ed57b28664e
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sun, 12 Aug 2012 12:50:46 +0000

porting PNG plugin

Diffstat:
Msrc/common/pack.c | 4+++-
Msrc/common/unzip.c | 4++--
Msrc/common/unzip.h | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msrc/plugins/Makefile.am | 18+++++++++++++++++-
Msrc/plugins/odf_extractor.c | 454+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/plugins/png_extractor.c | 421++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Msrc/plugins/test_jpeg.c | 2+-
Asrc/plugins/test_png.c | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Rtest/test.png -> src/plugins/testdata/png_image.png | 0
9 files changed, 689 insertions(+), 423 deletions(-)

diff --git a/src/common/pack.c b/src/common/pack.c @@ -39,7 +39,9 @@ typedef signed int sword; int -EXTRACTOR_common_cat_unpack (const void *buf, const char *fmt, ...) +EXTRACTOR_common_cat_unpack (const void *buf, + const char *fmt, + ...) { va_list ap; word maxlen, len, *wordp; diff --git a/src/common/unzip.c b/src/common/unzip.c @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -695,7 +695,7 @@ EXTRACTOR_common_unzip_close_current_file (EXTRACTOR_unzip_file file) (!pfile_in_zip_read_info->raw)) { if (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait) - err=EXTRACTOR_UNZIP_CRCERROR; + err=EXTRACTOR_UNZIP_CRCERROR; } diff --git a/src/common/unzip.h b/src/common/unzip.h @@ -1,10 +1,10 @@ -/* +]/* This file is part of libextractor. (C) 2008 Christian Grothoff (and other contributing authors) libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -34,18 +34,20 @@ typedef voidp EXTRACTOR_unzip_file; + typedef struct EXTRACTOR_unzip_filefunc_def_s { - voidpf ( *zopen_file) (voidpf opaque, const char* filename, int mode); - uLong ( *zread_file) (voidpf opaque, voidpf stream, void* buf, uLong size); - uLong ( *zwrite_file) (voidpf opaque, voidpf stream, const void* buf, uLong size); - long ( *ztell_file) (voidpf opaque, voidpf stream); - long ( *zseek_file) (voidpf opaque, voidpf stream, uLong offset, int origin); - int ( *zclose_file) (voidpf opaque, voidpf stream); - int ( *zerror_file) (voidpf opaque, voidpf stream); + voidpf ( *zopen_file) (voidpf opaque, const char* filename, int mode); + uLong ( *zread_file) (voidpf opaque, voidpf stream, void* buf, uLong size); + uLong ( *zwrite_file) (voidpf opaque, voidpf stream, const void* buf, uLong size); + long ( *ztell_file) (voidpf opaque, voidpf stream); + long ( *zseek_file) (voidpf opaque, voidpf stream, uLong offset, int origin); + int ( *zclose_file) (voidpf opaque, voidpf stream); + int ( *zerror_file) (voidpf opaque, voidpf stream); voidpf opaque; } EXTRACTOR_unzip_filefunc_def; + /* tm_unz contain date/time info */ typedef struct EXTRACTOR_unzip_tm_unz_s { @@ -57,6 +59,7 @@ typedef struct EXTRACTOR_unzip_tm_unz_s uInt tm_year; /* years - [1980..2044] */ } EXTRACTOR_unzip_tm_unz; + /* unz_file_info contain information about a file in the zipfile */ typedef struct EXTRACTOR_unzip_file_info_s { @@ -79,48 +82,96 @@ typedef struct EXTRACTOR_unzip_file_info_s EXTRACTOR_unzip_tm_unz tmu_date; } EXTRACTOR_unzip_file_info; -int EXTRACTOR_common_unzip_string_file_name_compare(const char* fileName1, - const char* fileName2, int iCaseSensitivity); -int EXTRACTOR_common_unzip_go_to_first_file(EXTRACTOR_unzip_file file); +int +EXTRACTOR_common_unzip_string_file_name_compare (const char* fileName1, + const char* fileName2, + int iCaseSensitivity); + + +int +EXTRACTOR_common_unzip_go_to_first_file (EXTRACTOR_unzip_file file); + + +EXTRACTOR_unzip_file +EXTRACTOR_common_unzip_open2 (const char *path, + EXTRACTOR_unzip_filefunc_def* pzlib_filefunc_def); + + +int +EXTRACTOR_common_unzip_close_current_file (EXTRACTOR_unzip_file file); + + +int +EXTRACTOR_common_unzip_close (EXTRACTOR_unzip_file file); + + +int +EXTRACTOR_common_unzip_get_current_file_info (EXTRACTOR_unzip_file file, + EXTRACTOR_unzip_file_info *pfile_info, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize); + + +int +EXTRACTOR_common_unzip_go_to_next_file (EXTRACTOR_unzip_file file); + + +int +EXTRACTOR_common_unzip_local_file (EXTRACTOR_unzip_file file, + const char *szFileName, + int iCaseSensitivity); + + +int +EXTRACTOR_common_unzip_read_current_file (EXTRACTOR_unzip_file file, + voidp buf, + unsigned len); -EXTRACTOR_unzip_file EXTRACTOR_common_unzip_open2(const char *path, - EXTRACTOR_unzip_filefunc_def* pzlib_filefunc_def); -int EXTRACTOR_common_unzip_close_current_file(EXTRACTOR_unzip_file file); +int +EXTRACTOR_common_unzip_open_current_file3 (EXTRACTOR_unzip_file file, + int* method, + int* level, + int raw); -int EXTRACTOR_common_unzip_close(EXTRACTOR_unzip_file file); -int EXTRACTOR_common_unzip_get_current_file_info(EXTRACTOR_unzip_file file, - EXTRACTOR_unzip_file_info *pfile_info, char *szFileName, uLong fileNameBufferSize, - void *extraField, uLong extraFieldBufferSize, char *szComment, - uLong commentBufferSize); +voidpf +EXTRACTOR_common_unzip_zlib_open_file_func (voidpf opaque, + const char* filename, + int mode); -int EXTRACTOR_common_unzip_go_to_next_file(EXTRACTOR_unzip_file file); -int EXTRACTOR_common_unzip_local_file(EXTRACTOR_unzip_file file, const char *szFileName, - int iCaseSensitivity); +uLong +EXTRACTOR_common_unzip_zlib_read_file_func (voidpf opaque, + voidpf stream, + void* buf, + uLong size); -int EXTRACTOR_common_unzip_read_current_file(EXTRACTOR_unzip_file file, voidp buf, - unsigned len); -int EXTRACTOR_common_unzip_open_current_file3(EXTRACTOR_unzip_file file, int* method, - int* level, int raw); +long +EXTRACTOR_common_unzip_zlib_tell_file_func (voidpf opaque, + voidpf stream); -voidpf EXTRACTOR_common_unzip_zlib_open_file_func(voidpf opaque, - const char* filename, int mode); -uLong EXTRACTOR_common_unzip_zlib_read_file_func(voidpf opaque, voidpf stream, - void* buf, uLong size); +long +EXTRACTOR_common_unzip_zlib_seek_file_func (voidpf opaque, + voidpf stream, + uLong offset, + int origin); -long EXTRACTOR_common_unzip_zlib_tell_file_func(voidpf opaque, voidpf stream); -long EXTRACTOR_common_unzip_zlib_seek_file_func(voidpf opaque, voidpf stream, - uLong offset, int origin); +int +EXTRACTOR_common_unzip_zlib_close_file_func (voidpf opaque, + voidpf stream); -int EXTRACTOR_common_unzip_zlib_close_file_func(voidpf opaque, voidpf stream); -int EXTRACTOR_common_unzip_zlib_testerror_file_func(voidpf opaque, - voidpf stream); +int +EXTRACTOR_common_unzip_zlib_testerror_file_func (voidpf opaque, + voidpf stream); #endif /* UNZIP_H_ */ diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -29,7 +29,8 @@ EXTRA_DIST = template_extractor.c \ testdata/ole2_msword.doc \ testdata/ole2_starwriter40.sdw \ testdata/ole2_blair.doc \ - testdata/ole2_excel.xls + testdata/ole2_excel.xls \ + testdata/png_image.png if HAVE_VORBISFILE PLUGIN_OGG=libextractor_ogg.la @@ -79,6 +80,7 @@ endif plugin_LTLIBRARIES = \ libextractor_it.la \ + libextractor_png.la \ libextractor_xm.la \ libextractor_s3m.la \ libextractor_wav.la \ @@ -100,6 +102,7 @@ check_PROGRAMS = \ test_wav \ test_it \ test_s3m \ + test_png \ $(TEST_OGG) \ $(TEST_MIME) \ $(TEST_GIF) \ @@ -130,6 +133,19 @@ libextractor_xm_la_LDFLAGS = \ $(PLUGINFLAGS) +libextractor_png_la_SOURCES = \ + png_extractor.c +libextractor_png_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_png_la_LIBADD = \ + $(top_builddir)/src/common/libextractor_common.la + +test_png_SOURCES = \ + test_png.c +test_png_LDADD = \ + $(top_builddir)/src/plugins/libtest.la + + libextractor_it_la_SOURCES = \ it_extractor.c libextractor_it_la_LDFLAGS = \ diff --git a/src/plugins/odf_extractor.c b/src/plugins/odf_extractor.c @@ -1,10 +1,10 @@ /* This file is part of libextractor. - (C) 2004, 2009 Vidyut Samanta and Christian Grothoff + (C) 2004, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -17,28 +17,53 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +/** + * @file plugins/odf_extractor.c + * @brief plugin to support ODF files + * @author Christian Grothoff + */ #include "platform.h" #include <ctype.h> #include "extractor.h" -#include "zlib.h" #include "unzip.h" -#define CASESENSITIVITY (0) -#define MAXFILENAME (256) +/** + * Should filenames be treated as case sensitive? + */ +#define CASESENSITIVITY 0 +/** + * Maximum length of a filename allowed inside the ZIP archive. + */ +#define MAXFILENAME 256 /** * Name of the file with the meta-data in OO documents. */ #define METAFILE "meta.xml" -typedef struct { + +/** + * Mapping from ODF meta data strings to LE types. + */ +struct Matches +{ + /** + * ODF description. + */ const char * text; + + /** + * Corresponding LE type. + */ enum EXTRACTOR_MetaType type; -} Matches; +}; + -static Matches tmap[] = { +/** + * NULL-terminated map from ODF meta data strings to LE types. + */ +static struct Matches tmap[] = { { "meta:generator", EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, { "meta:page-count", EXTRACTOR_METATYPE_PAGE_COUNT }, { "meta:creation-date", EXTRACTOR_METATYPE_CREATION_DATE }, @@ -53,101 +78,106 @@ static Matches tmap[] = { { "meta:user-defined meta:name=\"Info 2\"", EXTRACTOR_METATYPE_COMMENT }, { "meta:user-defined meta:name=\"Info 3\"", EXTRACTOR_METATYPE_COMMENT }, { "meta:user-defined meta:name=\"Info 4\"", EXTRACTOR_METATYPE_COMMENT }, - { NULL, 0 }, + { NULL, 0 } }; /** - * returns either zero when mimetype info is missing - * or an already malloc'ed string containing the mimetype info. + * Obtain the mimetype of the archive by reading the 'mimetype' + * file of the ZIP. + * + * @param uf unzip context to extract the mimetype from + * @return NULL if no mimetype could be found, otherwise the mime type */ static char * -libextractor_oo_getmimetype(EXTRACTOR_unzip_file uf) { +libextractor_oo_getmimetype (EXTRACTOR_unzip_file uf) +{ char filename_inzip[MAXFILENAME]; EXTRACTOR_unzip_file_info file_info; - char * buf = NULL; - size_t buf_size = 0; + char *buf; + size_t buf_size; - if (EXTRACTOR_UNZIP_OK != EXTRACTOR_common_unzip_local_file(uf, - "mimetype", - CASESENSITIVITY)) + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_local_file (uf, + "mimetype", + CASESENSITIVITY)) return NULL; - if ( (EXTRACTOR_UNZIP_OK == EXTRACTOR_common_unzip_get_current_file_info(uf, - &file_info, - filename_inzip, - sizeof(filename_inzip), - NULL, - 0, - NULL, - 0) && - (EXTRACTOR_UNZIP_OK == EXTRACTOR_common_unzip_open_current_file3(uf, NULL, NULL, 0)) ) ) { - buf_size = file_info.uncompressed_size; - - if (buf_size > 1024) - { - /* way too large! */ - } - else if (NULL == (buf = malloc(1 + buf_size))) - { - /* memory exhausted! */ - } - else if (buf_size != (size_t) EXTRACTOR_common_unzip_read_current_file(uf,buf,buf_size)) - { - free(buf); - buf = NULL; - } - else - { - /* found something */ - buf[buf_size] = '\0'; - while ( (0 < buf_size) && - isspace( (unsigned char) buf[buf_size - 1])) - buf[--buf_size] = '\0'; - if ('\0' == buf[0]) - { - free(buf); - buf = NULL; - } - } - } - EXTRACTOR_common_unzip_close_current_file(uf); + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_get_current_file_info (uf, + &file_info, + filename_inzip, + sizeof (filename_inzip), + NULL, + 0, + NULL, + 0)) + return NULL; + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) + { + EXTRACTOR_common_unzip_close_current_file (uf); + return NULL; + } + buf_size = file_info.uncompressed_size; + if (buf_size > 1024) + { + /* way too large! */ + EXTRACTOR_common_unzip_close_current_file (uf); + return NULL; + } + if (NULL == (buf = malloc (1 + buf_size))) + { + /* memory exhausted! */ + EXTRACTOR_common_unzip_close_current_file (uf); + return NULL; + } + if (buf_size != + (size_t) EXTRACTOR_common_unzip_read_current_file (uf, + buf, + buf_size)) + { + free(buf); + EXTRACTOR_common_unzip_close_current_file(uf); + return NULL; + } + /* found something */ + buf[buf_size] = '\0'; + while ( (0 < buf_size) && + isspace( (unsigned char) buf[buf_size - 1])) + buf[--buf_size] = '\0'; + if ('\0' == buf[0]) + { + free (buf); + buf = NULL; + } + EXTRACTOR_common_unzip_close_current_file (uf); return buf; } -typedef struct Ecls { - char * data; - size_t size; - size_t pos; -} Ecls; - - -int -EXTRACTOR_odf_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) +/** + * Main entry method for the ODF extraction plugin. + * + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_odf_extract_method (struct EXTRACTOR_ExtractContext *ec) { char filename_inzip[MAXFILENAME]; EXTRACTOR_unzip_file uf; EXTRACTOR_unzip_file_info file_info; - char * buf; - char * pbuf; + char *buf; + char *pbuf; size_t buf_size; - int i; + unsigned int i; EXTRACTOR_unzip_filefunc_def io; - Ecls cls; - char * mimetype; + char *mimetype; if (size < 100) return 0; if ( !( ('P'==data[0]) && ('K'==data[1]) && (0x03==data[2]) && (0x04==data[3])) ) return 0; - cls.data = (void*) data; - cls.size = size; - cls.pos = 0; io.zopen_file = &EXTRACTOR_common_unzip_zlib_open_file_func; io.zread_file = &EXTRACTOR_common_unzip_zlib_read_file_func; io.zwrite_file = NULL; @@ -155,151 +185,161 @@ EXTRACTOR_odf_extract (const char *data, io.zseek_file = &EXTRACTOR_common_unzip_zlib_seek_file_func; io.zclose_file = &EXTRACTOR_common_unzip_zlib_close_file_func; io.zerror_file = &EXTRACTOR_common_unzip_zlib_testerror_file_func; - io.opaque = &cls; - - uf = EXTRACTOR_common_unzip_open2("ERROR", &io); - if (uf == NULL) - return 0; - mimetype = libextractor_oo_getmimetype(uf); - if ( (NULL != mimetype) && - (0 != proc (proc_cls, - "deb", - EXTRACTOR_METATYPE_MIMETYPE, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - mimetype, - strlen (mimetype)+1)) ) + io.opaque = ec; + + if (NULL == (uf = EXTRACTOR_common_unzip_open2 ("ERROR", &io))) + return; + if (NULL != (mimetype = libextractor_oo_getmimetype (uf))) { - EXTRACTOR_common_unzip_close(uf); + if (0 != proc (proc_cls, + "deb", + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + mimetype, + strlen (mimetype) + 1)) + { + EXTRACTOR_common_unzip_close (uf); + free (mimetype); + return; + } free (mimetype); - return 1; } - free (mimetype); - if (EXTRACTOR_common_unzip_local_file(uf, - METAFILE, - CASESENSITIVITY) != EXTRACTOR_UNZIP_OK) { - EXTRACTOR_common_unzip_close(uf); - return 0; /* not found */ - } - + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_local_file (uf, + METAFILE, + CASESENSITIVITY)) + { + /* metafile not found */ + EXTRACTOR_common_unzip_close (uf); + return; + } if (EXTRACTOR_UNZIP_OK != - EXTRACTOR_common_unzip_get_current_file_info(uf, - &file_info, - filename_inzip, - sizeof(filename_inzip), - NULL,0,NULL,0)) { - EXTRACTOR_common_unzip_close(uf); - return 0; /* problems... */ - } - - if (EXTRACTOR_UNZIP_OK != EXTRACTOR_common_unzip_open_current_file3(uf, NULL, NULL, 0)) { - EXTRACTOR_common_unzip_close(uf); - return 0; /* problems... */ - } + EXTRACTOR_common_unzip_get_current_file_info (uf, + &file_info, + filename_inzip, + sizeof (filename_inzip), + NULL,0,NULL,0)) + { + /* problems accessing metafile */ + EXTRACTOR_common_unzip_close (uf); + return; + } + if (EXTRACTOR_UNZIP_OK != + EXTRACTOR_common_unzip_open_current_file3 (uf, NULL, NULL, 0)) + { + /* problems with unzip */ + EXTRACTOR_common_unzip_close (uf); + return; + } buf_size = file_info.uncompressed_size; - if (buf_size > 128 * 1024) { - EXTRACTOR_common_unzip_close_current_file(uf); - EXTRACTOR_common_unzip_close(uf); - return 0; /* hardly meta-data! */ - } - buf = malloc(buf_size+1); - if (buf == NULL) + if (buf_size > 128 * 1024) { - EXTRACTOR_common_unzip_close_current_file(uf); - EXTRACTOR_common_unzip_close(uf); - return 0; /* out of memory */ + /* too big to be meta-data! */ + EXTRACTOR_common_unzip_close_current_file (uf); + EXTRACTOR_common_unzip_close (uf); + return; } - - if (buf_size != EXTRACTOR_common_unzip_read_current_file(uf,buf,buf_size)) + if (NULL == (buf = malloc (buf_size+1))) { - free(buf); - EXTRACTOR_common_unzip_close_current_file(uf); - EXTRACTOR_common_unzip_close(uf); - return 0; + /* out of memory */ + EXTRACTOR_common_unzip_close_current_file (uf); + EXTRACTOR_common_unzip_close (uf); + return; } - EXTRACTOR_common_unzip_close_current_file(uf); + if (buf_size != EXTRACTOR_common_unzip_read_current_file (uf, buf, buf_size)) + { + EXTRACTOR_common_unzip_close_current_file (uf); + goto CLEANUP; + } + EXTRACTOR_common_unzip_close_current_file (uf); /* we don't do "proper" parsing of the meta-data but rather use some heuristics to get values out that we understand */ buf[buf_size] = '\0'; /* printf("%s\n", buf); */ /* try to find some of the typical OO xml headers */ - if ( (strstr(buf, "xmlns:meta=\"http://openoffice.org/2000/meta\"") != NULL) || - (strstr(buf, "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"") != NULL) || - (strstr(buf, "xmlns:xlink=\"http://www.w3.org/1999/xlink\"") != NULL) ) { - /* accept as meta-data */ - i = -1; - while (tmap[++i].text != NULL) { - char * spos; - char * epos; - char needle[256]; - int oc; - - pbuf = buf; - - while (1) { - strcpy(needle, "<"); - strcat(needle, tmap[i].text); - strcat(needle, ">"); - spos = strstr(pbuf, needle); - if (NULL == spos) { - strcpy(needle, tmap[i].text); - strcat(needle, "=\""); - spos = strstr(pbuf, needle); - if (spos == NULL) - break; - spos += strlen(needle); - epos = spos; - while ( (epos[0] != '\0') && - (epos[0] != '"') ) - epos++; - } else { - oc = 0; - spos += strlen(needle); - while ( (spos[0] != '\0') && - ( (spos[0] == '<') || - (oc > 0) ) ) { - if (spos[0] == '<') - oc++; - if (spos[0] == '>') - oc--; - spos++; - } - epos = spos; - while ( (epos[0] != '\0') && - (epos[0] != '<') && - (epos[0] != '>') ) { - epos++; - } + if ( (strstr (buf, "xmlns:meta=\"http://openoffice.org/2000/meta\"") != NULL) || + (strstr (buf, "xmlns:dc=\"http://purl.org/dc/elements/1.1/\"") != NULL) || + (strstr (buf, "xmlns:xlink=\"http://www.w3.org/1999/xlink\"") != NULL) ) + { + /* accept as meta-data */ + for (i = 0; NULL != tmap[i].text; i++) + { + char * spos; + char * epos; + char needle[256]; + int oc; + + pbuf = buf; + + while (1) + { + strcpy(needle, "<"); + strcat(needle, tmap[i].text); + strcat(needle, ">"); + spos = strstr(pbuf, needle); + if (NULL == spos) + { + strcpy(needle, tmap[i].text); + strcat(needle, "=\""); + spos = strstr(pbuf, needle); + if (spos == NULL) + break; + spos += strlen(needle); + epos = spos; + while ( (epos[0] != '\0') && + (epos[0] != '"') ) + epos++; + } + else + { + oc = 0; + spos += strlen(needle); + while ( (spos[0] != '\0') && + ( (spos[0] == '<') || + (oc > 0) ) ) + { + if (spos[0] == '<') + oc++; + if (spos[0] == '>') + oc--; + spos++; + } + epos = spos; + while ( (epos[0] != '\0') && + (epos[0] != '<') && + (epos[0] != '>') ) + { + epos++; + } + } + if (spos != epos) + { + char key[epos - spos + 1]; + + memcpy(key, spos, epos-spos); + key[epos-spos] = '\0'; + if (0 != proc (proc_cls, + "odf", + tmap[i].type, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + key, + epos - spos + 1)) + { + goto CLEANUP; + } + pbuf = epos; + } + else + break; + } } - if (spos != epos) - { - char key[epos - spos + 1]; - - memcpy(key, spos, epos-spos); - key[epos-spos] = '\0'; - if (0 != proc (proc_cls, - "odf", - tmap[i].type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - key, - epos - spos + 1)) - { - free(buf); - EXTRACTOR_common_unzip_close(uf); - return 1; - } - pbuf = epos; - } - else - break; - } } - } - free(buf); - EXTRACTOR_common_unzip_close(uf); - return 0; + CLEANUP: + free (buf); + EXTRACTOR_common_unzip_close (uf); } - +/* end of odf_extractor.c */ diff --git a/src/plugins/png_extractor.c b/src/plugins/png_extractor.c @@ -1,10 +1,10 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -17,127 +17,194 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - +/** + * @file plugins/png_extractor.c + * @brief plugin to support PNG files + * @author Christian Grothoff + */ #include "platform.h" -#include "extractor.h" #include <zlib.h> +#include "extractor.h" #include "convert.h" -#include "extractor_plugins.h" +/** + * Header that every PNG file must start with. + */ +#define PNG_HEADER "\211PNG\r\n\032\n" +/** + * Function to create 0-terminated string from the + * first n characters of the given input. + * + * @param str input string + * @param n length of the input + * @return n-bytes from str followed by 0-termination, NULL on error + */ static char * -stndup (const char *str, size_t n) +stndup (const char *str, + size_t n) { char *tmp; - tmp = malloc (n + 1); - if (tmp == NULL) + + if (NULL == (tmp = malloc (n + 1))) return NULL; tmp[n] = '\0'; memcpy (tmp, str, n); return tmp; } + /** * strnlen is GNU specific, let's redo it here to be * POSIX compliant. + * + * @param str input string + * @param maxlen maximum length of str + * @return first position of 0-terminator in str, or maxlen */ static size_t -stnlen (const char *str, size_t maxlen) +stnlen (const char *str, + size_t maxlen) { size_t ret; + ret = 0; - while ((ret < maxlen) && (str[ret] != '\0')) + while ( (ret < maxlen) && + ('\0' != str[ret]) ) ret++; return ret; } -static int -getIntAt (const void *pos) +/** + * Interpret the 4 bytes in 'buf' as a big-endian + * encoded 32-bit integer, convert and return. + * + * @param pos (unaligned) pointer to 4 byte integer + * @return converted integer in host byte order + */ +static uint32_t +get_int_at (const void *pos) { - char p[4]; + uint32_t i; - memcpy (p, pos, 4); /* ensure alignment! */ - return *(int *) &p[0]; + memcpy (&i, pos, sizeof (i)); + return htonl (i); } +/** + * Map from PNG meta data descriptor strings + * to LE types. + */ static struct { - char *name; + /** + * PNG name. + */ + const char *name; + + /** + * Corresponding LE type. + */ enum EXTRACTOR_MetaType type; } tagmap[] = { - { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME}, - { "Description", EXTRACTOR_METATYPE_DESCRIPTION}, - { "Comment", EXTRACTOR_METATYPE_COMMENT}, - { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT}, + { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME }, + { "Description", EXTRACTOR_METATYPE_DESCRIPTION }, + { "Comment", EXTRACTOR_METATYPE_COMMENT }, + { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT }, { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE }, - { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE}, - { "Title", EXTRACTOR_METATYPE_TITLE}, - { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE}, - { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER}, - { "Warning", EXTRACTOR_METATYPE_WARNING}, + { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE }, + { "Title", EXTRACTOR_METATYPE_TITLE }, + { "Software", EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE }, + { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER }, + { "Warning", EXTRACTOR_METATYPE_WARNING }, + { "Signature", EXTRACTOR_METATYPE_UNKNOWN }, { NULL, EXTRACTOR_METATYPE_RESERVED } }; -#define ADD(t,s) do { if (0 != (ret = proc (proc_cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) goto FINISH; } while (0) -#define ADDF(t,s) do { if ( (s != NULL) && (0 != (ret = proc (proc_cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen(s)+1))) ) { free(s); goto FINISH; } if (s != NULL) free (s); } while (0) +/** + * Give the given metadata to LE. Set "ret" to 1 and + * goto 'FINISH' if LE says we are done. + * + * @param t type of the metadata + * @param s utf8 string with the metadata + */ +#define ADD(t,s) do { if (0 != (ret = ec->proc (ec->cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1))) goto FINISH; } while (0) + +/** + * Give the given metadata to LE and free the memory. Set "ret" to 1 and + * goto 'FINISH' if LE says we are done. + * + * @param t type of the metadata + * @param s utf8 string with the metadata, to be freed afterwards + */ +#define ADDF(t,s) do { if ( (NULL != s) && (0 != (ret = ec->proc (ec->cls, "png", t, EXTRACTOR_METAFORMAT_UTF8, "text/plain", s, strlen (s) + 1))) ) { free (s); goto FINISH; } if (NULL != s) free (s); } while (0) + +/** + * Process EXt tag. + * + * @param ec extraction context + * @param length length of the tag + * @return 0 to continue extracting, 1 if we are done + */ static int -processtEXt (struct EXTRACTOR_PluginList *plugin, - unsigned int length, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processtEXt (struct EXTRACTOR_ExtractContext *ec, + uint32_t length) { + void *ptr; unsigned char *data; char *keyword; - unsigned int off; - int i; + size_t off; + unsigned int i; int ret; - if (length != pl_read (plugin, &data, length)) + if (length != ec->read (ec->cls, &ptr, length)) return 1; - - //data += 4; + data = ptr; off = stnlen ((char*) data, length) + 1; if (off >= length) return 0; /* failed to find '\0' */ - keyword = EXTRACTOR_common_convert_to_utf8 ( (char*) &data[off], length - off, "ISO-8859-1"); - if (keyword == NULL) + if (NULL == (keyword = EXTRACTOR_common_convert_to_utf8 ((char*) &data[off], + length - off, + "ISO-8859-1"))) return 0; - i = 0; ret = 0; - while (tagmap[i].name != NULL) - { - if (0 == strcmp (tagmap[i].name, (char*) data)) - { - ADDF (tagmap[i].type, keyword); - return 0; - } - - i++; - } + for (i = 0; NULL != tagmap[i].name; i++) + if (0 == strcmp (tagmap[i].name, (char*) data)) + { + ADDF (tagmap[i].type, keyword); + return 0; + } ADDF (EXTRACTOR_METATYPE_KEYWORDS, keyword); FINISH: return ret; } + +/** + * Process iTXt tag. + * + * @param ec extraction context + * @param length length of the tag + * @return 0 to continue extracting, 1 if we are done + */ static int -processiTXt (struct EXTRACTOR_PluginList *plugin, - unsigned int length, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processiTXt (struct EXTRACTOR_ExtractContext *ec, + uint32_t length) { + void *ptr; unsigned char *data; - unsigned int pos; + size_t pos; char *keyword; const char *language; const char *translated; - int i; + unsigned int i; int compressed; char *buf; char *lan; @@ -145,31 +212,27 @@ processiTXt (struct EXTRACTOR_PluginList *plugin, int ret; int zret; - if (length != pl_read (plugin, &data, length)) + if (length != ec->read (ec->cls, &ptr, length)) return 1; - - pos = stnlen ( (char*) data, length) + 1; + data = ptr; + pos = stnlen ((char *) data, length) + 1; if (pos >= length) return 0; compressed = data[pos++]; - if (compressed && (data[pos++] != 0)) + if (compressed && (0 != data[pos++])) return 0; /* bad compression method */ - language = (char*) &data[pos]; + language = (char *) &data[pos]; ret = 0; - if (stnlen (language, length - pos) > 0) - { - lan = stndup (language, length - pos); - ADDF (EXTRACTOR_METATYPE_LANGUAGE, lan); - } + if ( (stnlen (language, length - pos) > 0) && + (NULL != (lan = stndup (language, length - pos))) ) + ADDF (EXTRACTOR_METATYPE_LANGUAGE, lan); pos += stnlen (language, length - pos) + 1; if (pos + 1 >= length) return 0; translated = (char*) &data[pos]; /* already in utf-8! */ - if (stnlen (translated, length - pos) > 0) - { - lan = stndup (translated, length - pos); - ADDF (EXTRACTOR_METATYPE_KEYWORDS, lan); - } + if ( (stnlen (translated, length - pos) > 0) && + (NULL != (lan = stndup (translated, length - pos))) ) + ADDF (EXTRACTOR_METATYPE_KEYWORDS, lan); pos += stnlen (translated, length - pos) + 1; if (pos >= length) return 0; @@ -187,22 +250,21 @@ processiTXt (struct EXTRACTOR_PluginList *plugin, /* printf("zlib problem"); */ return 0; } - buf = malloc (bufLen); - if (buf == NULL) + if (NULL == (buf = malloc (bufLen))) { /* printf("out of memory"); */ return 0; /* out of memory */ } - zret = uncompress ((Bytef *) buf, - &bufLen, - (const Bytef *) &data[pos], length - pos); - if (zret == Z_OK) + if (Z_OK == + (zret = uncompress ((Bytef *) buf, + &bufLen, + (const Bytef *) &data[pos], length - pos))) { /* printf("zlib ok"); */ break; } free (buf); - if (zret != Z_BUF_ERROR) + if (Z_BUF_ERROR != zret) return 0; /* unknown error, abort */ } keyword = stndup (buf, bufLen); @@ -210,73 +272,82 @@ processiTXt (struct EXTRACTOR_PluginList *plugin, } else { - keyword = stndup ((char*) &data[pos], length - pos); - } - i = 0; - while (tagmap[i].name != NULL) - { - if (0 == strcmp (tagmap[i].name, (char*) data)) - { - ADDF (tagmap[i].type, keyword /* already in utf8 */); - return 0; - } - i++; + keyword = stndup ((char *) &data[pos], length - pos); } + if (NULL == keyword) + return ret; + for (i = 0; NULL != tagmap[i].name; i++) + if (0 == strcmp (tagmap[i].name, (char*) data)) + { + ADDF (tagmap[i].type, keyword /* already in utf8 */); + return 0; + } ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); FINISH: return ret; } +/** + * Process IHDR tag. + * + * @param ec extraction context + * @param length length of the tag + * @return 0 to continue extracting, 1 if we are done + */ static int -processIHDR (struct EXTRACTOR_PluginList *plugin, - unsigned int length, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processIHDR (struct EXTRACTOR_ExtractContext *ec, + uint32_t length) { + void *ptr; unsigned char *data; char tmp[128]; int ret; if (length < 12) return 0; - - if (length != pl_read (plugin, &data, length)) + if (length != ec->read (ec->cls, &ptr, length)) return 1; - + data = ptr; ret = 0; snprintf (tmp, - sizeof(tmp), + sizeof (tmp), "%ux%u", - htonl (getIntAt (data)), htonl (getIntAt (&data[4]))); + get_int_at (data), get_int_at (&data[4])); ADD (EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, tmp); FINISH: return ret; } + +/** + * Process zTXt tag. + * + * @param ec extraction context + * @param length length of the tag + * @return 0 to continue extracting, 1 if we are done + */ static int -processzTXt (struct EXTRACTOR_PluginList *plugin, - unsigned int length, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processzTXt (struct EXTRACTOR_ExtractContext *ec, + uint32_t length) { + void *ptr; unsigned char *data; char *keyword; - unsigned int off; - int i; + size_t off; + unsigned int i; char *buf; uLongf bufLen; int zret; int ret; - if (length != pl_read (plugin, &data, length)) + if (length != ec->read (ec->cls, &ptr, length)) return 1; - - //data += 4; - off = stnlen ( (char*) data, length) + 1; + data = ptr; + off = stnlen ((char *) data, length) + 1; if (off >= length) return 0; /* failed to find '\0' */ - if (data[off] != 0) + if (0 != data[off]) return 0; /* compression method must be 0 */ off++; ret = 0; @@ -291,46 +362,52 @@ processzTXt (struct EXTRACTOR_PluginList *plugin, /* printf("zlib problem"); */ return 0; } - buf = malloc (bufLen); - if (buf == NULL) + if (NULL == (buf = malloc (bufLen))) { /* printf("out of memory"); */ return 0; /* out of memory */ } - zret = uncompress ((Bytef *) buf, - &bufLen, (const Bytef *) &data[off], length - off); - if (zret == Z_OK) + if (Z_OK == + (zret = uncompress ((Bytef *) buf, + &bufLen, + (const Bytef *) &data[off], + length - off))) { /* printf("zlib ok"); */ break; } free (buf); - if (zret != Z_BUF_ERROR) + if (Z_BUF_ERROR != zret) return 0; /* unknown error, abort */ } - keyword = EXTRACTOR_common_convert_to_utf8 (buf, bufLen, "ISO-8859-1"); + keyword = EXTRACTOR_common_convert_to_utf8 (buf, + bufLen, + "ISO-8859-1"); free (buf); - i = 0; - while (tagmap[i].name != NULL) - { - if (0 == strcmp (tagmap[i].name, (char*) data)) - { - ADDF (tagmap[i].type, keyword); - return 0; - } - i++; - } + for (i = 0; NULL != tagmap[i].name; i++) + if (0 == strcmp (tagmap[i].name, (char*) data)) + { + ADDF (tagmap[i].type, keyword); + return 0; + } ADDF (EXTRACTOR_METATYPE_COMMENT, keyword); FINISH: return ret; } + +/** + * Process IME tag. + * + * @param ec extraction context + * @param length length of the tag + * @return 0 to continue extracting, 1 if we are done + */ static int -processtIME (struct EXTRACTOR_PluginList *plugin, - unsigned int length, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) +processtIME (struct EXTRACTOR_ExtractContext *ec, + uint32_t length) { + void *ptr; unsigned char *data; unsigned short y; unsigned int year; @@ -344,12 +421,11 @@ processtIME (struct EXTRACTOR_PluginList *plugin, if (length != 7) return 0; - - if (length != pl_read (plugin, &data, length)) + if (length != ec->read (ec->cls, &ptr, length)) return 1; - + data = ptr; ret = 0; - memcpy (&y, data, sizeof (unsigned short)); + memcpy (&y, data, sizeof (uint16_t)); year = ntohs (y); mo = (unsigned char) data[6]; day = (unsigned char) data[7]; @@ -357,66 +433,63 @@ processtIME (struct EXTRACTOR_PluginList *plugin, m = (unsigned char) data[9]; s = (unsigned char) data[10]; snprintf (val, - sizeof(val), - "%04u-%02u-%02u %02d:%02d:%02d", year, mo, day, h, m, s); + sizeof (val), + "%04u-%02u-%02u %02d:%02d:%02d", + year, mo, day, h, m, s); ADD (EXTRACTOR_METATYPE_MODIFICATION_DATE, val); FINISH: return ret; } -#define PNG_HEADER "\211PNG\r\n\032\n" - - -int -EXTRACTOR_png_extract_method (struct EXTRACTOR_PluginList *plugin, - EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +/** + * Main entry method for the 'image/png' extraction plugin. + * + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_png_extract_method (struct EXTRACTOR_ExtractContext *ec) { - unsigned char *data; - unsigned int length; + void *data; + uint32_t length; int64_t pos; int ret; + ssize_t len; - if (plugin == NULL) - return 1; - - ret = strlen (PNG_HEADER); - - if (ret != pl_read (plugin, &data, ret)) - return 1; - - if (0 != strncmp ((char*) data, PNG_HEADER, ret)) - return 1; - + len = strlen (PNG_HEADER); + if (len != ec->read (ec->cls, &data, len)) + return; + if (0 != strncmp ((const char*) data, PNG_HEADER, len)) + return; ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png"); ret = 0; - while (ret == 0) + while (0 == ret) { - if (4 != pl_read (plugin, &data, 4)) + if (sizeof (uint32_t) + 4 != ec->read (ec->cls, + &data, + sizeof (uint32_t) + 4)) break; - length = htonl (getIntAt (data)); - /* printf("Length: %u, pos %u\n", length, pos - data); */ - if (4 != pl_read (plugin, &data, 4)) - break; - pos = pl_get_pos (plugin); - if (pos <= 0) + length = get_int_at (data); + if (0 > (pos = ec->seek (ec->cls, 0, SEEK_CUR))) break; pos += length + 4; /* Chunk type, data, crc */ - if (0 == strncmp ((char*) data, "IHDR", 4)) - ret = processIHDR (plugin, length, proc, proc_cls); - if (0 == strncmp ((char*) data, "iTXt", 4)) - ret = processiTXt (plugin, length, proc, proc_cls); - if (0 == strncmp ((char*)data, "tEXt", 4)) - ret = processtEXt (plugin, length, proc, proc_cls); - if (0 == strncmp ((char*) data, "zTXt", 4)) - ret = processzTXt (plugin, length, proc, proc_cls); - if (0 == strncmp ((char*) data, "tIME", 4)) - ret = processtIME (plugin, length, proc, proc_cls); + if (0 == strncmp ((char*) data + sizeof (uint32_t), "IHDR", 4)) + ret = processIHDR (ec, length); + if (0 == strncmp ((char*) data + sizeof (uint32_t), "iTXt", 4)) + ret = processiTXt (ec, length); + if (0 == strncmp ((char*) data + sizeof (uint32_t), "tEXt", 4)) + ret = processtEXt (ec, length); + if (0 == strncmp ((char*) data + sizeof (uint32_t), "zTXt", 4)) + ret = processzTXt (ec, length); + if (0 == strncmp ((char*) data + sizeof (uint32_t), "tIME", 4)) + ret = processtIME (ec, length); if (ret != 0) break; - if (pos != pl_seek (plugin, pos, SEEK_SET)) + if (pos != ec->seek (ec->cls, pos, SEEK_SET)) break; } FINISH: - return 1; + return; } + +/* end of png_extractor.c */ diff --git a/src/plugins/test_jpeg.c b/src/plugins/test_jpeg.c @@ -19,7 +19,7 @@ */ /** * @file plugins/test_jpeg.c - * @brief testcase for ogg plugin + * @brief testcase for jpeg plugin * @author Christian Grothoff */ #include "platform.h" diff --git a/src/plugins/test_png.c b/src/plugins/test_png.c @@ -0,0 +1,84 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file plugins/test_png.c + * @brief testcase for png plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + +/** + * Main function for the PNG testcase. + * + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return 0 on success + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData png_image_sol[] = + { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "image/png", + strlen ("image/png") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "4x4", + strlen ("4x4") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Testing keyword extraction\n", + strlen ("Testing keyword extraction\n") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_UNKNOWN, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "dc6c58c971715e8043baef058b675eec", + strlen ("dc6c58c971715e8043baef058b675eec") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = + { + { "testdata/png_image.png", + png_image_sol }, + { NULL, NULL } + }; + return ET_main ("png", ps); +} + +/* end of test_png.c */ diff --git a/test/test.png b/src/plugins/testdata/png_image.png Binary files differ.