libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit c9d47aa8945fbccfd26101f8c015f5b6b267d006
parent 27dc2404bfa0dee7bddb972bdfa03de17ea6eb4b
Author: Christian Grothoff <christian@grothoff.org>
Date:   Wed, 15 Aug 2012 22:45:44 +0000

implementing tiff support

Diffstat:
MTODO | 2--
Mconfigure.ac | 12++++++++++++
Msrc/plugins/Makefile.am | 23++++++++++++++++++++++-
Dsrc/plugins/old/tiff_extractor.c | 262-------------------------------------------------------------------------------
Asrc/plugins/test_tiff.c | 121+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/plugins/testdata/tiff_haute.tiff | 0
Asrc/plugins/tiff_extractor.c | 244+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 399 insertions(+), 265 deletions(-)

diff --git a/TODO b/TODO @@ -1,7 +1,6 @@ * Update plugins to new API (and cleanup code): - mp3/id3/id3v2 tags - thumbnail-ffmpeg - - thumbnail-gtk - thumbnail-qt - tar - html @@ -12,7 +11,6 @@ - flv - qt - riff - - tiff - ps - pdf - mkv diff --git a/configure.ac b/configure.ac @@ -300,6 +300,13 @@ AC_CHECK_LIB(jpeg, jpeg_std_error, AM_CONDITIONAL(HAVE_JPEG, false))], AM_CONDITIONAL(HAVE_JPEG, false)) +AC_CHECK_LIB(tiff, TIFFClientOpen, + [AC_CHECK_HEADERS([tiffio.h], + AM_CONDITIONAL(HAVE_TIFF, true) + AC_DEFINE(HAVE_TIFF,1,[Have libtiff]), + AM_CONDITIONAL(HAVE_TIFF, false))], + AM_CONDITIONAL(HAVE_TIFF, false)) + AC_MSG_CHECKING(for ImageFactory::iptcData in -lexiv2) AC_LANG_PUSH(C++) SAVED_LDFLAGS=$LDFLAGS @@ -530,6 +537,11 @@ then AC_MSG_NOTICE([NOTICE: libexiv2 not found, exiv2 disabled]) fi +if test "x$HAVE_TIFF_TRUE" = "x#" +then + AC_MSG_NOTICE([NOTICE: libtiff not found, tiff disabled]) +fi + if test "x$HAVE_JPEG_TRUE" = "x#" then AC_MSG_NOTICE([NOTICE: libjpeg not found, jpeg disabled]) diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am @@ -35,7 +35,8 @@ EXTRA_DIST = template_extractor.c \ testdata/deb_bzip2.deb \ testdata/nsf_arkanoid.nsf \ testdata/nsfe_classics.nsfe \ - testdata/xm_diesel.xm + testdata/xm_diesel.xm \ + testdata/tiff_haute.tiff if HAVE_VORBISFILE PLUGIN_OGG=libextractor_ogg.la @@ -68,6 +69,11 @@ PLUGIN_FLAC=libextractor_flac.la TEST_FLAC=test_flac endif +if HAVE_TIFF +PLUGIN_TIFF=libextractor_tiff.la +TEST_TIFF=test_tiff +endif + if HAVE_MPEG2 PLUGIN_MPEG=libextractor_mpeg.la TEST_MPEG=test_mpeg @@ -108,6 +114,7 @@ plugin_LTLIBRARIES = \ $(PLUGIN_ZLIB) \ $(PLUGIN_OGG) \ $(PLUGIN_MIME) \ + $(PLUGIN_TIFF) \ $(PLUGIN_GIF) \ $(PLUGIN_RPM) \ $(PLUGIN_FLAC) \ @@ -134,6 +141,7 @@ check_PROGRAMS = \ $(TEST_GTK) \ $(TEST_OGG) \ $(TEST_MIME) \ + $(TEST_TIFF) \ $(TEST_GIF) \ $(TEST_RPM) \ $(TEST_FLAC) \ @@ -397,6 +405,19 @@ test_exiv2_LDADD = \ $(top_builddir)/src/plugins/libtest.la +libextractor_tiff_la_SOURCES = \ + tiff_extractor.c 
+libextractor_tiff_la_LDFLAGS = \ + $(PLUGINFLAGS) +libextractor_tiff_la_LIBADD = \ + -ltiff + +test_tiff_SOURCES = \ + test_tiff.c +test_tiff_LDADD = \ + $(top_builddir)/src/plugins/libtest.la + + libextractor_ole2_la_SOURCES = \ ole2_extractor.c libextractor_ole2_la_CFLAGS = \ diff --git a/src/plugins/old/tiff_extractor.c b/src/plugins/old/tiff_extractor.c @@ -1,262 +0,0 @@ -/* - This file is part of libextractor. - (C) 2004, 2009 Vidyut Samanta and Christian Grothoff - - libextractor is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your - option) any later version. - - libextractor is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with libextractor; see the file COPYING. If not, write to the - Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. 
- */ - -#include "platform.h" -#include "extractor.h" -#include "pack.h" - -#define DEBUG 0 - -static int -addKeyword (EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *keyword, - enum EXTRACTOR_MetaType type) -{ - if (keyword == NULL) - return 0; - return proc (proc_cls, - "tiff", - type, - EXTRACTOR_METAFORMAT_UTF8, - "text/plain", - keyword, - strlen(keyword)+1); -} - -typedef struct -{ - unsigned short byteorder; - unsigned short fourty_two; - unsigned int ifd_offset; -} TIFF_HEADER; -#define TIFF_HEADER_SIZE 8 -#define TIFF_HEADER_FIELDS(p) \ - &(p)->byteorder, \ - &(p)->fourty_two, \ - &(p)->ifd_offset -static char *TIFF_HEADER_SPECS[] = { - "hhw", - "HHW", -}; - -typedef struct -{ - unsigned short tag; - unsigned short type; - unsigned int count; - unsigned int value_or_offset; -} DIRECTORY_ENTRY; -#define DIRECTORY_ENTRY_SIZE 12 -#define DIRECTORY_ENTRY_FIELDS(p) \ - &(p)->tag, \ - &(p)->type, \ - &(p)->count, \ - &(p)->value_or_offset -static char *DIRECTORY_ENTRY_SPECS[] = { - "hhww", - "HHWW" -}; - -#define TAG_LENGTH 0x101 -#define TAG_WIDTH 0x100 -#define TAG_SOFTWARE 0x131 -#define TAG_DAYTIME 0x132 -#define TAG_ARTIST 0x315 -#define TAG_COPYRIGHT 0x8298 -#define TAG_DESCRIPTION 0x10E -#define TAG_DOCUMENT_NAME 0x10D -#define TAG_HOST 0x13C -#define TAG_SCANNER 0x110 -#define TAG_ORIENTATION 0x112 - -#define TYPE_BYTE 1 -#define TYPE_ASCII 2 -#define TYPE_SHORT 3 -#define TYPE_LONG 4 -#define TYPE_RATIONAL 5 - -static int -addASCII (EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *data, - size_t size, DIRECTORY_ENTRY * entry, - enum EXTRACTOR_MetaType type) -{ - if (entry->count > size) - return 0; /* invalid! */ - if (entry->type != TYPE_ASCII) - return 0; /* huh? 
*/ - if (entry->count + entry->value_or_offset > size) - return 0; - if (data[entry->value_or_offset + entry->count - 1] != 0) - return 0; - return addKeyword (proc, proc_cls, - &data[entry->value_or_offset], type); -} - - -int -EXTRACTOR_tiff_extract (const char *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls, - const char *options) -{ - TIFF_HEADER hdr; - int byteOrder; /* 0: do not convert; - 1: do convert */ - unsigned int current_ifd; - unsigned int length = -1; - unsigned int width = -1; - - if (size < TIFF_HEADER_SIZE) - return 0; /* can not be tiff */ - if ((data[0] == 0x49) && (data[1] == 0x49)) - byteOrder = 0; - else if ((data[0] == 0x4D) && (data[1] == 0x4D)) - byteOrder = 1; - else - return 0; /* can not be tiff */ -#if __BYTE_ORDER == __BIG_ENDIAN - byteOrder = 1 - byteOrder; -#endif - EXTRACTOR_common_cat_unpack (data, TIFF_HEADER_SPECS[byteOrder], TIFF_HEADER_FIELDS (&hdr)); - if (hdr.fourty_two != 42) - return 0; /* can not be tiff */ - if (hdr.ifd_offset + 6 > size) - return 0; /* malformed tiff */ - if (0 != addKeyword (proc, proc_cls, "image/tiff", EXTRACTOR_METATYPE_MIMETYPE)) - return 1; - current_ifd = hdr.ifd_offset; - while (current_ifd != 0) - { - unsigned short len; - unsigned int off; - int i; - if ( (current_ifd + 6 > size) || - (current_ifd + 6 < current_ifd) ) - return 0; - if (byteOrder == 0) - len = data[current_ifd + 1] << 8 | data[current_ifd]; - else - len = data[current_ifd] << 8 | data[current_ifd + 1]; - if (len * DIRECTORY_ENTRY_SIZE + 2 + 4 + current_ifd > size) - { -#if DEBUG - printf ("WARNING: malformed tiff\n"); -#endif - return 0; - } - for (i = 0; i < len; i++) - { - DIRECTORY_ENTRY entry; - off = current_ifd + 2 + DIRECTORY_ENTRY_SIZE * i; - - EXTRACTOR_common_cat_unpack (&data[off], - DIRECTORY_ENTRY_SPECS[byteOrder], - DIRECTORY_ENTRY_FIELDS (&entry)); - switch (entry.tag) - { - case TAG_LENGTH: - if ((entry.type == TYPE_SHORT) && (byteOrder == 1)) - { - length = entry.value_or_offset >> 
16; - } - else - { - length = entry.value_or_offset; - } - if (width != -1) - { - char tmp[128]; - snprintf (tmp, - sizeof(tmp), "%ux%u", - width, length); - addKeyword (proc, - proc_cls, - tmp, - EXTRACTOR_METATYPE_IMAGE_DIMENSIONS); - } - break; - case TAG_WIDTH: - if ((entry.type == TYPE_SHORT) && (byteOrder == 1)) - width = entry.value_or_offset >> 16; - else - width = entry.value_or_offset; - if (length != -1) - { - char tmp[128]; - snprintf (tmp, - sizeof(tmp), - "%ux%u", - width, length); - addKeyword (proc, proc_cls, - tmp, - EXTRACTOR_METATYPE_IMAGE_DIMENSIONS); - } - break; - case TAG_SOFTWARE: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE)) - return 1; - break; - case TAG_ARTIST: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_ARTIST)) - return 1; - break; - case TAG_DOCUMENT_NAME: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_TITLE)) - return 1; - break; - case TAG_COPYRIGHT: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_COPYRIGHT)) - return 1; - break; - case TAG_DESCRIPTION: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_DESCRIPTION)) - return 1; - break; - case TAG_HOST: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_BUILDHOST)) - return 1; - break; - case TAG_SCANNER: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_SOURCE)) - return 1; - break; - case TAG_DAYTIME: - if (0 != addASCII (proc, proc_cls, data, size, &entry, EXTRACTOR_METATYPE_CREATION_DATE)) - return 1; - break; - } - } - - off = current_ifd + 2 + DIRECTORY_ENTRY_SIZE * len; - if (byteOrder == 0) - current_ifd = - data[off + 3] << 24 | data[off + 2] << 16 | - data[off + 1] << 8 | data[off]; - else - current_ifd = - data[off] << 24 | data[off + 1] << 16 | - data[off + 2] << 8 | data[off + 3]; - } - return 0; -} diff --git a/src/plugins/test_tiff.c 
b/src/plugins/test_tiff.c @@ -0,0 +1,121 @@ +/* + This file is part of libextractor. + (C) 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. +*/ +/** + * @file plugins/test_tiff.c + * @brief testcase for tiff plugin + * @author Christian Grothoff + */ +#include "platform.h" +#include "test_lib.h" + + + +/** + * Main function for the TIFF testcase. 
+ * Hands the expected meta data for testdata/tiff_haute.tiff to ET_main. + * @param argc number of arguments (ignored) + * @param argv arguments (ignored) + * @return result of ET_main for the "tiff" plugin (0 on success) + */ +int +main (int argc, char *argv[]) +{ + struct SolutionData tiff_haute_sol[] = + { + { + EXTRACTOR_METATYPE_MIMETYPE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "image/tiff", + strlen ("image/tiff") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_ARTIST, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Anders Espersen", + strlen ("Anders Espersen") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CREATION_DATE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "2012:05:15 10:51:47", + strlen ("2012:05:15 10:51:47") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_COPYRIGHT, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "© Anders Espersen", + strlen ("© Anders Espersen") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CAMERA_MAKE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Hasselblad", + strlen ("Hasselblad") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CAMERA_MODEL, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Hasselblad H4D-31", + strlen ("Hasselblad H4D-31") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "Adobe Photoshop CS5 Macintosh", + strlen ("Adobe Photoshop CS5 Macintosh") + 1, + 0 + }, + { + EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "4872x6496", + strlen ("4872x6496") + 1, + 0 + }, + { 0, 0, NULL, NULL, 0, -1 } + }; + struct ProblemSet ps[] = + { + /* note that the original test image was almost + 100 MB large; so for SVN it was cut down to + only contain the first 64 KB, which still parses + fine and gives us the meta data */ + { "testdata/tiff_haute.tiff", + tiff_haute_sol }, + { NULL, NULL } + }; + return ET_main ("tiff", ps); +} + +/* end of test_tiff.c */ diff --git a/src/plugins/testdata/tiff_haute.tiff b/src/plugins/testdata/tiff_haute.tiff Binary files differ. 
diff --git a/src/plugins/tiff_extractor.c b/src/plugins/tiff_extractor.c @@ -0,0 +1,244 @@ +/* + This file is part of libextractor. + (C) 2012 Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ +/** + * @file plugins/tiff_extractor.c + * @brief plugin to support TIFF files + * @author Christian Grothoff + */ +#include "platform.h" +#include "extractor.h" +#include <tiffio.h> + + +/** + * Error and warning handler for libtiff. Discards the message. + * + * @param module where did the error arise? + * @param fmt format string + * @param ap arguments for fmt + */ +static void +error_cb (const char *module, + const char *fmt, + va_list ap) +{ + /* intentionally empty: libtiff diagnostics are dropped */ +} + + +/** + * Callback invoked by TIFF lib for reading; forwards to ec->read. + * + * @param ctx the 'struct EXTRACTOR_ExtractContext' + * @param data buffer the bytes read are copied into + * @param size number of bytes to read + * @return value returned by ec->read; data is only copied if it is positive + */ +static tsize_t +read_cb (thandle_t ctx, + tdata_t data, + tsize_t size) +{ + struct EXTRACTOR_ExtractContext *ec = ctx; + void *ptr; + ssize_t ret; + + ret = ec->read (ec->cls, &ptr, size); + if (ret > 0) + memcpy (data, ptr, ret); + return ret; +} + + +/** + * Callback invoked by TIFF lib for writing. Always fails. 
+ * + * @param ctx the 'struct EXTRACTOR_ExtractContext' (unused) + * @param data data to be written (ignored) + * @param size number of bytes to write (ignored) + * @return -1 (error) + */ +static tsize_t +write_cb (thandle_t ctx, + tdata_t data, + tsize_t size) +{ + return -1; +} + + +/** + * Callback invoked by TIFF lib for seeking; forwards to ec->seek. + * + * @param ctx the 'struct EXTRACTOR_ExtractContext' + * @param offset target offset + * @param whence what the offset is relative to (passed through unchanged) + * @return value returned by ec->seek (presumably the new offset) + */ +static toff_t +seek_cb (thandle_t ctx, + toff_t offset, + int whence) +{ + struct EXTRACTOR_ExtractContext *ec = ctx; + + return ec->seek (ec->cls, offset, whence); +} + + +/** + * Callback invoked by TIFF lib for getting the file size. + * + * @param ctx the 'struct EXTRACTOR_ExtractContext' + * @return value returned by ec->get_size + */ +static toff_t +size_cb (thandle_t ctx) +{ + struct EXTRACTOR_ExtractContext *ec = ctx; + + return ec->get_size (ec->cls); +} + + +/** + * Callback invoked by TIFF lib for closing the file. Does nothing and always returns 0. + * + * @param ctx the 'struct EXTRACTOR_ExtractContext' (unused) + */ +static int +close_cb (thandle_t ctx) +{ + return 0; /* success */ +} + + +/** + * One entry mapping a TIFF tag to a libextractor (LE) meta type. + */ +struct Matches +{ + /** + * TIFF Tag. + */ + ttag_t tag; + + /** + * Corresponding LE meta type. + */ + enum EXTRACTOR_MetaType type; +}; + + +/** + * Mapping of TIFF tags (fetched as strings) to LE meta types. + * Terminated by an entry whose tag is 0. 
+ */ +static struct Matches tmap[] = { + { TIFFTAG_ARTIST, EXTRACTOR_METATYPE_ARTIST }, + { TIFFTAG_COPYRIGHT, EXTRACTOR_METATYPE_COPYRIGHT }, + { TIFFTAG_DATETIME, EXTRACTOR_METATYPE_CREATION_DATE }, + { TIFFTAG_DOCUMENTNAME, EXTRACTOR_METATYPE_TITLE }, + { TIFFTAG_HOSTCOMPUTER, EXTRACTOR_METATYPE_BUILDHOST }, + { TIFFTAG_IMAGEDESCRIPTION, EXTRACTOR_METATYPE_DESCRIPTION }, + { TIFFTAG_MAKE, EXTRACTOR_METATYPE_CAMERA_MAKE }, + { TIFFTAG_MODEL, EXTRACTOR_METATYPE_CAMERA_MODEL }, + { TIFFTAG_PAGENAME, EXTRACTOR_METATYPE_PAGE_RANGE }, + { TIFFTAG_SOFTWARE, EXTRACTOR_METATYPE_CREATED_BY_SOFTWARE }, + { TIFFTAG_TARGETPRINTER, EXTRACTOR_METATYPE_TARGET_ARCHITECTURE }, + { 0, 0 } +}; + + +/** + * Main entry method for the 'image/tiff' extraction plugin. + * Reports each tmap tag that is present, then (only if both width and length are present) the image dimensions and the MIME type; stops as soon as ec->proc returns non-zero. + * @param ec extraction context provided to the plugin + */ +void +EXTRACTOR_tiff_extract_method (struct EXTRACTOR_ExtractContext *ec) +{ + TIFF *tiff; + unsigned int i; + char *meta; + char format[128]; + uint32_t width; + uint32_t height; + + TIFFSetErrorHandler (&error_cb); + TIFFSetWarningHandler (&error_cb); + tiff = TIFFClientOpen ("<no filename>", + "rm", /* read-only, no mmap */ + ec, + &read_cb, + &write_cb, + &seek_cb, + &close_cb, + &size_cb, + NULL, NULL); + if (NULL == tiff) + return; + for (i = 0; 0 != tmap[i].tag; i++) + if ( (1 == + TIFFGetField (tiff, tmap[i].tag, &meta)) && + (0 != + ec->proc (ec->cls, + "tiff", + tmap[i].type, + EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + meta, + strlen (meta) + 1)) ) + goto CLEANUP; + if ( (1 == + TIFFGetField (tiff, TIFFTAG_IMAGEWIDTH, &width)) && + (1 == + TIFFGetField (tiff, TIFFTAG_IMAGELENGTH, &height)) ) + { + snprintf (format, + sizeof (format), + "%ux%u", + (unsigned int) width, + (unsigned int) height); + if (0 != + ec->proc (ec->cls, + "tiff", + EXTRACTOR_METATYPE_IMAGE_DIMENSIONS, + 
EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + format, + strlen (format) + 1)) + goto CLEANUP; + if (0 != + ec->proc (ec->cls, + "tiff", + EXTRACTOR_METATYPE_MIMETYPE, + 
EXTRACTOR_METAFORMAT_UTF8, + "text/plain", + "image/tiff", + strlen ("image/tiff") + 1)) + goto CLEANUP; + } + + CLEANUP: + TIFFClose (tiff); +} + +/* end of tiff_extractor.c */