libextractor

GNU libextractor
Log | Files | Refs | Submodules | README | LICENSE

commit 479d302fc73af96bda241e6a64eba48cc18ab65e
parent ed3b52a942e733396789e74b76e5053440875828
Author: Christian Grothoff <christian@grothoff.org>
Date:   Sat, 21 Jul 2012 22:34:27 +0000

-towards a new LE core library with cleaner apis, does not currently compile

Diffstat:
MINSTALL | 9+++++++--
Mdoc/version.texi | 4++--
Msrc/include/extractor.h | 139+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msrc/main/Makefile.am | 14+++++++++-----
Msrc/main/extract.c | 2+-
Msrc/main/extractor.c | 2835++++++++++++-------------------------------------------------------------------
Asrc/main/extractor_datasource.c | 1041+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/extractor_datasource.h | 101+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/extractor_ipc.h | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/extractor_ipc_gnu.c | 490+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/main/extractor_ipc_w32.c | 905+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/main/extractor_plugins.c | 273++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Msrc/main/extractor_plugins.h | 25++++++-------------------
Msrc/main/extractor_plugpath.c | 512++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Msrc/main/extractor_plugpath.h | 34+++++++++++-----------------------
Msrc/main/extractor_print.c | 45++++++++++++++++++++++-----------------------
16 files changed, 3648 insertions(+), 2842 deletions(-)

diff --git a/INSTALL b/INSTALL @@ -1,8 +1,8 @@ Installation Instructions ************************* -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, -2006, 2007, 2008, 2009 Free Software Foundation, Inc. +Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation, +Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright @@ -226,6 +226,11 @@ order to use an ANSI C compiler: and if that doesn't work, install pre-built binaries of GCC for HP-UX. + HP-UX `make' updates targets which have the same time stamps as +their prerequisites, which makes it generally unusable when shipped +generated files such as `configure' are involved. Use GNU `make' +instead. + On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended diff --git a/doc/version.texi b/doc/version.texi @@ -1,4 +1,4 @@ -@set UPDATED 29 January 2012 -@set UPDATED-MONTH January 2012 +@set UPDATED 12 May 2012 +@set UPDATED-MONTH May 2012 @set EDITION 0.6.3 @set VERSION 0.6.3 diff --git a/src/include/extractor.h b/src/include/extractor.h @@ -1,6 +1,6 @@ /* This file is part of libextractor. 
- (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -32,7 +32,7 @@ extern "C" { * 0.2.6-1 => 0x00020601 * 4.5.2-0 => 0x04050200 */ -#define EXTRACTOR_VERSION 0x00060301 +#define EXTRACTOR_VERSION 0x00060900 #include <stdio.h> @@ -331,7 +331,7 @@ enum EXTRACTOR_MetaType * translate using 'dgettext ("libextractor", rval)' */ const char * -EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type); +EXTRACTOR_metatype_to_string (enum EXTRACTOR_MetaType type); /** @@ -343,7 +343,7 @@ EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type); * translate using 'dgettext ("libextractor", rval)' */ const char * -EXTRACTOR_metatype_to_description(enum EXTRACTOR_MetaType type); +EXTRACTOR_metatype_to_description (enum EXTRACTOR_MetaType type); /** @@ -372,26 +372,84 @@ EXTRACTOR_metatype_get_max (void); * @param data_len number of bytes in data * @return 0 to continue extracting, 1 to abort */ -typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls, - const char *plugin_name, - enum EXTRACTOR_MetaType type, - enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, - const char *data, - size_t data_len); - - +typedef int (*EXTRACTOR_MetaDataProcessor) (void *cls, + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len); + + +/** + * Context provided for plugins that perform meta data extraction. + */ +struct EXTRACTOR_ExtractContext +{ + + /** + * Closure argument to pass to all callbacks. + */ + void *cls; + + /** + * Configuration string for the plugin. + */ + const char *config; + + /** + * Obtain a pointer to up to 'size' bytes of data from the file to process. 
+ * + * @param cls the 'cls' member of this struct + * @param data pointer to set to the file data, set to NULL on error + * @param size maximum number of bytes requested + * @return number of bytes now available in data (can be smaller than 'size'), + * -1 on error + */ + ssize_t (*read) (void *cls, + unsigned char **data, + size_t size); + + + /** + * Seek in the file. Use 'SEEK_CUR' for whence and 'pos' of 0 to + * obtain the current position in the file. + * + * @param cls the 'cls' member of this struct + * @param pos position to seek (see 'man lseek') + * @param whence how to seek (absolute to start, relative, absolute to end) + * @return new absolute position, UINT64_MAX on error (i.e. desired position + * does not exist) + */ + uint64_t (*seek) (void *cls, + int64_t pos, + int whence); + + + /** + * Determine the overall size of the file. + * + * @param cls the 'cls' member of this struct + * @return overall file size, UINT64_MAX on error (i.e. IPC failure) + */ + uint64_t (*get_size) (void *cls); + + /** + * Function to call on extracted data. + */ + EXTRACTOR_MetaDataProcessor proc; + +}; + + /** * Signature of the extract method that each plugin * must provide. * - * @param data data to process - * @param datasize number of bytes available in data - * @param proc function to call for meta data found - * @param proc_cls cls argument to proc - * @param options options for this plugin; can be NULL - * @return 0 if all calls to proc returned 0, otherwise 1 + * @param ec extraction context provided to the plugin */ +typedef void (*EXTRACTOR_extract_method) (struct EXTRACTOR_ExtractContext *ec); + /** * Linked list of extractor plugins. An application builds this list @@ -401,10 +459,6 @@ typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls, */ struct EXTRACTOR_PluginList; -typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin, - EXTRACTOR_MetaDataProcessor proc, void *proc_cls); - - /** * Load the default set of plugins. 
The default can be changed @@ -428,7 +482,7 @@ typedef int (*EXTRACTOR_extract_method) (struct EXTRACTOR_PluginList *plugin, * @return the default set of plugins, NULL if no plugins were found */ struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags); +EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags); /** @@ -442,10 +496,11 @@ EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags); */ struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, - const char * library, + const char *library, const char *options, enum EXTRACTOR_Options flags); + /** * Load multiple libraries as specified by the user. * @@ -462,7 +517,7 @@ EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, * or if config was empty (or NULL). */ struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, +EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev, const char *config, enum EXTRACTOR_Options flags); @@ -475,8 +530,8 @@ EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, * @return the reduced list, unchanged if the plugin was not loaded */ struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, - const char * library); +EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev, + const char *library); /** @@ -485,7 +540,7 @@ EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, * @param plugin the list of plugins */ void -EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins); +EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins); /** @@ -500,12 +555,12 @@ EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins); * @param proc_cls cls argument to proc */ void -EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins, - const char *filename, - const void *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls); +EXTRACTOR_extract (struct EXTRACTOR_PluginList 
*plugins, + const char *filename, + const void *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls); /** @@ -525,13 +580,13 @@ EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins, * @return non-zero if printing failed, otherwise 0. */ int -EXTRACTOR_meta_data_print(void * handle, - const char *plugin_name, - enum EXTRACTOR_MetaType type, - enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, - const char *data, - size_t data_len); +EXTRACTOR_meta_data_print (void * handle, + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len); #if 0 /* keep Emacsens' auto-indent happy */ diff --git a/src/main/Makefile.am b/src/main/Makefile.am @@ -28,6 +28,11 @@ libextractor_la_LDFLAGS = \ libextractor_la_LIBADD = \ $(LIBLTDL) $(zlib) $(bz2lib) $(LTLIBICONV) +if WINDOWS +EXTRACTOR_IPC=extractor_ipc_w32.c +else +EXTRACTOR_IPC=extractor_ipc_gnu.c +endif EXTRA_DIST = \ iconv.c fuzz_default.sh fuzz_thumbnail.sh @@ -36,16 +41,15 @@ libextractor_la_CPPFLAGS = -DPLUGINDIR=\"@RPLUGINDIR@\" -DPLUGININSTDIR=\"${plug libextractor_la_SOURCES = \ extractor.c \ - extractor_plugpath.c \ - extractor_plugins.c \ + $(EXTRACTOR_IPC) extractor_ipc.h \ + extractor_plugpath.c extractor_plugpath.h \ + extractor_plugins.c extractor_plugins.h \ extractor_metatypes.c \ extractor_print.c extract_SOURCES = \ extract.c \ - getopt.c \ - getopt.h \ - getopt1.c + getopt.c getopt.h getopt1.c LDADD = \ diff --git a/src/main/extract.c b/src/main/extract.c @@ -4,7 +4,7 @@ libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. 
libextractor is distributed in the hope that it will be useful, but diff --git a/src/main/extractor.c b/src/main/extractor.c @@ -1,10 +1,10 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 2, or (at your + by the Free Software Foundation; either version 3, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but @@ -23,30 +23,21 @@ #include "extractor.h" #include <dirent.h> #include <sys/types.h> -#if !WINDOWS -#include <sys/wait.h> -#include <sys/shm.h> -#endif #include <signal.h> #include <ltdl.h> -#if HAVE_LIBBZ2 -#include <bzlib.h> -#endif - -#if HAVE_ZLIB -#include <zlib.h> -#endif - +#include "extractor_datasource.h" +#include "extractor_ipc.h" #include "extractor_plugpath.h" #include "extractor_plugins.h" /** - * How many bytes do we actually try to scan? (from the beginning - * of the file). Limit to 32 MB. + * How long do we allow an individual meta data object to be? + * Used to guard against (broken) plugins causing us to use + * excessive amounts of memory. */ -#define MAX_READ 32 * 1024 * 1024 +#define MAX_META_DATA 32 * 1024 * 1024 /** * Maximum length of a Mime-Type string. @@ -129,87 +120,38 @@ */ struct IpcHeader { + /** + * Type of the meta data. + */ enum EXTRACTOR_MetaType meta_type; + + /** + * Format of the meta data. + */ enum EXTRACTOR_MetaFormat meta_format; + + /** + * Number of bytes of meta data (value) + */ size_t data_len; + + /** + * Length of the mime type string describing the meta data value's mime type, + * including 0-terminator, 0 for mime type of "NULL". 
+ */ size_t mime_len; }; -#if !WINDOWS -/** - * Opens a shared memory object (for later mmapping). - * This is POSIX variant of the the plugin_open_* function. Shm is always memory-backed. - * Closes a shm is already opened, closes it before opening a new one. - * - * @param plugin plugin context - * @param shm_name name of the shm. - * @return shm id (-1 on error). That is, the result of shm_open() syscall. - */ -static int -plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) -{ - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); - return plugin->shm_id; -} - -/** - * Opens a file (for later mmapping). - * This is POSIX variant of the plugin_open_* function. - * Closes a file is already opened, closes it before opening a new one. - * - * @param plugin plugin context - * @param shm_name name of the file to open. - * @return file id (-1 on error). That is, the result of open() syscall. - */ -static int -plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) -{ - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = open (shm_name, O_RDONLY, 0); - return plugin->shm_id; -} -#else -/** - * Opens a shared memory object (for later mmapping). - * This is W32 variant of the plugin_open_* function. - * Opened shm might be memory-backed or file-backed (depending on how - * it was created). shm_name is never a file name, unlike POSIX. - * Closes a shm is already opened, closes it before opening a new one. - * - * @param plugin plugin context - * @param shm_name name of the shared memory object. - * @return memory-mapped file handle (NULL on error). That is, the result of OpenFileMapping() syscall. 
- */ -HANDLE -plugin_open_shm (struct EXTRACTOR_PluginList *plugin, const char *shm_name) -{ - if (plugin->map_handle != 0) - CloseHandle (plugin->map_handle); - plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); - return plugin->map_handle; -} -/** - * Another name for plugin_open_shm(). - */ -HANDLE -plugin_open_file (struct EXTRACTOR_PluginList *plugin, const char *shm_name) -{ - return plugin_open_shm (plugin, shm_name); -} -#endif /** - * Writes @size bytes from @buf into @fd, returns only when - * writing is not possible, or when all @size bytes were written + * Writes 'size' bytes from 'buf' to 'fd', returns only when + * writing is not possible, or when all 'size' bytes were written * (never does partial writes). * * @param fd fd to write into * @param buf buffer to read from * @param size number of bytes to write - * @return number of bytes written (that is - @size), or -1 on error + * @return number of bytes written (that is 'size'), or -1 on error */ static int write_all (int fd, @@ -230,6 +172,7 @@ write_all (int fd, return size; } + /** * Function called by a plugin in a child process. Transmits * the meta data back to the parent process. 
@@ -256,13 +199,12 @@ transmit_reply (void *cls, const char *data, size_t data_len) { + static const unsigned char meta_byte = MESSAGE_META; int *cpipe_out = cls; struct IpcHeader hdr; size_t mime_len; - unsigned char meta_byte = MESSAGE_META; - unsigned char zero_byte = 0; - if (data_mime_type == NULL) + if (NULL == data_mime_type) mime_len = 0; else mime_len = strlen (data_mime_type) + 1; @@ -272,84 +214,22 @@ transmit_reply (void *cls, hdr.meta_format = format; hdr.data_len = data_len; hdr.mime_len = mime_len; - if ((1 != write_all (*cpipe_out, &meta_byte, 1)) || - (sizeof(hdr) != write_all (*cpipe_out, &hdr, sizeof(hdr))) || - (mime_len -1 != write_all (*cpipe_out, data_mime_type, mime_len - 1)) || - (1 != write_all (*cpipe_out, &zero_byte, 1)) || - (data_len != write_all (*cpipe_out, data, data_len))) - return 1; - return 0; -} - -/** - * Initializes an extracting session for a plugin. - * opens the file/shm (only in OPMODE_FILE) - * sets shm_ptr to NULL (unmaps it, if it was mapped) - * sets position to 0 - * initializes file size to @fsize (may be -1) - * sets seek request to 0 - * - * @param plugin plugin context - * @param operation_mode the mode of operation (OPMODE_*) - * @param fsize size of the source file (may be -1) - * @param shm_name name of the shm or file to open - * @return 0 on success, non-0 on error. 
- */ -static int -init_state_method (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, int64_t fsize, const char *shm_name) -{ - plugin->seek_request = 0; -#if !WINDOWS - if (plugin->shm_ptr != NULL) - munmap (plugin->shm_ptr, plugin->map_size); - plugin->shm_ptr = NULL; - if (operation_mode == OPMODE_FILE) - { - if (-1 == plugin_open_file (plugin, shm_name)) - return 1; - } - else if (-1 == plugin_open_shm (plugin, shm_name)) + if ( (sizeof (meta_byte) != + write_all (*cpipe_out, + &meta_byte, sizeof (meta_byte))) || + (sizeof (hdr) != + write_all (*cpipe_out, + &hdr, sizeof (hdr))) || + (mime_len != + write_all (*cpipe_out, + data_mime_type, mime_len)) || + (data_len != + write_all (*cpipe_out, + data, data_len)) ) return 1; -#else - if (plugin->shm_ptr != NULL) - UnmapViewOfFile (plugin->shm_ptr); - plugin->shm_ptr = NULL; - if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name)) - return 1; -#endif - plugin->fsize = fsize; - plugin->shm_pos = 0; - plugin->fpos = 0; return 0; } -/** - * Deinitializes an extracting session for a plugin. - * unmaps shm_ptr (if was mapped) - * closes file/shm (if it was opened) - * sets map size and shm_ptr to NULL. - * - * @param plugin plugin context - */ -static void -discard_state_method (struct EXTRACTOR_PluginList *plugin) -{ -#if !WINDOWS - if (plugin->shm_ptr != NULL && plugin->map_size > 0) - munmap (plugin->shm_ptr, plugin->map_size); - if (plugin->shm_id != -1) - close (plugin->shm_id); - plugin->shm_id = -1; -#else - if (plugin->shm_ptr != NULL) - UnmapViewOfFile (plugin->shm_ptr); - if (plugin->map_handle != 0) - CloseHandle (plugin->map_handle); - plugin->map_handle = 0; -#endif - plugin->map_size = 0; - plugin->shm_ptr = NULL; -} /** * Main loop function for plugins. 
@@ -368,9 +248,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) unsigned char code; char *shm_name = NULL; size_t shm_name_len; - int extract_reply; - struct IpcHeader hdr; int do_break; #ifdef WINDOWS @@ -584,6 +462,7 @@ process_requests (struct EXTRACTOR_PluginList *plugin) return 0; } + /** * 'main' function of the child process. Loads the plugin, * sets up its in and out pipes, then runs the request serving function. @@ -593,7 +472,8 @@ process_requests (struct EXTRACTOR_PluginList *plugin) * @param out stream to write to */ static void -plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out) +plugin_main (struct EXTRACTOR_PluginList *plugin, + int in, int out) { if (plugin == NULL) { @@ -601,7 +481,7 @@ plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out) close (out); return; } - if (0 != plugin_load (plugin)) + if (0 != EXTRACTOR_plugin_load_ (plugin)) { close (in); close (out); @@ -626,1728 +506,156 @@ plugin_main (struct EXTRACTOR_PluginList *plugin, int in, int out) close (out); } -#if !WINDOWS /** - * Start the process for the given plugin. - */ -static void -start_process (struct EXTRACTOR_PluginList *plugin) + * Open a file + */ +static int +file_open(const char *filename, int oflag, ...) { - int p1[2]; - int p2[2]; - pid_t pid; - int status; + int mode; + const char *fn; +#ifdef MINGW + char szFile[_MAX_PATH + 1]; + long lRet; + + if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) + { + errno = ENOENT; + SetLastError(lRet); + return -1; + } + fn = szFile; +#else + fn = filename; +#endif + mode = 0; +#ifdef MINGW + /* Set binary mode */ + mode |= O_BINARY; +#endif + return OPEN(fn, oflag, mode); +} + +/** + * Initializes plugin state. Calls init_state_method() + * directly or indirectly. 
+ * + * @param plugin plugin to initialize + * @param operation_mode operation mode + * @param shm_name name of the shm/file + * @param fsize file size (may be -1) + */ +static void +init_plugin_state (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + const char *shm_name, int64_t fsize) +{ + int write_result; + int init_state_size; + unsigned char *init_state; + int t; + size_t shm_name_len = strlen (shm_name) + 1; + + init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (uint8_t) + sizeof (int64_t); + plugin->operation_mode = operation_mode; switch (plugin->flags) { case EXTRACTOR_OPTION_DEFAULT_POLICY: - if (-1 != plugin->cpid && 0 != plugin->cpid) - return; - break; case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (0 != plugin->cpid) + init_state = malloc (init_state_size); + if (init_state == NULL) + { + stop_process (plugin); + return; + } + t = 0; + init_state[t] = MESSAGE_INIT_STATE; + t += 1; + memcpy (&init_state[t], &operation_mode, sizeof (uint8_t)); + t += sizeof (uint8_t); + memcpy (&init_state[t], &fsize, sizeof (int64_t)); + t += sizeof (int64_t); + memcpy (&init_state[t], &shm_name_len, sizeof (size_t)); + t += sizeof (size_t); + memcpy (&init_state[t], shm_name, shm_name_len); + t += shm_name_len; + write_result = plugin_write (plugin, init_state, init_state_size); + free (init_state); + if (write_result < init_state_size) + { + stop_process (plugin); return; + } + plugin->seek_request = 0; break; case EXTRACTOR_OPTION_IN_PROCESS: + init_state_method (plugin, operation_mode, fsize, shm_name); return; break; case EXTRACTOR_OPTION_DISABLED: return; break; } - - plugin->cpid = -1; - if (0 != pipe (p1)) - { - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - if (0 != pipe (p2)) - { - close (p1[0]); - close (p1[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - pid = fork (); - plugin->cpid = pid; - if (pid == -1) - { - close (p1[0]); - close (p1[1]); - close (p2[0]); - close (p2[1]); - plugin->flags 
= EXTRACTOR_OPTION_DISABLED; - return; - } - if (pid == 0) - { - close (p1[1]); - close (p2[0]); - plugin_main (plugin, p1[0], p2[1]); - _exit (0); - } - close (p1[0]); - close (p2[1]); - plugin->cpipe_in = fdopen (p1[1], "w"); - if (plugin->cpipe_in == NULL) - { - perror ("fdopen"); - (void) kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - close (p1[1]); - close (p2[0]); - plugin->cpid = -1; - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - plugin->cpipe_out = p2[0]; } + /** - * Stop the child process of this plugin. + * Discards plugin state. Calls discard_state_method() + * directly or indirectly. + * + * @param plugin plugin to initialize */ static void -stop_process (struct EXTRACTOR_PluginList *plugin) -{ - int status; - -#if DEBUG - if (plugin->cpid == -1) - fprintf (stderr, - "Plugin `%s' choked on this input\n", - plugin->short_libname); -#endif - if ( (plugin->cpid == -1) || - (plugin->cpid == 0) ) - return; - kill (plugin->cpid, SIGKILL); - waitpid (plugin->cpid, &status, 0); - plugin->cpid = -1; - close (plugin->cpipe_out); - fclose (plugin->cpipe_in); - plugin->cpipe_out = -1; - plugin->cpipe_in = NULL; - - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - - plugin->seek_request = -1; -} - -static int -write_plugin_data (const struct EXTRACTOR_PluginList *plugin) -{ - /* This function is only necessary on W32. On POSIX - * systems plugin inherits its own data from the parent */ - return 0; -} - -#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) - -#else /* WINDOWS */ - -#ifndef PIPE_BUF -#define PIPE_BUF 512 -#endif - -/* Copyright Bob Byrnes <byrnes <at> curl.com> - http://permalink.gmane.org/gmane.os.cygwin.patches/2121 -*/ -/* Create a pipe, and return handles to the read and write ends, - just like CreatePipe, but ensure that the write end permits - FILE_READ_ATTRIBUTES access, on later versions of win32 where - this is supported. 
This access is needed by NtQueryInformationFile, - which is used to implement select and nonblocking writes. - Note that the return value is either NO_ERROR or GetLastError, - unlike CreatePipe, which returns a bool for success or failure. */ -static int -create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr, - LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize, - DWORD dwReadMode, DWORD dwWriteMode) +discard_plugin_state (struct EXTRACTOR_PluginList *plugin) { - /* Default to error. */ - *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE; - - HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE; - - /* Ensure that there is enough pipe buffer space for atomic writes. */ - if (psize < PIPE_BUF) - psize = PIPE_BUF; - - char pipename[MAX_PATH]; + int write_result; + unsigned char discard_state = MESSAGE_DISCARD_STATE; - /* Retry CreateNamedPipe as long as the pipe name is in use. - * Retrying will probably never be necessary, but we want - * to be as robust as possible. */ - while (1) + switch (plugin->flags) { - static volatile LONG pipe_unique_id; - - snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld", - getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id)); - /* Use CreateNamedPipe instead of CreatePipe, because the latter - * returns a write handle that does not permit FILE_READ_ATTRIBUTES - * access, on versions of win32 earlier than WinXP SP2. - * CreatePipe also stupidly creates a full duplex pipe, which is - * a waste, since only a single direction is actually used. - * It's important to only allow a single instance, to ensure that - * the pipe was not created earlier by some other process, even if - * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE - * because that is only available for Win2k SP2 and WinXP. 
*/ - read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */ - psize, /* output buffer size */ - psize, /* input buffer size */ - NMPWAIT_USE_DEFAULT_WAIT, sa_ptr); - - if (read_pipe != INVALID_HANDLE_VALUE) - { - break; - } - - DWORD err = GetLastError (); - - switch (err) + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + /* This is somewhat clumsy, but it's the only stop-indicating + * non-W32/POSIX-specific field i could think of... + */ + if (plugin->cpipe_out != -1) { - case ERROR_PIPE_BUSY: - /* The pipe is already open with compatible parameters. - * Pick a new name and retry. */ - continue; - case ERROR_ACCESS_DENIED: - /* The pipe is already open with incompatible parameters. - * Pick a new name and retry. */ - continue; - case ERROR_CALL_NOT_IMPLEMENTED: - /* We are on an older Win9x platform without named pipes. - * Return an anonymous pipe as the best approximation. */ - if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize)) + write_result = plugin_write (plugin, &discard_state, 1); + if (write_result < 1) { - return 0; + stop_process (plugin); + return; } - err = GetLastError (); - return err; - default: - return err; } - /* NOTREACHED */ - } - - /* Open the named pipe for writing. - * Be sure to permit FILE_READ_ATTRIBUTES access. */ - write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0, /* share mode */ - sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */ - 0); /* handle to template file */ - - if (write_pipe == INVALID_HANDLE_VALUE) - { - /* Failure. */ - DWORD err = GetLastError (); - - CloseHandle (read_pipe); - return err; - } - - /* Success. */ - *read_pipe_ptr = read_pipe; - *write_pipe_ptr = write_pipe; - return 0; -} - -/** - * Writes @size bytes from @buf to @h, using @ov for - * overlapped i/o. Deallocates @old_buf and sets it to NULL, - * if necessary. 
- * Writes asynchronously, but sequentially (only one writing - * operation may be active at any given moment, but it will - * be done in background). Thus it is intended to be used - * for writing a few big chunks rather than a lot of small pieces. - * - * The extravagant interface is mainly because this function - * does not use a separate struct to group together overlapped - * structure, buffer pointer and the handle. - * - * @param h pipe handle - * @param ov overlapped structure pointer - * @param buf buffer to read from. Will be copied internally - * @param size number of bytes to write - * @param old_buf pointer where a copy of previous buffer is stored, - * and where a copy of @buf will be stored. - * - * @return number of bytes written, -1 on error - */ -static int -write_to_pipe (HANDLE h, OVERLAPPED *ov, unsigned char *buf, size_t size, unsigned char **old_buf) -{ - DWORD written; - BOOL bresult; - DWORD err; - - if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE)) - return -1; - - ResetEvent (ov->hEvent); - - if (*old_buf != NULL) - free (*old_buf); - - *old_buf = malloc (size); - if (*old_buf == NULL) - return -1; - memcpy (*old_buf, buf, size); - written = 0; - ov->Offset = 0; - ov->OffsetHigh = 0; - ov->Pointer = 0; - ov->Internal = 0; - ov->InternalHigh = 0; - bresult = WriteFile (h, *old_buf, size, &written, ov); - - if (bresult == TRUE) - { - SetEvent (ov->hEvent); - free (*old_buf); - *old_buf = NULL; - return written; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + discard_state_method (plugin); + return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; } - - err = GetLastError (); - if (err == ERROR_IO_PENDING) - return size; - SetEvent (ov->hEvent); - *old_buf = NULL; - SetLastError (err); - return -1; } -#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) /** - * Communicates plugin data (library name, options) to the plugin - * process. 
This is only necessary on W32, where this information - * is not inherited by the plugin, because it is not forked. + * Forces plugin to move the buffer window to 'pos'. * * @param plugin plugin context - * - * @return 0 on success, -1 on failure - */ + * @param pos position to move to + * @param want_start 1 if the caller is interested in the beginning of the + * window, 0 if the caller is interested in its end. Window position + * must be aligned to page size, and this parameter controls the + * direction of window shift. 0 is used mostly by SEEK_END. + * @return 0 on success, -1 on error + */ static int -write_plugin_data (struct EXTRACTOR_PluginList *plugin) -{ - size_t libname_len, shortname_len, opts_len; - DWORD len; - char *str; - size_t total_len = 0; - unsigned char *buf, *ptr; - - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - break; - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - break; - case EXTRACTOR_OPTION_IN_PROCESS: - return 0; - break; - case EXTRACTOR_OPTION_DISABLED: - return 0; - break; - } - - libname_len = strlen (plugin->libname) + 1; - total_len += sizeof (size_t) + libname_len; - shortname_len = strlen (plugin->short_libname) + 1; - total_len += sizeof (size_t) + shortname_len; - if (plugin->plugin_options != NULL) - { - opts_len = strlen (plugin->plugin_options) + 1; - total_len += opts_len; - } - else - { - opts_len = 0; - } - total_len += sizeof (size_t); - - buf = malloc (total_len); - if (buf == NULL) - return -1; - ptr = buf; - memcpy (ptr, &libname_len, sizeof (size_t)); - ptr += sizeof (size_t); - memcpy (ptr, plugin->libname, libname_len); - ptr += libname_len; - memcpy (ptr, &shortname_len, sizeof (size_t)); - ptr += sizeof (size_t); - memcpy (ptr, plugin->short_libname, shortname_len); - ptr += shortname_len; - memcpy (ptr, &opts_len, sizeof (size_t)); - ptr += sizeof (size_t); - if (opts_len > 0) - { - memcpy (ptr, plugin->plugin_options, opts_len); - ptr += opts_len; - } - if (total_len != 
write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer)) - { - free (buf); - return -1; - } - free (buf); - return 0; -} - -/** - * Reads plugin data from the LE server process. - * Also initializes allocation granularity (duh...). - * - * @param fd the pipe to read from - * - * @return newly allocated plugin context - */ -static struct EXTRACTOR_PluginList * -read_plugin_data (int fd) -{ - struct EXTRACTOR_PluginList *ret; - size_t i; - - ret = malloc (sizeof (struct EXTRACTOR_PluginList)); - if (ret == NULL) - return NULL; - read (fd, &i, sizeof (size_t)); - ret->libname = malloc (i); - if (ret->libname == NULL) - { - free (ret); - return NULL; - } - read (fd, ret->libname, i); - ret->libname[i - 1] = '\0'; - - read (fd, &i, sizeof (size_t)); - ret->short_libname = malloc (i); - if (ret->short_libname == NULL) - { - free (ret->libname); - free (ret); - return NULL; - } - read (fd, ret->short_libname, i); - ret->short_libname[i - 1] = '\0'; - - read (fd, &i, sizeof (size_t)); - if (i == 0) - { - ret->plugin_options = NULL; - } - else - { - ret->plugin_options = malloc (i); - if (ret->plugin_options == NULL) - { - free (ret->short_libname); - free (ret->libname); - free (ret); - return NULL; - } - read (fd, ret->plugin_options, i); - ret->plugin_options[i - 1] = '\0'; - } -#if WINDOWS - { - SYSTEM_INFO si; - GetSystemInfo (&si); - ret->allocation_granularity = si.dwAllocationGranularity; - } -#else - ret->allocation_granularity = sysconf (_SC_PAGE_SIZE); -#endif - return ret; -} - -/** - * Start the process for the given plugin. 
- */ -static void -start_process (struct EXTRACTOR_PluginList *plugin) -{ - HANDLE p1[2]; - HANDLE p2[2]; - STARTUPINFO startup; - PROCESS_INFORMATION proc; - char cmd[MAX_PATH + 1]; - char arg1[10], arg2[10]; - HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; - SECURITY_ATTRIBUTES sa; - - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0) - return; - break; - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (plugin->hProcess != 0) - return; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - return; - break; - case EXTRACTOR_OPTION_DISABLED: - return; - break; - } - - sa.nLength = sizeof (sa); - sa.lpSecurityDescriptor = NULL; - sa.bInheritHandle = FALSE; - - plugin->hProcess = NULL; - - if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) - { - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) - { - CloseHandle (p1[0]); - CloseHandle (p1[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - - memset (&startup, 0, sizeof (STARTUPINFO)); - - if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (), - &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) - || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (), - &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) - { - if (p10_os_inh != INVALID_HANDLE_VALUE) - CloseHandle (p10_os_inh); - if (p21_os_inh != INVALID_HANDLE_VALUE) - CloseHandle (p21_os_inh); - CloseHandle (p1[0]); - CloseHandle (p1[1]); - CloseHandle (p2[0]); - CloseHandle (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - - /* TODO: write our own plugin-hosting executable? rundll32, for once, has smaller than usual stack size. 
- * Also, users might freak out seeing over 9000 rundll32 processes (seeing over 9000 processes named - * "libextractor_plugin_helper" is probably less confusing). - */ - snprintf(cmd, MAX_PATH + 1, "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", p10_os_inh, p21_os_inh); - cmd[MAX_PATH] = '\0'; - if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, - &startup, &proc)) - { - plugin->hProcess = proc.hProcess; - CloseHandle (proc.hThread); - } - else - { - CloseHandle (p1[0]); - CloseHandle (p1[1]); - CloseHandle (p2[0]); - CloseHandle (p2[1]); - plugin->flags = EXTRACTOR_OPTION_DISABLED; - return; - } - CloseHandle (p1[0]); - CloseHandle (p2[1]); - CloseHandle (p10_os_inh); - CloseHandle (p21_os_inh); - - plugin->cpipe_in = p1[1]; - plugin->cpipe_out = p2[0]; - - memset (&plugin->ov_read, 0, sizeof (OVERLAPPED)); - memset (&plugin->ov_write, 0, sizeof (OVERLAPPED)); - - plugin->ov_write_buffer = NULL; - - plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); - plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); -} - -/** - * Stop the child process of this plugin. 
- */ -static void -stop_process (struct EXTRACTOR_PluginList *plugin) -{ - int status; - HANDLE process; - -#if DEBUG - if (plugin->hProcess == INVALID_HANDLE_VALUE) - fprintf (stderr, - "Plugin `%s' choked on this input\n", - plugin->short_libname); -#endif - if (plugin->hProcess == INVALID_HANDLE_VALUE || - plugin->hProcess == NULL) - return; - TerminateProcess (plugin->hProcess, 0); - CloseHandle (plugin->hProcess); - plugin->hProcess = INVALID_HANDLE_VALUE; - CloseHandle (plugin->cpipe_out); - CloseHandle (plugin->cpipe_in); - plugin->cpipe_out = INVALID_HANDLE_VALUE; - plugin->cpipe_in = INVALID_HANDLE_VALUE; - CloseHandle (plugin->ov_read.hEvent); - CloseHandle (plugin->ov_write.hEvent); - if (plugin->ov_write_buffer != NULL) - { - free (plugin->ov_write_buffer); - plugin->ov_write_buffer = NULL; - } - - if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) - plugin->flags = EXTRACTOR_OPTION_DISABLED; - - plugin->seek_request = -1; -} - -#endif /* WINDOWS */ - -/** - * Remove a plugin from a list. 
- * - * @param prev the current list of plugins - * @param library the name of the plugin to remove - * @return the reduced list, unchanged if the plugin was not loaded - */ -struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_remove(struct EXTRACTOR_PluginList * prev, - const char * library) -{ - struct EXTRACTOR_PluginList *pos; - struct EXTRACTOR_PluginList *first; - - pos = prev; - first = prev; - while ((pos != NULL) && (0 != strcmp (pos->short_libname, library))) - { - prev = pos; - pos = pos->next; - } - if (pos != NULL) - { - /* found, close library */ - if (first == pos) - first = pos->next; - else - prev->next = pos->next; - /* found */ - stop_process (pos); - free (pos->short_libname); - free (pos->libname); - free (pos->plugin_options); - if (NULL != pos->libraryHandle) - lt_dlclose (pos->libraryHandle); - free (pos); - } -#if DEBUG - else - fprintf(stderr, - "Unloading plugin `%s' failed!\n", - library); -#endif - return first; -} - - -/** - * Remove all plugins from the given list (destroys the list). - * - * @param plugin the list of plugins - */ -void -EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins) -{ - while (plugins != NULL) - plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); -} - - - -/** - * Open a file - */ -static int file_open(const char *filename, int oflag, ...) -{ - int mode; - const char *fn; -#ifdef MINGW - char szFile[_MAX_PATH + 1]; - long lRet; - - if ((lRet = plibc_conv_to_win_path(filename, szFile)) != ERROR_SUCCESS) - { - errno = ENOENT; - SetLastError(lRet); - return -1; - } - fn = szFile; -#else - fn = filename; -#endif - mode = 0; -#ifdef MINGW - /* Set binary mode */ - mode |= O_BINARY; -#endif - return OPEN(fn, oflag, mode); -} - -#if WINDOWS - -/** - * Setup a shared memory segment. 
- * - * @param ptr set to the location of the map segment - * @param map where to store the map handle - * @param fn name of the mapping - * @param fn_size size available in fn - * @param size number of bytes to allocated for the mapping - * @return 0 on success - */ -static int -make_shm_w32 (void **ptr, HANDLE *map, char *fn, size_t fn_size, size_t size) -{ - const char *tpath = "Local\\"; - snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), - (unsigned int) RANDOM()); - *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); - *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); - if (*ptr == NULL) - { - CloseHandle (*map); - return 1; - } - return 0; -} - -/** - * Setup a file-backed shared memory segment. - * - * @param map where to store the map handle - * @param file handle of the file to back the shm - * @param fn name of the mapping - * @param fn_size size available in fn - * @param size number of bytes to allocated for the mapping - * @return 0 on success - */ -static int -make_file_backed_shm_w32 (HANDLE *map, HANDLE file, char *fn, size_t fn_size) -{ - const char *tpath = "Local\\"; - snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), - (unsigned int) RANDOM()); - *map = CreateFileMapping (file, NULL, PAGE_READONLY, 0, 0, fn); - if (*map == NULL) - { - DWORD err = GetLastError (); - return 1; - } - return 0; -} - -static void -destroy_shm_w32 (void *ptr, HANDLE map) -{ - UnmapViewOfFile (ptr); - CloseHandle (map); -} - -static void -destroy_file_backed_shm_w32 (HANDLE map) -{ - CloseHandle (map); -} - -#else - -/** - * Setup a shared memory segment. 
- * - * @param ptr set to the location of the shm segment - * @param shmid where to store the shm ID - * @param fn name of the shared segment - * @param fn_size size available in fn - * @param size number of bytes to allocated for the segment - * @return 0 on success - */ -static int -make_shm_posix (void **ptr, int *shmid, char *fn, size_t fn_size, size_t size) -{ - const char *tpath; -#if SOMEBSD - /* this works on FreeBSD, not sure about others... */ - tpath = getenv ("TMPDIR"); - if (tpath == NULL) - tpath = "/tmp/"; -#else - tpath = "/"; /* Linux */ -#endif - snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), - (unsigned int) RANDOM()); - *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - *ptr = NULL; - if (-1 == *shmid) - return 1; - if ((0 != ftruncate (*shmid, size)) || - (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || - (*ptr == (void*) -1) ) - { - close (*shmid); - *shmid = -1; - shm_unlink (fn); - return 1; - } - return 0; -} - -static void -destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) -{ - if (NULL != ptr) - munmap (ptr, size); - if (shm_id != -1) - close (shm_id); - shm_unlink (shm_name); -} -#endif - -#ifndef O_LARGEFILE -#define O_LARGEFILE 0 -#endif - -/** - * A poor attempt to abstract the data source (file or a memory buffer) - * for the decompressor. - */ -struct BufferedFileDataSource -{ - /** - * Descriptor of the file to read data from (may be -1) - */ - int fd; - - /** - * Pointer to the buffer to read from (may be NULL) - */ - const unsigned char *data; - - /** - * Size of the file (or the data buffer) - */ - int64_t fsize; - - /** - * Position within the file or the data buffer - */ - int64_t fpos; - - /** - * A buffer to read into. For fd != -1: when data != NULL, - * data is used directly. - */ - unsigned char *buffer; - - /** - * Position within the buffer. 
- */ - int64_t buffer_pos; - - /** - * Number of bytes in the buffer (<= buffer_size) - */ - int64_t buffer_bytes; - - /** - * Allocated size of the buffer - */ - int64_t buffer_size; -}; - -/** - * Creates a bfds - * - * @param data data buffer to use as a source (NULL if fd != -1) - * @param fd file descriptor to use as a source (-1 if data != NULL) - * @param fsize size of the file (or the buffer) - * @return newly allocated bfds - */ -struct BufferedFileDataSource * -bfds_new (const unsigned char *data, int fd, int64_t fsize); - -/** - * Unallocates bfds - * - * @param bfds bfds to deallocate - */ -void -bfds_delete (struct BufferedFileDataSource *bfds); - -/** - * Makes bfds seek to @pos and read a chunk of bytes there. - * Changes bfds->fpos, bfds->buffer_bytes and bfds->buffer_pos. - * Does almost nothing for memory-backed bfds. - * - * @param bfds bfds - * @param pos position - * @return 0 on success, -1 on error - */ -int -bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos); - -/** - * Makes bfds seek to @pos in @whence mode. - * Will try to seek within the buffer, will move the buffer location if - * the seek request falls outside of the buffer range. - * - * @param bfds bfds - * @param pos position to seek to - * @param whence one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END) - * @return new absolute position - */ -int64_t -bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence); - -/** - * Fills @buf_ptr with a pointer to a chunk of data. - * Same as read() but there's no need to allocate or de-allocate the - * memory (since data IS already in memory). - * Will seek if necessary. Will fail if @count exceeds buffer size. 
- * - * @param bfds bfds - * @param buf_ptr location to store data pointer - * @param count number of bytes to read - * @return number of bytes (<= count) available at location pointed by buf_ptr - */ -int64_t -bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count); - -struct BufferedFileDataSource * -bfds_new (const unsigned char *data, int fd, int64_t fsize) -{ - struct BufferedFileDataSource *result; - result = malloc (sizeof (struct BufferedFileDataSource)); - if (result == NULL) - return NULL; - memset (result, 0, sizeof (struct BufferedFileDataSource)); - result->data = data; - result->fsize = fsize; - result->fd = fd; - result->buffer_size = fsize; - if (result->data == NULL) - { - if (result->buffer_size > MAX_READ) - result->buffer_size = MAX_READ; - result->buffer = malloc (result->buffer_size); - if (result->buffer == NULL) - { - free (result); - return NULL; - } - } - bfds_pick_next_buffer_at (result, 0); - return result; -} - -void -bfds_delete (struct BufferedFileDataSource *bfds) -{ - if (bfds->buffer) - free (bfds->buffer); - free (bfds); -} - -int -bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, int64_t pos) -{ - int64_t position, rd; - if (bfds->data != NULL) - { - bfds->buffer_bytes = bfds->fsize; - return 0; - } -#if WINDOWS - position = _lseeki64 (bfds->fd, pos, SEEK_SET); -#elif HAVE_LSEEK64 - position = lseek64 (bfds->fd, pos, SEEK_SET); -#else - position = (int64_t) lseek (bfds->fd, pos, SEEK_SET); -#endif - if (position < 0) - return -1; - bfds->fpos = position; - rd = read (bfds->fd, bfds->buffer, bfds->buffer_size); - if (rd < 0) - return -1; - bfds->buffer_bytes = rd; - return 0; -} - -int64_t -bfds_seek (struct BufferedFileDataSource *bfds, int64_t pos, int whence) -{ - switch (whence) - { - case SEEK_CUR: - if (bfds->data == NULL) - { - if (0 != bfds_pick_next_buffer_at (bfds, bfds->fpos + bfds->buffer_pos + pos)) - return -1; - bfds->buffer_pos = 0; - return bfds->fpos; - } - 
bfds->buffer_pos += pos; - return bfds->buffer_pos; - break; - case SEEK_SET: - if (pos < 0) - return -1; - if (bfds->data == NULL) - { - if (0 != bfds_pick_next_buffer_at (bfds, pos)) - return -1; - bfds->buffer_pos = 0; - return bfds->fpos; - } - bfds->buffer_pos = pos; - return bfds->buffer_pos; - break; - case SEEK_END: - if (bfds->data == NULL) - { - if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos)) - return -1; - bfds->buffer_pos = 0; - return bfds->fpos; - } - bfds->buffer_pos = bfds->fsize + pos; - return bfds->buffer_pos; - break; - } - return -1; -} - -int64_t -bfds_read (struct BufferedFileDataSource *bfds, unsigned char **buf_ptr, int64_t count) -{ - if (count > MAX_READ) - return -1; - if (count > bfds->buffer_bytes - bfds->buffer_pos) - { - if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos + bfds->buffer_pos, SEEK_SET)) - return -1; - if (bfds->data == NULL) - { - *buf_ptr = &bfds->buffer[bfds->buffer_pos]; - bfds->buffer_pos += count < bfds->buffer_bytes ? count : bfds->buffer_bytes; - return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes); - } - else - { - int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? 
count : (bfds->buffer_bytes - bfds->buffer_pos); - *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; - bfds->buffer_pos += ret; - return ret; - } - } - else - { - if (bfds->data == NULL) - *buf_ptr = &bfds->buffer[bfds->buffer_pos]; - else - *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; - bfds->buffer_pos += count; - return count; - } -} - -#if HAVE_ZLIB -#define MIN_ZLIB_HEADER 12 -#endif -#if HAVE_LIBBZ2 -#define MIN_BZ2_HEADER 4 -#endif -#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB -#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER -#endif -#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2 -#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER -#endif -#if !defined (MIN_COMPRESSED_HEADER) -#define MIN_COMPRESSED_HEADER -1 -#endif - -#define COMPRESSED_DATA_PROBE_SIZE 3 - -enum ExtractorCompressionType -{ - COMP_TYPE_UNDEFINED = -1, - COMP_TYPE_INVALID = 0, - COMP_TYPE_ZLIB = 1, - COMP_TYPE_BZ2 = 2 -}; - -/** - * An object from which uncompressed data can be read - */ -struct CompressedFileSource -{ - /** - * The type of compression used in the source - */ - enum ExtractorCompressionType compression_type; - /** - * The source of data - */ - struct BufferedFileDataSource *bfds; - /** - * Size of the source (same as bfds->fsize) - */ - int64_t fsize; - /** - * Position within the source - */ - int64_t fpos; - - /** - * Total size of the uncompressed data. Remains -1 until - * decompression is finished. 
- */ - int64_t uncompressed_size; - - /* - unsigned char *buffer; - int64_t buffer_bytes; - int64_t buffer_len; - */ - -#if WINDOWS - /** - * W32 handle of the shm into which data is uncompressed - */ - HANDLE shm; -#else - /** - * POSIX id of the shm into which data is uncompressed - */ - int shm; -#endif - /** - * Name of the shm - */ - char shm_name[MAX_SHM_NAME + 1]; - /** - * Pointer to the mapped region of the shm (covers the whole shm) - */ - void *shm_ptr; - /** - * Position within shm - */ - int64_t shm_pos; - /** - * Allocated size of the shm - */ - int64_t shm_size; - /** - * Number of bytes in shm (<= shm_size) - */ - size_t shm_buf_size; - -#if HAVE_ZLIB - /** - * ZLIB stream object - */ - z_stream strm; - /** - * Length of gzip header (may be 0, in that case ZLIB parses the header) - */ - int gzip_header_length; -#endif -#if HAVE_LIBBZ2 - /** - * BZ2 stream object - */ - bz_stream bstrm; -#endif -}; - -void -cfs_delete (struct CompressedFileSource *cfs) -{ -#if WINDOWS - destroy_shm_w32 (cfs->shm_ptr, cfs->shm); -#else - destroy_shm_posix (cfs->shm_ptr, cfs->shm, cfs->shm_size, cfs->shm_name); -#endif - free (cfs); -} - -int -cfs_reset_stream_zlib (struct CompressedFileSource *cfs) -{ - if (cfs->gzip_header_length != bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET)) - return 0; - cfs->strm.next_in = NULL; - cfs->strm.avail_in = 0; - cfs->strm.total_in = 0; - cfs->strm.zalloc = NULL; - cfs->strm.zfree = NULL; - cfs->strm.opaque = NULL; - - /* - * note: maybe plain inflateInit(&strm) is adequate, - * it looks more backward-compatible also ; - * - * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; - * there might be a better check. 
- */ - if (Z_OK != inflateInit2 (&cfs->strm, -#ifdef ZLIB_VERNUM - 15 + 32 -#else - -MAX_WBITS -#endif - )) - { - return -1; - } - - cfs->fpos = cfs->gzip_header_length; - cfs->shm_pos = 0; - cfs->shm_buf_size = 0; - - return 1; -} - - -static int -cfs_reset_stream_bz2 (struct CompressedFileSource *cfs) -{ - return -1; -} - -/** - * Resets the compression stream to begin uncompressing - * from the beginning. Used at initialization time, and when - * seeking backward. - * - * @param cfs cfs to reset - * @return 1 on success, -1 on error - */ -int -cfs_reset_stream (struct CompressedFileSource *cfs) -{ - switch (cfs->compression_type) - { - case COMP_TYPE_ZLIB: - return cfs_reset_stream_zlib (cfs); - case COMP_TYPE_BZ2: - return cfs_reset_stream_bz2 (cfs); - default: - return -1; - } -} - - -static int -cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - /* Process gzip header */ - unsigned int gzip_header_length = 10; - unsigned char *pdata; - unsigned char data[12]; - - if (12 > bfds_read (cfs->bfds, &pdata, 12)) - return -1; - memcpy (data, pdata, 12); - - if (data[3] & 0x4) /* FEXTRA set */ - gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + - (((unsigned) (data[11] & 0xff)) * 256); - - if (data[3] & 0x8) /* FNAME set */ - { - int64_t buf_bytes; - int len; - unsigned char *buf, *cptr; - if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) - return -1; - buf_bytes = bfds_read (cfs->bfds, &buf, 1024); - if (buf_bytes <= 0) - return -1; - cptr = buf; - - len = 0; - /* stored file name is here */ - while (len < buf_bytes) - { - if ('\0' == *cptr) - break; - cptr++; - len++; - } - - if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, - EXTRACTOR_METAFORMAT_C_STRING, "text/plain", - (const char *) buf, - len)) - return 0; /* done */ - - /* FIXME: check for correctness */ - //gzip_header_length = (cptr - data) + 1; - gzip_header_length += len + 1; - } - - if 
(data[3] & 0x16) /* FCOMMENT set */ - { - int64_t buf_bytes; - int len; - unsigned char *buf, *cptr; - if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) - return -1; - buf_bytes = bfds_read (cfs->bfds, &buf, 1024); - if (buf_bytes <= 0) - return -1; - cptr = buf; - - len = 0; - /* stored file name is here */ - while (len < buf_bytes) - { - if ('\0' == *cptr) - break; - cptr++; - len++; - } - - if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, - EXTRACTOR_METAFORMAT_C_STRING, "text/plain", - (const char *) buf, - len)) - return 0; /* done */ - - /* FIXME: check for correctness */ - //gzip_header_length = (cptr - data) + 1; - gzip_header_length += len + 1; - } - - if (data[3] & 0x2) /* FCHRC set */ - gzip_header_length += 2; - - memset (&cfs->strm, 0, sizeof (z_stream)); - -#ifdef ZLIB_VERNUM - gzip_header_length = 0; -#endif - - cfs->gzip_header_length = gzip_header_length; - return cfs_reset_stream_zlib (cfs); -} - -static int -cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs) -{ - inflateEnd (&cfs->strm); - return 1; -} - -static int -cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - return -1; -} - -static int -cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs) -{ - return -1; -} - -/** - * Initializes decompression object. Might report metadata about - * compresse stream, if available. Resets the stream to the beginning. 
- * - * @param cfs cfs to initialize - * @param proc callback for metadata - * @param proc_cls callback cls - * @return 1 on success, -1 on error - */ -static int -cfs_init_decompressor (struct CompressedFileSource *cfs, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - switch (cfs->compression_type) - { - case COMP_TYPE_ZLIB: - return cfs_init_decompressor_zlib (cfs, proc, proc_cls); - case COMP_TYPE_BZ2: - return cfs_init_decompressor_bz2 (cfs, proc, proc_cls); - default: - return -1; - } -} - -/** - * Deinitializes decompression object. - * - * @param cfs cfs to deinitialize - * @return 1 on success, -1 on error - */ -static int -cfs_deinit_decompressor (struct CompressedFileSource *cfs) -{ - switch (cfs->compression_type) - { - case COMP_TYPE_ZLIB: - return cfs_deinit_decompressor_zlib (cfs); - case COMP_TYPE_BZ2: - return cfs_deinit_decompressor_bz2 (cfs); - default: - return -1; - } -} - -/** - * Allocates and initializes new cfs object. - * - * @param bfds data source to use - * @param fsize size of the source - * @param compression_type type of compression used - * @param proc metadata callback - * @param proc_cls callback cls - * @return newly allocated cfs on success, NULL on error - */ -struct CompressedFileSource * -cfs_new (struct BufferedFileDataSource *bfds, int64_t fsize, enum ExtractorCompressionType compression_type, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - int shm_result; - struct CompressedFileSource *cfs; - cfs = malloc (sizeof (struct CompressedFileSource)); - if (cfs == NULL) - return NULL; - memset (cfs, 0, sizeof (struct CompressedFileSource)); - cfs->compression_type = compression_type; - cfs->bfds = bfds; - cfs->fsize = fsize; - cfs->uncompressed_size = -1; - cfs->shm_size = MAX_READ; -#if !WINDOWS - shm_result = make_shm_posix ((void **) &cfs->shm_ptr, &cfs->shm, cfs->shm_name, MAX_SHM_NAME, cfs->shm_size); -#else - shm_result = make_shm_w32 ((void **) &cfs->shm_ptr, &cfs->shm, cfs->shm_name, MAX_SHM_NAME, 
cfs->shm_size); -#endif - if (shm_result != 0) - { - cfs_delete (cfs); - return NULL; - } - return cfs; -} - -/** - * Data is read from the source and shoved into decompressor - * in chunks this big. - */ -#define COM_CHUNK_SIZE (10*1024) - -int -cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve) -{ - int ret; - int64_t rc = preserve; - int64_t total = cfs->strm.total_out; - if (preserve > 0) - memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve); - - while (rc < cfs->shm_size && ret != Z_STREAM_END) - { - if (cfs->strm.avail_in == 0) - { - int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in, COM_CHUNK_SIZE); - if (count <= 0) - return 0; - cfs->strm.avail_in = (uInt) count; - } - cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc]; - cfs->strm.avail_out = cfs->shm_size - rc; - ret = inflate (&cfs->strm, Z_SYNC_FLUSH); - if (ret != Z_OK && ret != Z_STREAM_END) - return 0; - rc = cfs->strm.total_out - total; - } - if (ret == Z_STREAM_END) - cfs->uncompressed_size = cfs->strm.total_out; - cfs->shm_pos = preserve; - cfs->shm_buf_size = rc + preserve; - return 1; -} - -int -cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve) -{ - return -1; -} - -/** - * Re-fills shm with new uncompressed data, preserving the last - * @preserve bytes of existing data as the first @preserve bytes - * of the new data. - * Does the actual decompression. Will set uncompressed_size on - * the end of compressed stream. - * - * @param cfds cfs to read from - * @param preserve number of bytes to preserve (0 to discard all old data) - * @return number of bytes in shm. 0 if no more data can be uncompressed. 
- */ -int64_t -cfs_read (struct CompressedFileSource *cfs, int64_t preserve) -{ - switch (cfs->compression_type) - { - case COMP_TYPE_ZLIB: - return cfs_read_zlib (cfs, preserve); - case COMP_TYPE_BZ2: - return cfs_read_bz2 (cfs, preserve); - default: - return -1; - } -} - -int64_t -cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position) -{ - int64_t ret; - if (position > cfs->strm.total_out - cfs->shm_buf_size && position < cfs->strm.total_out) - { - ret = cfs_read (cfs, cfs->strm.total_out - position); - if (ret < 0) - return ret; - return position; - } - while (position >= cfs->strm.total_out) - { - if (0 > (ret = cfs_read (cfs, 0))) - return ret; - if (ret == 0) - return position; - } - if (position < cfs->strm.total_out && position > cfs->strm.total_out - cfs->shm_buf_size) - return cfs->strm.total_out - cfs->shm_buf_size; - return -1; -} - -int64_t -cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position) -{ - return -1; -} - -/** - * Moves the buffer to @position in uncompressed steam. If position - * requires seeking backwards beyond the boundaries of the buffer, resets the - * stream and repeats decompression from the beginning to @position. - * - * @param cfds cfs to seek on - * @param position new starting point for the buffer - * @return new absolute buffer position, -1 on error or EOS - */ -int64_t -cfs_seek (struct CompressedFileSource *cfs, int64_t position) -{ - switch (cfs->compression_type) - { - case COMP_TYPE_ZLIB: - return cfs_seek_zlib (cfs, position); - case COMP_TYPE_BZ2: - return cfs_seek_bz2 (cfs, position); - default: - return -1; - } -} - -/** - * Detect if we have compressed data on our hands. 
- * - * @param data pointer to a data buffer or NULL (in case fd is not -1) - * @param fd a file to read data from, or -1 (if data is not NULL) - * @param fsize size of data (if data is not NULL) or of file (if fd is not -1) - * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression - */ -static enum ExtractorCompressionType -get_compression_type (const unsigned char *data, int fd, int64_t fsize) -{ - void *read_data = NULL; - size_t read_data_size = 0; - ssize_t read_result; - enum ExtractorCompressionType result = COMP_TYPE_INVALID; - - if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER)) - { - return COMP_TYPE_INVALID; - } - if (data == NULL) - { - int64_t position; - read_data_size = COMPRESSED_DATA_PROBE_SIZE; - read_data = malloc (read_data_size); - if (read_data == NULL) - return -1; -#if WINDOWS - position = _lseeki64 (fd, 0, SEEK_CUR); -#elif HAVE_LSEEK64 - position = lseek64 (fd, 0, SEEK_CUR); -#else - position = (int64_t) lseek (fd, 0, SEEK_CUR); -#endif - read_result = READ (fd, read_data, read_data_size); -#if WINDOWS - position = _lseeki64 (fd, position, SEEK_SET); -#elif HAVE_LSEEK64 - position = lseek64 (fd, position, SEEK_SET); -#else - position = lseek (fd, (off_t) position, SEEK_SET); -#endif - if (read_result != read_data_size) - { - free (read_data); - return COMP_TYPE_UNDEFINED; - } - data = (const void *) read_data; - } -#if HAVE_ZLIB - if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08)) - result = COMP_TYPE_ZLIB; -#endif -#if HAVE_LIBBZ2 - if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) - result = COMP_TYPE_BZ2; -#endif - if (read_data != NULL) - free (read_data); - return result; -} - -/** - * Initializes plugin state. Calls init_state_method() - * directly or indirectly. 
- * - * @param plugin plugin to initialize - * @param operation_mode operation mode - * @param shm_name name of the shm/file - * @param fsize file size (may be -1) - */ -static void -init_plugin_state (struct EXTRACTOR_PluginList *plugin, uint8_t operation_mode, const char *shm_name, int64_t fsize) -{ - int write_result; - int init_state_size; - unsigned char *init_state; - int t; - size_t shm_name_len = strlen (shm_name) + 1; - init_state_size = 1 + sizeof (size_t) + shm_name_len + sizeof (uint8_t) + sizeof (int64_t); - plugin->operation_mode = operation_mode; - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - init_state = malloc (init_state_size); - if (init_state == NULL) - { - stop_process (plugin); - return; - } - t = 0; - init_state[t] = MESSAGE_INIT_STATE; - t += 1; - memcpy (&init_state[t], &operation_mode, sizeof (uint8_t)); - t += sizeof (uint8_t); - memcpy (&init_state[t], &fsize, sizeof (int64_t)); - t += sizeof (int64_t); - memcpy (&init_state[t], &shm_name_len, sizeof (size_t)); - t += sizeof (size_t); - memcpy (&init_state[t], shm_name, shm_name_len); - t += shm_name_len; - write_result = plugin_write (plugin, init_state, init_state_size); - free (init_state); - if (write_result < init_state_size) - { - stop_process (plugin); - return; - } - plugin->seek_request = 0; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - init_state_method (plugin, operation_mode, fsize, shm_name); - return; - break; - case EXTRACTOR_OPTION_DISABLED: - return; - break; - } -} - -/** - * Discards plugin state. Calls discard_state_method() - * directly or indirectly. 
- * - * @param plugin plugin to initialize - */ -static void -discard_plugin_state (struct EXTRACTOR_PluginList *plugin) -{ - int write_result; - unsigned char discard_state = MESSAGE_DISCARD_STATE; - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - /* This is somewhat clumsy, but it's the only stop-indicating - * non-W32/POSIX-specific field i could think of... - */ - if (plugin->cpipe_out != -1) - { - write_result = plugin_write (plugin, &discard_state, 1); - if (write_result < 1) - { - stop_process (plugin); - return; - } - } - break; - case EXTRACTOR_OPTION_IN_PROCESS: - discard_state_method (plugin); - return; - break; - case EXTRACTOR_OPTION_DISABLED: - return; - break; - } -} - -/** - * Forces plugin to move the buffer window to @pos. - * - * @param plugin plugin context - * @param pos position to move to - * @param want_start 1 if the caller is interested in the beginning of the - * window, 0 if the caller is interested in its end. Window position - * must be aligned to page size, and this parameter controls the - * direction of window shift. 0 is used mostly by SEEK_END. - * @return 0 on success, -1 on error - */ -static int -pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_t want_start) +pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, + int64_t pos, + uint8_t want_start) { if (plugin->operation_mode == OPMODE_MEMORY) { @@ -2485,6 +793,7 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_ return -1; } + /** * Moves current absolute buffer position to @pos in @whence mode. * Will move logical position withouth shifting the buffer, if possible. 
@@ -2495,7 +804,7 @@ pl_pick_next_buffer_at (struct EXTRACTOR_PluginList *plugin, int64_t pos, uint8_ * @param whence seek mode (SEEK_CUR, SEEK_SET, SEEK_END) * @return new absolute position, -1 on error */ -int64_t +static int64_t pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) { switch (whence) @@ -2549,461 +858,256 @@ pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence) return -1; } -int64_t + +static int64_t pl_get_fsize (struct EXTRACTOR_PluginList *plugin) { return plugin->fsize; } -int64_t -pl_get_pos (struct EXTRACTOR_PluginList *plugin) -{ - return plugin->fpos + plugin->shm_pos; -} /** * Fills @data with a pointer to the data buffer. * Equivalent to read(), except you don't have to allocate and free - * a buffer, since the data is already in memory. - * Will move the buffer, if necessary - * - * @param plugin plugin context - * @param data location to store data pointer - * @param count number of bytes to read - * @return number of bytes (<= count) avalable in @data, -1 on error - */ -int64_t -pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) -{ - if (count > MAX_READ) - return -1; - if (count > plugin->map_size - plugin->shm_pos) - { - int64_t actual_count; - if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos + plugin->shm_pos, SEEK_SET)) - return -1; - *data = &plugin->shm_ptr[plugin->shm_pos]; - actual_count = (count < plugin->map_size - plugin->shm_pos ? count : plugin->map_size - plugin->shm_pos); - plugin->shm_pos += actual_count; - return actual_count; - } - else - { - *data = &plugin->shm_ptr[plugin->shm_pos]; - plugin->shm_pos += count; - return count; - } -} - -/** - * Transmits information about updated shm to plugin. - * For OPMODE_DECOMPRESS only. 
- * - * @param plugin plugin context - * @param position current absolute position in uncompressed stream - * @param map_size number of bytes that are available in shm - * @param fsize total size of the uncompressed stream (might be -1) - * @param operation_mode mode of operation - * @return 0 on success, 1 on error - */ -static int -give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, int64_t position, size_t map_size, int64_t fsize, uint8_t operation_mode) -{ - int write_result; - int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t) + sizeof (int64_t); - unsigned char updated_shm[updated_shm_size]; - int t = 0; - updated_shm[t] = MESSAGE_UPDATED_SHM; - t += 1; - memcpy (&updated_shm[t], &position, sizeof (int64_t)); - t += sizeof (int64_t); - memcpy (&updated_shm[t], &map_size, sizeof (size_t)); - t += sizeof (size_t); - memcpy (&updated_shm[t], &fsize, sizeof (int64_t)); - t += sizeof (int64_t); - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (operation_mode == OPMODE_DECOMPRESS) - { - if (plugin->seek_request < 0) - return 0; - write_result = plugin_write (plugin, updated_shm, updated_shm_size); - if (write_result < updated_shm_size) - { - stop_process (plugin); - return 0; - } - } - return 1; - case EXTRACTOR_OPTION_IN_PROCESS: - if (operation_mode == OPMODE_DECOMPRESS) - { - plugin->fpos = position; - plugin->map_size = map_size; - plugin->fsize = fsize; - } - return 0; - case EXTRACTOR_OPTION_DISABLED: - return 0; - default: - return 1; - } -} - -/** - * Calls _extract_method of in-process plugin. 
- * - * @param plugin plugin context - * @param shm_ptr pointer to the data buffer - * @param proc metadata callback - * @param proc_cls callback cls - */ -static void -ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, void *shm_ptr, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) -{ - int extract_reply; - switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - return; - case EXTRACTOR_OPTION_IN_PROCESS: - if (plugin->seek_request >= 0) - { - plugin->shm_ptr = shm_ptr; - extract_reply = plugin->extract_method (plugin, proc, proc_cls); - /* Don't leak errno from the extract method */ - errno = 0; - if (extract_reply == 1) - plugin->seek_request = -1; - } - break; - case EXTRACTOR_OPTION_DISABLED: - return; - break; - } -} - -#if !WINDOWS -/** - * Receive @size bytes from plugin, store them in @buf - * - * @param plugin plugin context - * @param buf buffer to fill - * @param size number of bytes to read - * @return number of bytes read, 0 on EOS, < 0 on error - */ -int -plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) -{ - ssize_t read_result; - size_t read_count = 0; - while (read_count < size) - { - read_result = read (plugin->cpipe_out, &buf[read_count], size - read_count); - if (read_result <= 0) - return read_result; - read_count += read_result; - } - return read_count; -} -#else -/** - * Receive @size bytes from plugin, store them in @buf + * a buffer, since the data is already in memory. 
+ * Will move the buffer, if necessary * * @param plugin plugin context - * @param buf buffer to fill - * @param size number of bytes to read - * @return number of bytes read, 0 on EOS, < 0 on error + * @param data location to store data pointer + * @param count number of bytes to read + * @return number of bytes (<= count) avalable in @data, -1 on error */ -int -plugin_read (struct EXTRACTOR_PluginList *plugin, unsigned char *buf, size_t size) +static int64_t +pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count) { - DWORD bytes_read; - BOOL bresult; - size_t read_count = 0; - while (read_count < size) + *data = NULL; + if (count > MAX_READ) + return -1; + if (count > plugin->map_size - plugin->shm_pos) { - bresult = ReadFile (plugin->cpipe_out, &buf[read_count], size - read_count, &bytes_read, NULL); - if (!bresult) + int64_t actual_count; + if (plugin->fpos + plugin->shm_pos != pl_seek (plugin, plugin->fpos + plugin->shm_pos, SEEK_SET)) return -1; - read_count += bytes_read; + *data = &plugin->shm_ptr[plugin->shm_pos]; + actual_count = (count < plugin->map_size - plugin->shm_pos) ? count : (plugin->map_size - plugin->shm_pos); + plugin->shm_pos += actual_count; + return actual_count; + } + else + { + *data = &plugin->shm_ptr[plugin->shm_pos]; + plugin->shm_pos += count; + return count; } - return read_count; } -#endif + /** - * Receive a reply from plugin (seek request, metadata and done message) + * Transmits information about updated shm to plugin. + * For OPMODE_DECOMPRESS only. 
* * @param plugin plugin context - * @param proc metadata callback - * @param proc_cls callback cls - * @return 0 on success, -1 on error + * @param position current absolute position in uncompressed stream + * @param map_size number of bytes that are available in shm + * @param fsize total size of the uncompressed stream (might be -1) + * @param operation_mode mode of operation + * @return 0 on success, 1 on error */ static int -receive_reply (struct EXTRACTOR_PluginList *plugin, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +give_shm_to_plugin (struct EXTRACTOR_PluginList *plugin, + int64_t position, + size_t map_size, int64_t fsize, + uint8_t operation_mode) { - int read_result; - unsigned char code; - int must_read = 1; - - int64_t seek_position; - struct IpcHeader hdr; - char *mime_type; - char *data; - - while (must_read) - { - read_result = plugin_read (plugin, &code, 1); - if (read_result < 1) - return -1; - switch (code) + int write_result; + int updated_shm_size = 1 + sizeof (int64_t) + sizeof (size_t) + sizeof (int64_t); + unsigned char updated_shm[updated_shm_size]; + int t = 0; + + updated_shm[t] = MESSAGE_UPDATED_SHM; + t++; + memcpy (&updated_shm[t], &position, sizeof (int64_t)); + t += sizeof (int64_t); + memcpy (&updated_shm[t], &map_size, sizeof (size_t)); + t += sizeof (size_t); + memcpy (&updated_shm[t], &fsize, sizeof (int64_t)); + t += sizeof (int64_t); + switch (plugin->flags) { - case MESSAGE_DONE: /* Done */ - plugin->seek_request = -1; - must_read = 0; - break; - case MESSAGE_SEEK: /* Seek */ - read_result = plugin_read (plugin, (unsigned char *) &seek_position, sizeof (int64_t)); - if (read_result < sizeof (int64_t)) - return -1; - plugin->seek_request = seek_position; - must_read = 0; - break; - case MESSAGE_META: /* Meta */ - read_result = plugin_read (plugin, (unsigned char *) &hdr, sizeof (hdr)); - if (read_result < sizeof (hdr)) /* FIXME: check hdr for sanity */ - return -1; - mime_type = malloc (hdr.mime_len + 1); - if 
(mime_type == NULL) - return -1; - read_result = plugin_read (plugin, (unsigned char *) mime_type, hdr.mime_len); - if (read_result < hdr.mime_len) - return -1; - mime_type[hdr.mime_len] = '\0'; - data = malloc (hdr.data_len); - if (data == NULL) - { - free (mime_type); - return -1; - } - read_result = plugin_read (plugin, (unsigned char *) data, hdr.data_len); - if (read_result < hdr.data_len) - { - free (mime_type); - free (data); - return -1; - } - read_result = proc (proc_cls, plugin->short_libname, hdr.meta_type, hdr.meta_format, mime_type, data, hdr.data_len); - free (mime_type); - free (data); - if (read_result != 0) - return 1; - break; + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (operation_mode == OPMODE_DECOMPRESS) + { + if (plugin->seek_request < 0) + return 0; + write_result = plugin_write (plugin, updated_shm, updated_shm_size); + if (write_result < updated_shm_size) + { + stop_process (plugin); + return 0; + } + } + return 1; + case EXTRACTOR_OPTION_IN_PROCESS: + if (operation_mode == OPMODE_DECOMPRESS) + { + plugin->fpos = position; + plugin->map_size = map_size; + plugin->fsize = fsize; + } + return 0; + case EXTRACTOR_OPTION_DISABLED: + return 0; default: - return -1; + return 1; } - } - return 0; } -#if !WINDOWS + /** - * Wait for one of the plugins to reply. - * Selects on plugin output pipes, runs receive_reply() - * on each activated pipe until it gets a seek request - * or a done message. Called repeatedly by the user until all pipes are dry or - * broken. + * Calls _extract_method of in-process plugin. * - * @param plugins to select upon + * @param plugin plugin context + * @param shm_ptr pointer to the data buffer * @param proc metadata callback * @param proc_cls callback cls - * @return number of dry/broken pipes since last call, -1 on error or if no - * plugins reply in 10 seconds. 
*/ -static int -wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +static void +ask_in_process_plugin (struct EXTRACTOR_PluginList *plugin, + void *shm_ptr, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - int ready; - int result; - struct timeval tv; - fd_set to_check; - int highest = 0; - int read_result; - struct EXTRACTOR_PluginList *ppos; - - FD_ZERO (&to_check); + int extract_reply; - for (ppos = plugins; NULL != ppos; ppos = ppos->next) - { - switch (ppos->flags) + switch (plugin->flags) { case EXTRACTOR_OPTION_DEFAULT_POLICY: case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (ppos->seek_request == -1) - continue; - FD_SET (ppos->cpipe_out, &to_check); - if (highest < ppos->cpipe_out) - highest = ppos->cpipe_out; - break; + return; case EXTRACTOR_OPTION_IN_PROCESS: + if (plugin->seek_request >= 0) + { + plugin->shm_ptr = shm_ptr; + extract_reply = plugin->extract_method (plugin, proc, proc_cls); + /* Don't leak errno from the extract method */ + errno = 0; + if (1 == extract_reply) + plugin->seek_request = -1; + } break; case EXTRACTOR_OPTION_DISABLED: + return; break; } - } +} - tv.tv_sec = 10; - tv.tv_usec = 0; - ready = select (highest + 1, &to_check, NULL, NULL, &tv); - if (ready <= 0) - /* an error or timeout -> something's wrong or all plugins hung up */ - return -1; - result = 0; - for (ppos = plugins; NULL != ppos; ppos = ppos->next) - { - switch (ppos->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (ppos->seek_request == -1) - continue; - if (FD_ISSET (ppos->cpipe_out, &to_check)) - { - read_result = receive_reply (ppos, proc, proc_cls); - if (read_result < 0) - { - stop_process (ppos); - } - result += 1; - } - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; - } - } - return result; -} -#else /** - * Wait for one of the plugins to reply. 
- * Selects on plugin output pipes, runs receive_reply() - * on each activated pipe until it gets a seek request - * or a done message. Called repeatedly by the user until all pipes are dry or - * broken. - * This W32 version of wait_for_reply() can't select on more than 64 plugins - * at once (returns -1 if there are more than 64 plugins). + * Receive a reply from plugin (seek request, metadata and done message) * - * @param plugins to select upon + * @param plugin plugin context * @param proc metadata callback * @param proc_cls callback cls - * @return number of dry/broken pipes since last call, -1 on error or if no - * plugins reply in 10 seconds. + * @return 0 on success, -1 on error */ static int -wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +receive_reply (struct EXTRACTOR_PluginList *plugin, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { - int result; - DWORD ms; - DWORD first_ready; - DWORD dwresult; - DWORD bytes_read; - BOOL bresult; - int i; - HANDLE events[MAXIMUM_WAIT_OBJECTS]; - - - struct EXTRACTOR_PluginList *ppos; - - i = 0; - for (ppos = plugins; NULL != ppos; ppos = ppos->next) - { - if (i == MAXIMUM_WAIT_OBJECTS) - return -1; - if (ppos->seek_request == -1) - continue; - switch (ppos->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0) - { - ResetEvent (ppos->ov_read.hEvent); - bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, &ppos->ov_read); - if (bresult == TRUE) - { - SetEvent (ppos->ov_read.hEvent); - } - else - { - DWORD err = GetLastError (); - if (err != ERROR_IO_PENDING) - SetEvent (ppos->ov_read.hEvent); - } - } - events[i] = ppos->ov_read.hEvent; - i++; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; - } - } - - ms = 10000; - first_ready = WaitForMultipleObjects (i, events, FALSE, ms); - if 
(first_ready == WAIT_TIMEOUT || first_ready == WAIT_FAILED) - /* an error or timeout -> something's wrong or all plugins hung up */ - return -1; + int read_result; + unsigned char code; + int64_t seek_position; + struct IpcHeader hdr; + char *mime_type; + char *data; + int must_read = 1; - i = 0; - result = 0; - for (ppos = plugins; NULL != ppos; ppos = ppos->next) - { - int read_result; - switch (ppos->flags) + while (must_read) { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - if (ppos->seek_request == -1) - continue; - if (i < first_ready) - { - i += 1; - continue; - } - dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0); - read_result = 0; - if (dwresult == WAIT_OBJECT_0) - { - read_result = receive_reply (ppos, proc, proc_cls); - result += 1; - } - if (dwresult == WAIT_FAILED || read_result < 0) - { - stop_process (ppos); - if (dwresult == WAIT_FAILED) - result += 1; - } - i++; - break; - case EXTRACTOR_OPTION_IN_PROCESS: - break; - case EXTRACTOR_OPTION_DISABLED: - break; + read_result = plugin_read (plugin, &code, 1); + if (read_result < 1) + return -1; + switch (code) + { + case MESSAGE_DONE: /* Done */ + plugin->seek_request = -1; + must_read = 0; + break; + case MESSAGE_SEEK: /* Seek */ + read_result = plugin_read (plugin, + &seek_position, sizeof (int64_t)); + if (read_result < sizeof (int64_t)) + return -1; + plugin->seek_request = seek_position; + must_read = 0; + break; + case MESSAGE_META: /* Meta */ + read_result = plugin_read (plugin, + &hdr, sizeof (hdr)); + if (read_result < sizeof (hdr)) + return -1; + /* FIXME: check hdr for sanity */ + if (hdr.data_len > MAX_META_DATA) + return -1; /* not allowing more than MAX_META_DATA meta data */ + if (0 == hdr.mime_len) + { + mime_type = NULL; + } + else + { + if (NULL == (mime_type = malloc (hdr.mime_len))) + return -1; + read_result = plugin_read (plugin, + mime_type, + hdr.mime_len); + if ( (read_result < hdr.mime_len) || + ('\0' != 
mime_type[hdr.mime_len-1]) ) + { + if (NULL != mime_type) + free (mime_type); + return -1; + } + } + if (0 == hdr.data_len) + { + data = NULL; + } + else + { + if (NULL == (data = malloc (hdr.data_len))) + { + if (NULL != mime_type) + free (mime_type); + return -1; + } + read_result = plugin_read (plugin, + data, hdr.data_len); + if (read_result < hdr.data_len) + { + if (NULL != mime_type) + free (mime_type); + free (data); + return -1; + } + } + read_result = proc (proc_cls, + plugin->short_libname, + hdr.meta_type, hdr.meta_format, + mime_type, data, hdr.data_len); + if (NULL != mime_type) + free (mime_type); + if (NULL != data) + free (data); + if (0 != read_result) + return 1; + break; + default: + return -1; + } } - } - return result; + return 0; } -#endif /** * Checks the seek requests that plugins made, finds the one with @@ -3016,53 +1120,59 @@ wait_for_reply (struct EXTRACTOR_PluginList *plugins, EXTRACTOR_MetaDataProcesso * @return new stream position, -1 on error */ static int64_t -seek_to_new_position (struct EXTRACTOR_PluginList *plugins, struct CompressedFileSource *cfs, int64_t current_position, int64_t map_size) +seek_to_new_position (struct EXTRACTOR_PluginList *plugins, + struct CompressedFileSource *cfs, + int64_t current_position, + int64_t map_size) { int64_t min_pos = current_position + map_size; int64_t min_plugin_pos = 0x7FFFFFFFFFFFFFF; struct EXTRACTOR_PluginList *ppos; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) - { - switch (ppos->flags) { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - case EXTRACTOR_OPTION_IN_PROCESS: - if (ppos->seek_request >= 0 && ppos->seek_request <= min_pos) - min_pos = ppos->seek_request; - if (ppos->seek_request >= 0 && ppos->seek_request <= min_plugin_pos) - min_plugin_pos = ppos->seek_request; - break; - case EXTRACTOR_OPTION_DISABLED: - break; + switch (ppos->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case 
EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + case EXTRACTOR_OPTION_IN_PROCESS: + if (ppos->seek_request >= 0 && ppos->seek_request <= min_pos) + min_pos = ppos->seek_request; + if (ppos->seek_request >= 0 && ppos->seek_request <= min_plugin_pos) + min_plugin_pos = ppos->seek_request; + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } } - } if (min_plugin_pos == 0x7FFFFFFFFFFFFFF) return -1; if (min_pos < current_position - map_size) - { - if (1 != cfs_reset_stream (cfs)) - return -1; - return 0; - } + { + if (1 != cfs_reset_stream (cfs)) + return -1; + return 0; + } return cfs_seek (cfs, min_pos); } + static void load_in_process_plugin (struct EXTRACTOR_PluginList *plugin) { switch (plugin->flags) - { - case EXTRACTOR_OPTION_DEFAULT_POLICY: - case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: - case EXTRACTOR_OPTION_DISABLED: - break; - case EXTRACTOR_OPTION_IN_PROCESS: - plugin_load (plugin); - break; - } + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + case EXTRACTOR_OPTION_DISABLED: + break; + case EXTRACTOR_OPTION_IN_PROCESS: + EXTRACTOR_plugin_load_ (plugin); + break; + } } + /** * Extract keywords using the given set of plugins. 
* @@ -3076,7 +1186,13 @@ load_in_process_plugin (struct EXTRACTOR_PluginList *plugin) * @param proc_cls cls argument to proc */ static void -do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, const char *filename, struct CompressedFileSource *cfs, int64_t fsize, EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +do_extract (struct EXTRACTOR_PluginList *plugins, + const char *data, + int fd, + const char *filename, + struct CompressedFileSource *cfs, + int64_t fsize, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) { int operation_mode; int plugin_count = 0; @@ -3258,155 +1374,49 @@ do_extract (struct EXTRACTOR_PluginList *plugins, const char *data, int fd, cons */ void EXTRACTOR_extract (struct EXTRACTOR_PluginList *plugins, - const char *filename, - const void *data, - size_t size, - EXTRACTOR_MetaDataProcessor proc, - void *proc_cls) + const char *filename, + const void *data, + size_t size, + EXTRACTOR_MetaDataProcessor proc, + void *proc_cls) { - int fd = -1; - struct stat64 fstatbuf; - int64_t fsize = 0; - enum ExtractorCompressionType compression_type = -1; - struct CompressedFileSource *cfs = NULL; - - /* If data is not given, then we need to read it from the file. Try opening it */ - if ((data == NULL) && - (filename != NULL) && - (0 == STAT64(filename, &fstatbuf)) && - (!S_ISDIR(fstatbuf.st_mode)) && - (-1 != (fd = file_open (filename, - O_RDONLY | O_LARGEFILE)))) - { - /* Empty files are of no interest */ - fsize = fstatbuf.st_size; - if (fsize == 0) - { - close(fd); - return; - } - } + struct EXTRACTOR_Datasource *datasource; - /* Data is not given, and we've failed to open the file with data -> exit */ - if ((fsize == 0) && (data == NULL)) - return; - /* fsize is now size of the data OR size of the file */ - if (data != NULL) - fsize = size; - - errno = 0; - /* Peek at first few bytes of the file (or of the data), and see if it's compressed. 
*/ - compression_type = get_compression_type (data, fd, fsize); - if (compression_type < 0) - { - /* errno is set by get_compression_type () */ - if (fd != -1) - close (fd); - return; - } - - struct BufferedFileDataSource *bfds; - bfds = bfds_new (data, fd, fsize); - if (bfds == NULL) + if (NULL == filename) + datasource = EXTRACTOR_datasource_create_from_buffer_ (data, size); + else + datasource = EXTRACTOR_datasource_create_from_file_ (filename); + if (NULL == datasource) return; - - if (compression_type > 0) - { - int icr = 0; - /* Set up a decompressor. - * Will also report compression-related metadata to the caller. - */ - cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls); - if (cfs == NULL) - { - if (fd != -1) - close (fd); - errno = EILSEQ; - return; - } - icr = cfs_init_decompressor (cfs, proc, proc_cls); - if (icr < 0) - { - if (fd != -1) - close (fd); - errno = EILSEQ; - return; - } - else if (icr == 0) - { - if (fd != -1) - close (fd); - errno = 0; - return; - } - } - - /* do_extract () might set errno itself, but from our point of view everything is OK */ - errno = 0; - - do_extract (plugins, data, fd, filename, cfs, fsize, proc, proc_cls); - if (cfs != NULL) - { - cfs_deinit_decompressor (cfs); - cfs_delete (cfs); - } - bfds_delete (bfds); - if (-1 != fd) - close(fd); + do_extract (plugins, datasource, proc, proc_cls); + EXTRACTOR_datasource_destroy_ (datasource); } -#if WINDOWS -void CALLBACK -RundllEntryPoint (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) -{ - intptr_t in_h; - intptr_t out_h; - int in, out; - - sscanf(lpszCmdLine, "%lu %lu", &in_h, &out_h); - in = _open_osfhandle (in_h, _O_RDONLY); - out = _open_osfhandle (out_h, 0); - setmode (in, _O_BINARY); - setmode (out, _O_BINARY); - plugin_main (read_plugin_data (in), - in, out); -} - -void CALLBACK -RundllEntryPointA (HWND hwnd, - HINSTANCE hinst, - LPSTR lpszCmdLine, - int nCmdShow) -{ - return RundllEntryPoint(hwnd, hinst, lpszCmdLine, nCmdShow); -} -#endif 
- /** * Initialize gettext and libltdl (and W32 if needed). */ -void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { +void __attribute__ ((constructor)) +EXTRACTOR_ltdl_init () +{ int err; #if ENABLE_NLS - BINDTEXTDOMAIN(PACKAGE, LOCALEDIR); - BINDTEXTDOMAIN("iso-639", ISOLOCALEDIR); /* used by wordextractor */ + BINDTEXTDOMAIN (PACKAGE, LOCALEDIR); + BINDTEXTDOMAIN ("iso-639", ISOLOCALEDIR); /* used by wordextractor */ #endif err = lt_dlinit (); - if (err > 0) { + if (err > 0) + { #if DEBUG - fprintf(stderr, - _("Initialization of plugin mechanism failed: %s!\n"), - lt_dlerror()); + fprintf (stderr, + _("Initialization of plugin mechanism failed: %s!\n"), + lt_dlerror ()); #endif - return; - } + return; + } #if WINDOWS - plibc_init("GNU", PACKAGE); + plibc_init ("GNU", PACKAGE); #endif } @@ -3414,9 +1424,10 @@ void __attribute__ ((constructor)) EXTRACTOR_ltdl_init() { /** * Deinit. */ -void __attribute__ ((destructor)) EXTRACTOR_ltdl_fini() { +void __attribute__ ((destructor)) +EXTRACTOR_ltdl_fini () { #if WINDOWS - plibc_shutdown(); + plibc_shutdown (); #endif lt_dlexit (); } diff --git a/src/main/extractor_datasource.c b/src/main/extractor_datasource.c @@ -0,0 +1,1041 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. 
If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" + +#if HAVE_LIBBZ2 +#include <bzlib.h> +#endif + +#if HAVE_ZLIB +#include <zlib.h> +#endif + +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif + +/** + * Maximum size of an IO buffer. + */ +#define MAX_READ (4 * 1024 * 1024) + + +#if HAVE_ZLIB +#define MIN_ZLIB_HEADER 12 +#endif +#if HAVE_LIBBZ2 +#define MIN_BZ2_HEADER 4 +#endif +#if !defined (MIN_COMPRESSED_HEADER) && HAVE_ZLIB +#define MIN_COMPRESSED_HEADER MIN_ZLIB_HEADER +#endif +#if !defined (MIN_COMPRESSED_HEADER) && HAVE_LIBBZ2 +#define MIN_COMPRESSED_HEADER MIN_BZ2_HEADER +#endif +#if !defined (MIN_COMPRESSED_HEADER) +#define MIN_COMPRESSED_HEADER -1 +#endif + +#define COMPRESSED_DATA_PROBE_SIZE 3 + +/** + * Enum with the various possible types of compression supported. + */ +enum ExtractorCompressionType +{ + /** + * We cannot tell from the data (header incomplete). + */ + COMP_TYPE_UNDEFINED = -1, + + /** + * Invalid header (likely uncompressed) + */ + COMP_TYPE_INVALID = 0, + + /** + * libz / gzip compression. + */ + COMP_TYPE_ZLIB = 1, + + /** + * bz2 compression + */ + COMP_TYPE_BZ2 = 2 +}; + + +/** + * Abstraction of the data source (file or a memory buffer) + * for the decompressor. + */ +struct BufferedFileDataSource +{ + /** + * Pointer to the buffer to read from (may be NULL) + */ + const void *data; + + /** + * A buffer to read into. For fd != -1: when data != NULL, + * data is used directly. + */ + void *buffer; + + /** + * Size of the file (or the data buffer) + */ + uint64_t fsize; + + /** + * Position within the file or the data buffer + */ + uint64_t fpos; + + /** + * Position within the buffer. 
+ */ + uint64_t buffer_pos; + + /** + * Number of bytes in the buffer (<= buffer_size) + */ + uint64_t buffer_bytes; + + /** + * Allocated size of the buffer + */ + uint64_t buffer_size; + + /** + * Descriptor of the file to read data from (may be -1) + */ + int fd; + +}; + + +/** + * An object from which uncompressed data can be read + */ +struct CompressedFileSource +{ + /** + * The source of data + */ + struct BufferedFileDataSource *bfds; + + /** + * Size of the source (same as bfds->fsize) + */ + int64_t fsize; + + /** + * Position within the source + */ + int64_t fpos; + + /** + * Total size of the uncompressed data. Remains -1 until + * decompression is finished. + */ + int64_t uncompressed_size; + +#if HAVE_LIBBZ2 + /** + * BZ2 stream object + */ + bz_stream bstrm; +#endif + +#if HAVE_ZLIB + /** + * ZLIB stream object + */ + z_stream strm; + + /** + * Length of gzip header (may be 0, in that case ZLIB parses the header) + */ + int gzip_header_length; +#endif + + /** + * The type of compression used in the source + */ + enum ExtractorCompressionType compression_type; + +}; + + +/** + * Makes bfds seek to 'pos' and read a chunk of bytes there. + * Changes bfds->fpos, bfds->buffer_bytes and bfds->buffer_pos. + * Does almost nothing for memory-backed bfds. 
+ * + * @param bfds bfds + * @param pos position + * @return 0 on success, -1 on error + */ +static int +bfds_pick_next_buffer_at (struct BufferedFileDataSource *bfds, + uint64_t pos) +{ + int64_t position; + ssize_t rd; + + if (pos > bfds->fsize) + return -1; /* invalid */ + if (NULL == bfds->buffer) + { + bfds->buffer_bytes = bfds->fsize; + return 0; + } +#if WINDOWS + position = _lseeki64 (bfds->fd, pos, SEEK_SET); +#elif HAVE_LSEEK64 + position = lseek64 (bfds->fd, pos, SEEK_SET); +#else + position = (int64_t) lseek (bfds->fd, pos, SEEK_SET); +#endif + if (position < 0) + return -1; + bfds->fpos = position; + rd = read (bfds->fd, bfds->buffer, bfds->buffer_size); + if (rd < 0) + return -1; + bfds->buffer_bytes = rd; + return 0; +} + + +/** + * Creates a bfds + * + * @param data data buffer to use as a source (NULL if fd != -1) + * @param fd file descriptor to use as a source (-1 if data != NULL) + * @param fsize size of the file (or the buffer) + * @return newly allocated bfds + */ +static struct BufferedFileDataSource * +bfds_new (const void *data, + int fd, + int64_t fsize) +{ + struct BufferedFileDataSource *result; + size_t xtra; + + if (fsize > MAX_READ) + xtra = MAX_READ; + else + xtra = (size_t) fsize; + if ( (-1 == fd) && (NULL == data) ) + return NULL; + if ( (-1 != fd) && (NULL != data) ) + fd = -1; /* don't need fd */ + if (NULL != data) + xtra = 0; + if (NULL == (result = malloc (sizeof (struct BufferedFileDataSource) + xtra))) + return NULL; + memset (result, 0, sizeof (struct BufferedFileDataSource)); + result->data = (NULL != data) ? data : &result[1]; + result->buffer = (NULL != data) ? NULL : &result[1]; + result->buffer_size = (NULL != data) ? 
fsize : xtra; + result->fsize = fsize; + result->fd = fd; + bfds_pick_next_buffer_at (result, 0); + return result; +} + + +/** + * Unallocates bfds + * + * @param bfds bfds to deallocate + */ +static void +bfds_delete (struct BufferedFileDataSource *bfds) +{ + if (NULL != bfds->buffer) + free (bfds->buffer); + free (bfds); +} + + +/** + * Makes bfds seek to 'pos' in 'whence' mode. + * Will try to seek within the buffer, will move the buffer location if + * the seek request falls outside of the buffer range. + * + * @param bfds bfds + * @param pos position to seek to + * @param whence one of the seek constants (SEEK_CUR, SEEK_SET, SEEK_END) + * @return new absolute position, -1 on error + */ +static int64_t +bfds_seek (struct BufferedFileDataSource *bfds, + int64_t pos, int whence) +{ + switch (whence) + { + case SEEK_CUR: + if (NULL != bfds->buffer) + { + if (0 != bfds_pick_next_buffer_at (bfds, + bfds->fpos + bfds->buffer_pos + pos)) + return -1; + bfds->buffer_pos = 0; + return bfds->fpos; + } + bfds->buffer_pos += pos; + return bfds->buffer_pos; + case SEEK_SET: + if (pos < 0) + return -1; + if (NULL != bfds->buffer) + { + if (0 != bfds_pick_next_buffer_at (bfds, pos)) + return -1; + bfds->buffer_pos = 0; + return bfds->fpos; + } + bfds->buffer_pos = pos; + return bfds->buffer_pos; + case SEEK_END: + if (NULL != bfds->buffer) + { + if (0 != bfds_pick_next_buffer_at (bfds, bfds->fsize + pos)) + return -1; + bfds->buffer_pos = 0; + return bfds->fpos; + } + bfds->buffer_pos = bfds->fsize + pos; + return bfds->buffer_pos; + } + return -1; +} + + +/** + * Fills 'buf_ptr' with a chunk of data. + * Will seek if necessary. Will fail if 'count' exceeds buffer size. 
+ * + * @param bfds bfds + * @param buf_ptr location to store data + * @param count number of bytes to read + * @return number of bytes (<= count) available at location pointed by buf_ptr + */ +static ssize_t +bfds_read (struct BufferedFileDataSource *bfds, + void *buf_ptr, + size_t count) +{ + if (count > MAX_READ) + return -1; + if (count > bfds->buffer_bytes - bfds->buffer_pos) + { + if (bfds->fpos + bfds->buffer_pos != bfds_seek (bfds, bfds->fpos + bfds->buffer_pos, SEEK_SET)) + return -1; + if (NULL != bfds->buffer) + { + *buf_ptr = &bfds->buffer[bfds->buffer_pos]; + bfds->buffer_pos += count < bfds->buffer_bytes ? count : bfds->buffer_bytes; + return (count < bfds->buffer_bytes ? count : bfds->buffer_bytes); + } + else + { + int64_t ret = count < (bfds->buffer_bytes - bfds->buffer_pos) ? count : (bfds->buffer_bytes - bfds->buffer_pos); + *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; + bfds->buffer_pos += ret; + return ret; + } + } + else + { + if (NULL != bfds->buffer) + *buf_ptr = &bfds->buffer[bfds->buffer_pos]; + else + *buf_ptr = (unsigned char*) &bfds->data[bfds->buffer_pos]; + bfds->buffer_pos += count; + return count; + } +} + + +/** + * Release resources of a compressed data source. + * + * @param cfs compressed data source to free + */ +static void +cfs_delete (struct CompressedFileSource *cfs) +{ + free (cfs); +} + + +/** + * Reset gz-compressed data stream to the beginning. 
+ * + * @return 1 on success, 0 if we failed to seek, + * -1 on decompressor initialization failure + */ +static int +cfs_reset_stream_zlib (struct CompressedFileSource *cfs) +{ + if (cfs->gzip_header_length != + bfds_seek (cfs->bfds, cfs->gzip_header_length, SEEK_SET)) + return 0; + cfs->strm.next_in = NULL; + cfs->strm.avail_in = 0; + cfs->strm.total_in = 0; + cfs->strm.zalloc = NULL; + cfs->strm.zfree = NULL; + cfs->strm.opaque = NULL; + + /* + * note: maybe plain inflateInit(&strm) is adequate, + * it looks more backward-compatible also ; + * + * ZLIB_VERNUM isn't defined by zlib version 1.1.4 ; + * there might be a better check. + */ + if (Z_OK != inflateInit2 (&cfs->strm, +#ifdef ZLIB_VERNUM + 15 + 32 +#else + -MAX_WBITS +#endif + )) + { + return -1; + } + cfs->fpos = cfs->gzip_header_length; + cfs->shm_pos = 0; + cfs->shm_buf_size = 0; + return 1; +} + + +/** + * Reset bz2-compressed data stream to the beginning. + * + * @return 1 on success, 0 if we failed to seek, + * -1 on decompressor initialization failure + */ +static int +cfs_reset_stream_bz2 (struct CompressedFileSource *cfs) +{ + /* not implemented */ + return -1; +} + + +/** + * Resets the compression stream to begin uncompressing + * from the beginning. Used at initialization time, and when + * seeking backward. + * + * @param cfs cfs to reset + * @return 1 on success, 0 if we failed to seek, + * -1 on error + */ +static int +cfs_reset_stream (struct CompressedFileSource *cfs) +{ + switch (cfs->compression_type) + { + case COMP_TYPE_ZLIB: + return cfs_reset_stream_zlib (cfs); + case COMP_TYPE_BZ2: + return cfs_reset_stream_bz2 (cfs); + default: + return -1; + } +} + + +/** + * Initializes gz-decompression object. Might report metadata about + * compressed stream, if available. Resets the stream to the beginning. 
+ * + * @param cfs cfs to initialize + * @param proc callback for metadata + * @param proc_cls callback cls + * @return 1 on success, -1 on error + */ +static int +cfs_init_decompressor_zlib (struct CompressedFileSource *cfs, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + /* Process gzip header */ + unsigned int gzip_header_length = 10; + unsigned char data[12]; + int64_t buf_bytes; + int len; + unsigned char *buf; + unsigned char *cptr; + + if (sizeof (data) > bfds_read (cfs->bfds, data, sizeof (data))) + return -1; + + if (0 != (data[3] & 0x4)) /* FEXTRA set */ + gzip_header_length += 2 + (unsigned) (data[10] & 0xff) + + (((unsigned) (data[11] & 0xff)) * 256); + + if (0 != (data[3] & 0x8)) /* FNAME set */ + { + if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) + return -1; + buf_bytes = bfds_read (cfs->bfds, &buf, 1024); + if (buf_bytes <= 0) + return -1; + cptr = buf; + + len = 0; + /* stored file name is here */ + while (len < buf_bytes) + { + if ('\0' == *cptr) + break; + cptr++; + len++; + } + + if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_FILENAME, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", + (const char *) buf, + len)) + return 0; /* done */ + + /* FIXME: check for correctness */ + //gzip_header_length = (cptr - data) + 1; + gzip_header_length += len + 1; + } + + if (0 != (data[3] & 0x16)) /* FCOMMENT set */ + { + int64_t buf_bytes; + int len; + unsigned char *buf; + unsigned char *cptr; + + if (gzip_header_length > bfds_seek (cfs->bfds, gzip_header_length, SEEK_SET)) + return -1; + buf_bytes = bfds_read (cfs->bfds, &buf, 1024); + if (buf_bytes <= 0) + return -1; + cptr = buf; + + len = 0; + /* stored file name is here */ + while (len < buf_bytes) + { + if ('\0' == *cptr) + break; + cptr++; + len++; + } + + if (0 != proc (proc_cls, "<zlib>", EXTRACTOR_METATYPE_COMMENT, + EXTRACTOR_METAFORMAT_C_STRING, "text/plain", + (const char *) buf, + len)) + return 0; /* done */ + + /* FIXME: check for correctness */ + 
//gzip_header_length = (cptr - data) + 1; + gzip_header_length += len + 1; + } + + if (data[3] & 0x2) /* FCHRC set */ + gzip_header_length += 2; + + memset (&cfs->strm, 0, sizeof (z_stream)); + +#ifdef ZLIB_VERNUM + gzip_header_length = 0; +#endif + + cfs->gzip_header_length = gzip_header_length; + return cfs_reset_stream_zlib (cfs); +} + + +/** + * Initializes bz2-decompression object. Might report metadata about + * compresse stream, if available. Resets the stream to the beginning. + * + * @param cfs cfs to initialize + * @param proc callback for metadata + * @param proc_cls callback cls + * @return 1 on success, -1 on error + */ +static int +cfs_init_decompressor_bz2 (struct CompressedFileSource *cfs, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + return -1; +} + + +/** + * Initializes decompression object. Might report metadata about + * compresse stream, if available. Resets the stream to the beginning. + * + * @param cfs cfs to initialize + * @param proc callback for metadata + * @param proc_cls callback cls + * @return 1 on success, -1 on error + */ +static int +cfs_init_decompressor (struct CompressedFileSource *cfs, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + switch (cfs->compression_type) + { + case COMP_TYPE_ZLIB: + return cfs_init_decompressor_zlib (cfs, proc, proc_cls); + case COMP_TYPE_BZ2: + return cfs_init_decompressor_bz2 (cfs, proc, proc_cls); + default: + return -1; + } +} + + +/** + * Deinitializes gz-decompression object. + * + * @param cfs cfs to deinitialize + * @return 1 on success, -1 on error + */ +static int +cfs_deinit_decompressor_zlib (struct CompressedFileSource *cfs) +{ + inflateEnd (&cfs->strm); + return 1; +} + + +/** + * Deinitializes bz2-decompression object. + * + * @param cfs cfs to deinitialize + * @return 1 on success, -1 on error + */ +static int +cfs_deinit_decompressor_bz2 (struct CompressedFileSource *cfs) +{ + return -1; +} + + +/** + * Deinitializes decompression object. 
+ * + * @param cfs cfs to deinitialize + * @return 1 on success, -1 on error + */ +static int +cfs_deinit_decompressor (struct CompressedFileSource *cfs) +{ + switch (cfs->compression_type) + { + case COMP_TYPE_ZLIB: + return cfs_deinit_decompressor_zlib (cfs); + case COMP_TYPE_BZ2: + return cfs_deinit_decompressor_bz2 (cfs); + default: + return -1; + } +} + + +/** + * Allocates and initializes new cfs object. + * + * @param bfds data source to use + * @param fsize size of the source + * @param compression_type type of compression used + * @param proc metadata callback (not used by this function) + * @param proc_cls callback cls (not used by this function) + * @return newly allocated cfs on success, NULL on error + */ +struct CompressedFileSource * +cfs_new (struct BufferedFileDataSource *bfds, + int64_t fsize, + enum ExtractorCompressionType compression_type, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int shm_result; + struct CompressedFileSource *cfs; + + if (NULL == (cfs = malloc (sizeof (struct CompressedFileSource)))) + return NULL; + memset (cfs, 0, sizeof (struct CompressedFileSource)); + cfs->compression_type = compression_type; + cfs->bfds = bfds; + cfs->fsize = fsize; + cfs->uncompressed_size = -1; + return cfs; +} + + +/** + * Data is read from the source and shoved into decompressor + * in chunks this big. + */ +#define COM_CHUNK_SIZE (10*1024) + + +/** + * Re-fills shm with new uncompressed data, preserving the last + * 'preserve' bytes of existing data as the first 'preserve' bytes + * of the new data. + * Does the actual decompression. Will set uncompressed_size on + * the end of compressed stream. + * + * @param cfs cfs to read from + * @param preserve number of bytes to preserve (0 to discard all old data) + * @return number of bytes in shm. 
0 if no more data can be uncompressed, -1 on error + */ +static int +cfs_read_zlib (struct CompressedFileSource *cfs, int64_t preserve) +{ + int ret; + int64_t rc = preserve; + int64_t total = cfs->strm.total_out; + + if (preserve > 0) + memmove (cfs->shm_ptr, &((unsigned char *)cfs->shm_ptr)[0], preserve); + + while (rc < cfs->shm_size && ret != Z_STREAM_END) + { + if (cfs->strm.avail_in == 0) + { + int64_t count = bfds_read (cfs->bfds, &cfs->strm.next_in, COM_CHUNK_SIZE); + if (count <= 0) + return 0; + cfs->strm.avail_in = (uInt) count; + } + cfs->strm.next_out = &((unsigned char *)cfs->shm_ptr)[rc]; + cfs->strm.avail_out = cfs->shm_size - rc; + ret = inflate (&cfs->strm, Z_SYNC_FLUSH); + if (ret != Z_OK && ret != Z_STREAM_END) + return 0; + rc = cfs->strm.total_out - total; + } + if (ret == Z_STREAM_END) + cfs->uncompressed_size = cfs->strm.total_out; + cfs->shm_pos = preserve; + cfs->shm_buf_size = rc + preserve; + return 1; +} + + +/** + * Re-fills shm with new uncompressed data, preserving the last + * 'preserve' bytes of existing data as the first 'preserve' bytes + * of the new data. + * Does the actual decompression. Will set uncompressed_size on + * the end of compressed stream. + * + * @param cfds cfs to read from + * @param preserve number of bytes to preserve (0 to discard all old data) + * @return number of bytes in shm. 0 if no more data can be uncompressed, -1 on error + */ +static int +cfs_read_bz2 (struct CompressedFileSource *cfs, int64_t preserve) +{ + return -1; +} + + +/** + * Re-fills shm with new uncompressed data, preserving the last + * 'preserve' bytes of existing data as the first 'preserve' bytes + * of the new data. + * Does the actual decompression. Will set uncompressed_size on + * the end of compressed stream. + * + * @param cfds cfs to read from + * @param preserve number of bytes to preserve (0 to discard all old data) + * @return number of bytes in shm. 
0 if no more data can be uncompressed, -1 on error + */ +static int64_t +cfs_read (struct CompressedFileSource *cfs, int64_t preserve) +{ + switch (cfs->compression_type) + { + case COMP_TYPE_ZLIB: + return cfs_read_zlib (cfs, preserve); + case COMP_TYPE_BZ2: + return cfs_read_bz2 (cfs, preserve); + default: + return -1; + } +} + + +/** + * Moves the buffer to 'position' in uncompressed stream. If position + * requires seeking backwards beyond the boundaries of the buffer, resets the + * stream and repeats decompression from the beginning to 'position'. + * + * @param cfs cfs to seek on + * @param position new starting point for the buffer + * @return new absolute buffer position, -1 on error or EOS (NOTE(review): a backward seek beyond the buffer currently also yields -1; no stream reset is performed in this function) + */ +static int64_t +cfs_seek_zlib (struct CompressedFileSource *cfs, int64_t position) +{ + int64_t ret; + + if (position > cfs->strm.total_out - cfs->shm_buf_size && position < cfs->strm.total_out) + { + ret = cfs_read (cfs, cfs->strm.total_out - position); + if (ret < 0) + return ret; + return position; + } + while (position >= cfs->strm.total_out) + { + if (0 > (ret = cfs_read (cfs, 0))) + return ret; + if (ret == 0) + return position; + } + if (position < cfs->strm.total_out && position > cfs->strm.total_out - cfs->shm_buf_size) + return cfs->strm.total_out - cfs->shm_buf_size; + return -1; +} + + +/** + * Moves the buffer to 'position' in uncompressed stream. If position + * requires seeking backwards beyond the boundaries of the buffer, resets the + * stream and repeats decompression from the beginning to 'position'. + * + * @param cfs cfs to seek on + * @param position new starting point for the buffer + * @return new absolute buffer position, -1 on error or EOS + */ +static int64_t +cfs_seek_bz2 (struct CompressedFileSource *cfs, int64_t position) +{ + return -1; +} + + +/** + * Moves the buffer to 'position' in uncompressed stream. 
If position + * requires seeking backwards beyond the boundaries of the buffer, resets the + * stream and repeats decompression from the beginning to 'position'. + * + * @param cfs cfs to seek on + * @param position new starting point for the buffer + * @return new absolute buffer position, -1 on error or EOS + */ +static int64_t +cfs_seek (struct CompressedFileSource *cfs, int64_t position) +{ + switch (cfs->compression_type) + { + case COMP_TYPE_ZLIB: + return cfs_seek_zlib (cfs, position); + case COMP_TYPE_BZ2: + return cfs_seek_bz2 (cfs, position); + default: + return -1; + } +} + + +/** + * Detect if we have compressed data on our hands. + * + * @param data pointer to a data buffer or NULL (in case fd is not -1) + * @param fd a file to read data from, or -1 (if data is not NULL) + * @param fsize size of data (if data is not NULL) or of file (if fd is not -1) + * @return -1 to indicate an error, 0 to indicate uncompressed data, or a type (> 0) of compression + */ +static enum ExtractorCompressionType +get_compression_type (const unsigned char *data, + int fd, + int64_t fsize) +{ + void *read_data = NULL; + size_t read_data_size = 0; + ssize_t read_result; + enum ExtractorCompressionType result = COMP_TYPE_INVALID; + + if ((MIN_COMPRESSED_HEADER < 0) || (fsize < MIN_COMPRESSED_HEADER)) + { + return COMP_TYPE_INVALID; + } + if (data == NULL) + { + int64_t position; + read_data_size = COMPRESSED_DATA_PROBE_SIZE; + read_data = malloc (read_data_size); + if (read_data == NULL) + return -1; +#if WINDOWS + position = _lseeki64 (fd, 0, SEEK_CUR); +#elif HAVE_LSEEK64 + position = lseek64 (fd, 0, SEEK_CUR); +#else + position = (int64_t) lseek (fd, 0, SEEK_CUR); +#endif + read_result = READ (fd, read_data, read_data_size); +#if WINDOWS + position = _lseeki64 (fd, position, SEEK_SET); +#elif HAVE_LSEEK64 + position = lseek64 (fd, position, SEEK_SET); +#else + position = lseek (fd, (off_t) position, SEEK_SET); +#endif + if (read_result != read_data_size) + { + free 
(read_data); + return COMP_TYPE_UNDEFINED; + } + data = (const void *) read_data; + } +#if HAVE_ZLIB + if ((fsize >= MIN_ZLIB_HEADER) && (data[0] == 0x1f) && (data[1] == 0x8b) && (data[2] == 0x08)) + result = COMP_TYPE_ZLIB; +#endif +#if HAVE_LIBBZ2 + if ((fsize >= MIN_BZ2_HEADER) && (data[0] == 'B') && (data[1] == 'Z') && (data[2] == 'h')) + result = COMP_TYPE_BZ2; +#endif + if (read_data != NULL) + free (read_data); + return result; +} + + +#if 0 + + enum ExtractorCompressionType compression_type = -1; + struct CompressedFileSource *cfs = NULL; + int fd = -1; + struct stat64 fstatbuf; + int64_t fsize = 0; + + /* If data is not given, then we need to read it from the file. Try opening it */ + if ((data == NULL) && + (filename != NULL) && + (0 == STAT64(filename, &fstatbuf)) && + (!S_ISDIR(fstatbuf.st_mode)) && + (-1 != (fd = file_open (filename, + O_RDONLY | O_LARGEFILE)))) + { + /* Empty files are of no interest */ + fsize = fstatbuf.st_size; + if (fsize == 0) + { + close(fd); + return; + } + } + + /* Data is not given, and we've failed to open the file with data -> exit */ + if ((fsize == 0) && (data == NULL)) + return; + /* fsize is now size of the data OR size of the file */ + if (data != NULL) + fsize = size; + + errno = 0; + + /* Peek at first few bytes of the file (or of the data), and see if it's compressed. */ + compression_type = get_compression_type (data, fd, fsize); + if (compression_type < 0) + { + /* errno is set by get_compression_type () */ + if (fd != -1) + close (fd); + return; + } + + struct BufferedFileDataSource *bfds; + bfds = bfds_new (data, fd, fsize); + if (bfds == NULL) + return; + + if (compression_type > 0) + { + int icr = 0; + /* Set up a decompressor. + * Will also report compression-related metadata to the caller. 
+ */ + cfs = cfs_new (bfds, fsize, compression_type, proc, proc_cls); + if (cfs == NULL) + { + if (fd != -1) + close (fd); + errno = EILSEQ; + return; + } + icr = cfs_init_decompressor (cfs, proc, proc_cls); + if (icr < 0) + { + if (fd != -1) + close (fd); + errno = EILSEQ; + return; + } + else if (icr == 0) + { + if (fd != -1) + close (fd); + errno = 0; + return; + } + } + + +#endif + + + +/** + * Destroy a data source. + * + * @param datasource source to destroy + */ +void +EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource) +{ + if (cfs != NULL) + { + cfs_deinit_decompressor (cfs); + cfs_delete (cfs); + } + bfds_delete (bfds); + if (-1 != fd) + close(fd); +} + +/* end of extractor_datasource.c */ diff --git a/src/main/extractor_datasource.h b/src/main/extractor_datasource.h @@ -0,0 +1,101 @@ +/* + This file is part of libextractor. + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ +#ifndef EXTRACTOR_DATASOURCE_H +#define EXTRACTOR_DATASOURCE_H + +/** + * Handle to a datasource we can use for the plugins. + */ +struct EXTRACTOR_Datasource; + + +/** + * Create a datasource from a file on disk. 
+ * + * @param filename name of the file on disk + * @return handle to the datasource + */ +struct EXTRACTOR_Datasource * +EXTRACTOR_datasource_create_from_file_ (const char *filename); + + +/** + * Create a datasource from a buffer in memory. + * + * @param buf data in memory + * @param size number of bytes in 'buf' + * @return handle to the datasource + */ +struct EXTRACTOR_Datasource * +EXTRACTOR_datasource_create_from_buffer_ (const char *buf, + size_t size); + + +/** + * Destroy a data source. + * + * @param datasource source to destroy + */ +void +EXTRACTOR_datasource_destroy_ (struct EXTRACTOR_Datasource *datasource); + + +/** + * Make 'size' bytes of data from the data source available at '*data'. + * + * @param cls must be a 'struct EXTRACTOR_Datasource' + * @param data where the data should be copied to + * @param size maximum number of bytes requested + * @return number of bytes now available in data (can be smaller than 'size'), + * -1 on error + */ +ssize_t +EXTRACTOR_datasource_read_ (void *cls, + void *data, + size_t size); + + +/** + * Seek in the datasource. Use 'SEEK_CUR' for whence and 'pos' of 0 to + * obtain the current position in the file. + * + * @param cls must be a 'struct EXTRACTOR_Datasource' + * @param pos position to seek (see 'man lseek') + * @param whence how to seek (absolute to start, relative, absolute to end) + * @return new absolute position, UINT64_MAX on error (i.e. desired position + * does not exist) + */ +uint64_t +EXTRACTOR_datasource_seek_ (void *cls, + uint64_t pos, + int whence); + + +/** + * Determine the overall size of the data source (after compression). + * + * @param cls must be a 'struct EXTRACTOR_Datasource' + * @return overall file size, UINT64_MAX on error (i.e. IPC failure) + */ +uint64_t +EXTRACTOR_datasource_get_size_ (void *cls); + + +#endif diff --git a/src/main/extractor_ipc.h b/src/main/extractor_ipc.h @@ -0,0 +1,61 @@ +/* + This file is part of libextractor. 
+ (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ +#ifndef EXTRACTOR_IPC_H +#define EXTRACTOR_IPC_H + +/** + * Definition of an IPC communication channel with + * some plugin. + */ +struct EXTRACTOR_Channel; + + +/** + * Create a channel to communicate with a process wrapping + * the plugin of the given name. Starts the process as well. + * + * @param short_libname name of the plugin + * @return NULL on error, otherwise IPC channel + */ +struct EXTRACTOR_Channel * +EXTRACTOR_IPC_channel_create_ (const char *short_libname); + + +/** + * Destroy communication channel with a plugin/process. Also + * destroys the process. + * + * @param channel channel to communicate with the plugin + */ +void +EXTRACTOR_IPC_channel_destroy_ (struct EXTRACTOR_Channel *channel); + + +/** + * Map the given buffer + * + * @param channel channel to communicate with the plugin + */ +void +EXTRACTOR_IPC_channel_xxx_ (struct EXTRACTOR_Channel *channel); + + + +#endif diff --git a/src/main/extractor_ipc_gnu.c b/src/main/extractor_ipc_gnu.c @@ -0,0 +1,490 @@ +/* + This file is part of libextractor. 
+ (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + +#include "platform.h" +#include "plibc.h" +#include "extractor.h" +#include <dirent.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/shm.h> +#include <signal.h> + + +/** + * Definition of an IPC communication channel with + * some plugin. + */ +struct EXTRACTOR_Channel +{ + /** + * POSIX id of the shm into which data is uncompressed + */ + int shm; + + /** + * Name of the shm + */ + char shm_name[MAX_SHM_NAME + 1]; + + /** + * Pointer to the mapped region of the shm (covers the whole shm) + */ + void *shm_ptr; + + /** + * Position within shm + */ + int64_t shm_pos; + + /** + * Allocated size of the shm + */ + int64_t shm_size; + + /** + * Number of bytes in shm (<= shm_size) + */ + size_t shm_buf_size; + + +}; + + +/** + * Opens a shared memory object (for later mmapping). + * This is the POSIX variant of the plugin_open_* function. Shm is always memory-backed. + * If a shm is already opened, closes it before opening a new one. + * + * @param plugin plugin context + * @param shm_name name of the shm. + * @return shm id (-1 on error). That is, the result of shm_open() syscall. 
+ */ +static int +plugin_open_shm (struct EXTRACTOR_PluginList *plugin, + const char *shm_name) +{ + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = shm_open (shm_name, O_RDONLY, 0); + return plugin->shm_id; +} + + +/** + * Opens a file (for later mmapping). + * This is the POSIX variant of the plugin_open_* function. + * If a file is already opened, closes it before opening a new one. + * + * @param plugin plugin context + * @param shm_name name of the file to open. + * @return file id (-1 on error). That is, the result of open() syscall. + */ +static int +plugin_open_file (struct EXTRACTOR_PluginList *plugin, + const char *shm_name) +{ + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = open (shm_name, O_RDONLY, 0); + return plugin->shm_id; +} + + +/** + * Initializes an extracting session for a plugin. + * opens the file (in OPMODE_FILE) or the shm (in any other mode) + * sets shm_ptr to NULL (unmaps it, if it was mapped) + * sets position to 0 + * initializes file size to 'fsize' (may be -1) + * sets seek request to 0 + * + * @param plugin plugin context + * @param operation_mode the mode of operation (OPMODE_*) + * @param fsize size of the source file (may be -1) + * @param shm_name name of the shm or file to open + * @return 0 on success, non-0 on error. + */ +static int +init_state_method (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + int64_t fsize, + const char *shm_name) +{ + plugin->seek_request = 0; + if (plugin->shm_ptr != NULL) + munmap (plugin->shm_ptr, plugin->map_size); + plugin->shm_ptr = NULL; + if (operation_mode == OPMODE_FILE) + { + if (-1 == plugin_open_file (plugin, shm_name)) + return 1; + } + else if (-1 == plugin_open_shm (plugin, shm_name)) + return 1; + plugin->fsize = fsize; + plugin->shm_pos = 0; + plugin->fpos = 0; + return 0; +} + + +/** + * Deinitializes an extracting session for a plugin. 
+ * unmaps shm_ptr (if was mapped) + * closes file/shm (if it was opened) + * sets map size and shm_ptr to NULL. + * + * @param plugin plugin context + */ +static void +discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + if (plugin->shm_ptr != NULL && plugin->map_size > 0) + munmap (plugin->shm_ptr, plugin->map_size); + if (plugin->shm_id != -1) + close (plugin->shm_id); + plugin->shm_id = -1; + plugin->map_size = 0; + plugin->shm_ptr = NULL; +} + + + +/** + * Start the process for the given plugin. + */ +static void +start_process (struct EXTRACTOR_PluginList *plugin) +{ + int p1[2]; + int p2[2]; + pid_t pid; + int status; + + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + if (-1 != plugin->cpid && 0 != plugin->cpid) + return; + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (0 != plugin->cpid) + return; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; + } + + plugin->cpid = -1; + if (0 != pipe (p1)) + { + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (0 != pipe (p2)) + { + close (p1[0]); + close (p1[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + pid = fork (); + plugin->cpid = pid; + if (pid == -1) + { + close (p1[0]); + close (p1[1]); + close (p2[0]); + close (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (pid == 0) + { + close (p1[1]); + close (p2[0]); + plugin_main (plugin, p1[0], p2[1]); + _exit (0); + } + close (p1[0]); + close (p2[1]); + plugin->cpipe_in = fdopen (p1[1], "w"); + if (plugin->cpipe_in == NULL) + { + perror ("fdopen"); + (void) kill (plugin->cpid, SIGKILL); + waitpid (plugin->cpid, &status, 0); + close (p1[1]); + close (p2[0]); + plugin->cpid = -1; + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + plugin->cpipe_out = p2[0]; +} + + +/** + * Stop the child process of this plugin. 
+ */ +static void +stop_process (struct EXTRACTOR_PluginList *plugin) +{ + int status; + +#if DEBUG + if (plugin->cpid == -1) + fprintf (stderr, + "Plugin `%s' choked on this input\n", + plugin->short_libname); +#endif + if ( (plugin->cpid == -1) || + (plugin->cpid == 0) ) + return; + kill (plugin->cpid, SIGKILL); + waitpid (plugin->cpid, &status, 0); + plugin->cpid = -1; + close (plugin->cpipe_out); + fclose (plugin->cpipe_in); + plugin->cpipe_out = -1; + plugin->cpipe_in = NULL; + + if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) + plugin->flags = EXTRACTOR_OPTION_DISABLED; + + plugin->seek_request = -1; +} + + +static int +write_plugin_data (const struct EXTRACTOR_PluginList *plugin) +{ + /* This function is only necessary on W32. On POSIX + * systems plugin inherits its own data from the parent */ + return 0; +} + + +/** + * Initializes an extracting session for a plugin. + * opens the file/shm (only in OPMODE_FILE) + * sets shm_ptr to NULL (unmaps it, if it was mapped) + * sets position to 0 + * initializes file size to 'fsize' (may be -1) + * sets seek request to 0 + * + * @param plugin plugin context + * @param operation_mode the mode of operation (OPMODE_*) + * @param fsize size of the source file (may be -1) + * @param shm_name name of the shm or file to open + * @return 0 on success, non-0 on error. + */ +static int +init_state_method (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + int64_t fsize, + const char *shm_name) +{ + plugin->seek_request = 0; + if (plugin->shm_ptr != NULL) + munmap (plugin->shm_ptr, plugin->map_size); + plugin->shm_ptr = NULL; + if (operation_mode == OPMODE_FILE) + { + if (-1 == plugin_open_file (plugin, shm_name)) + return 1; + } + else if (-1 == plugin_open_shm (plugin, shm_name)) + return 1; + plugin->fsize = fsize; + plugin->shm_pos = 0; + plugin->fpos = 0; + return 0; +} + + +/** + * Setup a shared memory segment. 
+ * + * @param ptr set to the location of the shm segment + * @param shmid where to store the shm ID + * @param fn name of the shared segment + * @param fn_size size available in fn + * @param size number of bytes to allocated for the segment + * @return 0 on success + */ +static int +make_shm_posix (void **ptr, + int *shmid, + char *fn, + size_t fn_size, size_t size) +{ + const char *tpath; +#if SOMEBSD + /* this works on FreeBSD, not sure about others... */ + tpath = getenv ("TMPDIR"); + if (tpath == NULL) + tpath = "/tmp/"; +#else + tpath = "/"; /* Linux */ +#endif + snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), + (unsigned int) RANDOM()); + *shmid = shm_open (fn, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + *ptr = NULL; + if (-1 == *shmid) + return 1; + if ((0 != ftruncate (*shmid, size)) || + (NULL == (*ptr = mmap (NULL, size, PROT_WRITE, MAP_SHARED, *shmid, 0))) || + (*ptr == (void*) -1) ) + { + close (*shmid); + *shmid = -1; + shm_unlink (fn); + return 1; + } + return 0; +} + + +static void +destroy_shm_posix (void *ptr, int shm_id, size_t size, char *shm_name) +{ + if (NULL != ptr) + munmap (ptr, size); + if (shm_id != -1) + close (shm_id); + shm_unlink (shm_name); +} + + +/** + * Receive 'size' bytes from plugin, store them in 'buf' + * + * @param plugin plugin context + * @param buf buffer to fill + * @param size number of bytes to read + * @return number of bytes read, 0 on EOS, < 0 on error + */ +static int +plugin_read (struct EXTRACTOR_PluginList *plugin, + void *buf, + size_t size) +{ + char *rb = buf; + ssize_t read_result; + size_t read_count = 0; + + while (read_count < size) + { + read_result = read (plugin->cpipe_out, + &rb[read_count], size - read_count); + if (read_result <= 0) + return read_result; + read_count += read_result; + } + return read_count; +} + + +/** + * Wait for one of the plugins to reply. 
+ * Selects on plugin output pipes, runs receive_reply() + * on each activated pipe until it gets a seek request + * or a done message. Called repeatedly by the user until all pipes are dry or + * broken. + * + * @param plugins to select upon + * @param proc metadata callback + * @param proc_cls callback cls + * @return number of dry/broken pipes since last call, -1 on error or if no + * plugins reply in 10 seconds. + */ +static int +wait_for_reply (struct EXTRACTOR_PluginList *plugins, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int ready; + int result; + struct timeval tv; + fd_set to_check; + int highest = 0; + int read_result; + struct EXTRACTOR_PluginList *ppos; + + FD_ZERO (&to_check); + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + switch (ppos->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + FD_SET (ppos->cpipe_out, &to_check); + if (highest < ppos->cpipe_out) + highest = ppos->cpipe_out; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } + } + + tv.tv_sec = 10; + tv.tv_usec = 0; + ready = select (highest + 1, &to_check, NULL, NULL, &tv); + if (ready <= 0) + /* an error or timeout -> something's wrong or all plugins hung up */ + return -1; + + result = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + switch (ppos->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + if (FD_ISSET (ppos->cpipe_out, &to_check)) + { + read_result = receive_reply (ppos, proc, proc_cls); + if (read_result < 0) + { + stop_process (ppos); + } + result += 1; + } + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } + } + return result; +} diff --git a/src/main/extractor_ipc_w32.c b/src/main/extractor_ipc_w32.c @@ -0,0 +1,905 @@ +/* + This file is part of 
libextractor. + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff + + libextractor is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + libextractor is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with libextractor; see the file COPYING. If not, write to the + Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + */ + + +/** + * Definition of an IPC communication channel with + * some plugin. + */ +struct EXTRACTOR_Channel +{ + + /** + * W32 handle of the shm into which data is uncompressed + */ + HANDLE shm; + + /** + * Name of the shm + */ + char shm_name[MAX_SHM_NAME + 1]; + + /** + * Pointer to the mapped region of the shm (covers the whole shm) + */ + void *shm_ptr; + + /** + * Position within shm + */ + int64_t shm_pos; + + /** + * Allocated size of the shm + */ + int64_t shm_size; + + /** + * Number of bytes in shm (<= shm_size) + */ + size_t shm_buf_size; + + +}; + + + +/** + * Initializes an extracting session for a plugin. + * opens the file/shm (only in OPMODE_FILE) + * sets shm_ptr to NULL (unmaps it, if it was mapped) + * sets position to 0 + * initializes file size to 'fsize' (may be -1) + * sets seek request to 0 + * + * @param plugin plugin context + * @param operation_mode the mode of operation (OPMODE_*) + * @param fsize size of the source file (may be -1) + * @param shm_name name of the shm or file to open + * @return 0 on success, non-0 on error. 
+ */ +static int +init_state_method (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + int64_t fsize, + const char *shm_name) +{ + plugin->seek_request = 0; + if (plugin->shm_ptr != NULL) + UnmapViewOfFile (plugin->shm_ptr); + plugin->shm_ptr = NULL; + if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name)) + return 1; + plugin->fsize = fsize; + plugin->shm_pos = 0; + plugin->fpos = 0; + return 0; +} + +/** + * Opens a shared memory object (for later mmapping). + * This is W32 variant of the plugin_open_* function. + * Opened shm might be memory-backed or file-backed (depending on how + * it was created). shm_name is never a file name, unlike POSIX. + * Closes a shm is already opened, closes it before opening a new one. + * + * @param plugin plugin context + * @param shm_name name of the shared memory object. + * @return memory-mapped file handle (NULL on error). That is, the result of OpenFileMapping() syscall. + */ +static HANDLE +plugin_open_shm (struct EXTRACTOR_PluginList *plugin, + const char *shm_name) +{ + if (plugin->map_handle != 0) + CloseHandle (plugin->map_handle); + plugin->map_handle = OpenFileMapping (FILE_MAP_READ, FALSE, shm_name); + return plugin->map_handle; +} + + +/** + * Another name for plugin_open_shm(). + */ +static HANDLE +plugin_open_file (struct EXTRACTOR_PluginList *plugin, + const char *shm_name) +{ + return plugin_open_shm (plugin, shm_name); +} + + +/** + * Initializes an extracting session for a plugin. + * opens the file/shm (only in OPMODE_FILE) + * sets shm_ptr to NULL (unmaps it, if it was mapped) + * sets position to 0 + * initializes file size to 'fsize' (may be -1) + * sets seek request to 0 + * + * @param plugin plugin context + * @param operation_mode the mode of operation (OPMODE_*) + * @param fsize size of the source file (may be -1) + * @param shm_name name of the shm or file to open + * @return 0 on success, non-0 on error. 
+ */ +static int +init_state_method (struct EXTRACTOR_PluginList *plugin, + uint8_t operation_mode, + int64_t fsize, + const char *shm_name) +{ + plugin->seek_request = 0; + if (plugin->shm_ptr != NULL) + UnmapViewOfFile (plugin->shm_ptr); + plugin->shm_ptr = NULL; + if (INVALID_HANDLE_VALUE == plugin_open_shm (plugin, shm_name)) + return 1; + plugin->fsize = fsize; + plugin->shm_pos = 0; + plugin->fpos = 0; + return 0; +} + + +/** + * Deinitializes an extracting session for a plugin. + * unmaps shm_ptr (if was mapped) + * closes file/shm (if it was opened) + * sets map size and shm_ptr to NULL. + * + * @param plugin plugin context + */ +static void +discard_state_method (struct EXTRACTOR_PluginList *plugin) +{ + if (plugin->shm_ptr != NULL) + UnmapViewOfFile (plugin->shm_ptr); + if (plugin->map_handle != 0) + CloseHandle (plugin->map_handle); + plugin->map_handle = 0; + plugin->map_size = 0; + plugin->shm_ptr = NULL; +} + + +#ifndef PIPE_BUF +#define PIPE_BUF 512 +#endif + +/* Copyright Bob Byrnes <byrnes <at> curl.com> + http://permalink.gmane.org/gmane.os.cygwin.patches/2121 +*/ +/* Create a pipe, and return handles to the read and write ends, + just like CreatePipe, but ensure that the write end permits + FILE_READ_ATTRIBUTES access, on later versions of win32 where + this is supported. This access is needed by NtQueryInformationFile, + which is used to implement select and nonblocking writes. + Note that the return value is either NO_ERROR or GetLastError, + unlike CreatePipe, which returns a bool for success or failure. */ +static int +create_selectable_pipe (PHANDLE read_pipe_ptr, PHANDLE write_pipe_ptr, + LPSECURITY_ATTRIBUTES sa_ptr, DWORD psize, + DWORD dwReadMode, DWORD dwWriteMode) +{ + /* Default to error. */ + *read_pipe_ptr = *write_pipe_ptr = INVALID_HANDLE_VALUE; + + HANDLE read_pipe = INVALID_HANDLE_VALUE, write_pipe = INVALID_HANDLE_VALUE; + + /* Ensure that there is enough pipe buffer space for atomic writes. 
*/ + if (psize < PIPE_BUF) + psize = PIPE_BUF; + + char pipename[MAX_PATH]; + + /* Retry CreateNamedPipe as long as the pipe name is in use. + * Retrying will probably never be necessary, but we want + * to be as robust as possible. */ + while (1) + { + static volatile LONG pipe_unique_id; + + snprintf (pipename, sizeof pipename, "\\\\.\\pipe\\gnunet-%d-%ld", + getpid (), InterlockedIncrement ((LONG *) & pipe_unique_id)); + /* Use CreateNamedPipe instead of CreatePipe, because the latter + * returns a write handle that does not permit FILE_READ_ATTRIBUTES + * access, on versions of win32 earlier than WinXP SP2. + * CreatePipe also stupidly creates a full duplex pipe, which is + * a waste, since only a single direction is actually used. + * It's important to only allow a single instance, to ensure that + * the pipe was not created earlier by some other process, even if + * the pid has been reused. We avoid FILE_FLAG_FIRST_PIPE_INSTANCE + * because that is only available for Win2k SP2 and WinXP. */ + read_pipe = CreateNamedPipeA (pipename, PIPE_ACCESS_INBOUND | dwReadMode, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE, 1, /* max instances */ + psize, /* output buffer size */ + psize, /* input buffer size */ + NMPWAIT_USE_DEFAULT_WAIT, sa_ptr); + + if (read_pipe != INVALID_HANDLE_VALUE) + { + break; + } + + DWORD err = GetLastError (); + + switch (err) + { + case ERROR_PIPE_BUSY: + /* The pipe is already open with compatible parameters. + * Pick a new name and retry. */ + continue; + case ERROR_ACCESS_DENIED: + /* The pipe is already open with incompatible parameters. + * Pick a new name and retry. */ + continue; + case ERROR_CALL_NOT_IMPLEMENTED: + /* We are on an older Win9x platform without named pipes. + * Return an anonymous pipe as the best approximation. */ + if (CreatePipe (read_pipe_ptr, write_pipe_ptr, sa_ptr, psize)) + { + return 0; + } + err = GetLastError (); + return err; + default: + return err; + } + /* NOTREACHED */ + } + + /* Open the named pipe for writing. 
+ * Be sure to permit FILE_READ_ATTRIBUTES access. */ + write_pipe = CreateFileA (pipename, GENERIC_WRITE | FILE_READ_ATTRIBUTES, 0, /* share mode */ + sa_ptr, OPEN_EXISTING, dwWriteMode, /* flags and attributes */ + 0); /* handle to template file */ + + if (write_pipe == INVALID_HANDLE_VALUE) + { + /* Failure. */ + DWORD err = GetLastError (); + + CloseHandle (read_pipe); + return err; + } + + /* Success. */ + *read_pipe_ptr = read_pipe; + *write_pipe_ptr = write_pipe; + return 0; +} + + +/** + * Writes @size bytes from @buf to @h, using @ov for + * overlapped i/o. Deallocates @old_buf and sets it to NULL, + * if necessary. + * Writes asynchronously, but sequentially (only one writing + * operation may be active at any given moment, but it will + * be done in background). Thus it is intended to be used + * for writing a few big chunks rather than a lot of small pieces. + * + * The extravagant interface is mainly because this function + * does not use a separate struct to group together overlapped + * structure, buffer pointer and the handle. + * + * @param h pipe handle + * @param ov overlapped structure pointer + * @param buf buffer to read from. Will be copied internally + * @param size number of bytes to write + * @param old_buf pointer where a copy of previous buffer is stored, + * and where a copy of @buf will be stored. 
+ * + * @return number of bytes written, -1 on error + */ +static int +write_to_pipe (HANDLE h, + OVERLAPPED *ov, + unsigned char *buf, size_t size, + unsigned char **old_buf) +{ + DWORD written; + BOOL bresult; + DWORD err; + + if (WAIT_OBJECT_0 != WaitForSingleObject (ov->hEvent, INFINITE)) + return -1; + + ResetEvent (ov->hEvent); + + if (*old_buf != NULL) + free (*old_buf); + + *old_buf = malloc (size); + if (*old_buf == NULL) + return -1; + memcpy (*old_buf, buf, size); + written = 0; + ov->Offset = 0; + ov->OffsetHigh = 0; + ov->Pointer = 0; + ov->Internal = 0; + ov->InternalHigh = 0; + bresult = WriteFile (h, *old_buf, size, &written, ov); + + if (bresult == TRUE) + { + SetEvent (ov->hEvent); + free (*old_buf); + *old_buf = NULL; + return written; + } + + err = GetLastError (); + if (err == ERROR_IO_PENDING) + return size; + SetEvent (ov->hEvent); + *old_buf = NULL; + SetLastError (err); + return -1; +} + + +#define plugin_write(plug, buf, size) write_to_pipe (plug->cpipe_in, &plug->ov_write, buf, size, &plug->ov_write_buffer) + + +/** + * Communicates plugin data (library name, options) to the plugin + * process. This is only necessary on W32, where this information + * is not inherited by the plugin, because it is not forked. 
+ * + * @param plugin plugin context + * + * @return 0 on success, -1 on failure + */ +static int +write_plugin_data (struct EXTRACTOR_PluginList *plugin) +{ + size_t libname_len, shortname_len, opts_len; + DWORD len; + char *str; + size_t total_len = 0; + unsigned char *buf, *ptr; + + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return 0; + break; + case EXTRACTOR_OPTION_DISABLED: + return 0; + break; + } + + libname_len = strlen (plugin->libname) + 1; + total_len += sizeof (size_t) + libname_len; + shortname_len = strlen (plugin->short_libname) + 1; + total_len += sizeof (size_t) + shortname_len; + if (plugin->plugin_options != NULL) + { + opts_len = strlen (plugin->plugin_options) + 1; + total_len += opts_len; + } + else + { + opts_len = 0; + } + total_len += sizeof (size_t); + + buf = malloc (total_len); + if (buf == NULL) + return -1; + ptr = buf; + memcpy (ptr, &libname_len, sizeof (size_t)); + ptr += sizeof (size_t); + memcpy (ptr, plugin->libname, libname_len); + ptr += libname_len; + memcpy (ptr, &shortname_len, sizeof (size_t)); + ptr += sizeof (size_t); + memcpy (ptr, plugin->short_libname, shortname_len); + ptr += shortname_len; + memcpy (ptr, &opts_len, sizeof (size_t)); + ptr += sizeof (size_t); + if (opts_len > 0) + { + memcpy (ptr, plugin->plugin_options, opts_len); + ptr += opts_len; + } + if (total_len != write_to_pipe (plugin->cpipe_in, &plugin->ov_write, buf, total_len, &plugin->ov_write_buffer)) + { + free (buf); + return -1; + } + free (buf); + return 0; +} + + +/** + * Reads plugin data from the LE server process. + * Also initializes allocation granularity (duh...). 
+ * + * @param fd the pipe to read from + * + * @return newly allocated plugin context + */ +static struct EXTRACTOR_PluginList * +read_plugin_data (int fd) +{ + struct EXTRACTOR_PluginList *ret; + size_t i; + + ret = malloc (sizeof (struct EXTRACTOR_PluginList)); + if (ret == NULL) + return NULL; + read (fd, &i, sizeof (size_t)); + ret->libname = malloc (i); + if (ret->libname == NULL) + { + free (ret); + return NULL; + } + read (fd, ret->libname, i); + ret->libname[i - 1] = '\0'; + + read (fd, &i, sizeof (size_t)); + ret->short_libname = malloc (i); + if (ret->short_libname == NULL) + { + free (ret->libname); + free (ret); + return NULL; + } + read (fd, ret->short_libname, i); + ret->short_libname[i - 1] = '\0'; + + read (fd, &i, sizeof (size_t)); + if (i == 0) + { + ret->plugin_options = NULL; + } + else + { + ret->plugin_options = malloc (i); + if (ret->plugin_options == NULL) + { + free (ret->short_libname); + free (ret->libname); + free (ret); + return NULL; + } + read (fd, ret->plugin_options, i); + ret->plugin_options[i - 1] = '\0'; + } + { + SYSTEM_INFO si; + GetSystemInfo (&si); + ret->allocation_granularity = si.dwAllocationGranularity; + } + return ret; +} + + +/** + * Start the process for the given plugin. 
+ */ +static void +start_process (struct EXTRACTOR_PluginList *plugin) +{ + HANDLE p1[2]; + HANDLE p2[2]; + STARTUPINFO startup; + PROCESS_INFORMATION proc; + char cmd[MAX_PATH + 1]; + char arg1[10], arg2[10]; + HANDLE p10_os_inh = INVALID_HANDLE_VALUE, p21_os_inh = INVALID_HANDLE_VALUE; + SECURITY_ATTRIBUTES sa; + + switch (plugin->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + if (plugin->hProcess != INVALID_HANDLE_VALUE && plugin->hProcess != 0) + return; + break; + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (plugin->hProcess != 0) + return; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + return; + break; + case EXTRACTOR_OPTION_DISABLED: + return; + break; + } + + sa.nLength = sizeof (sa); + sa.lpSecurityDescriptor = NULL; + sa.bInheritHandle = FALSE; + + plugin->hProcess = NULL; + + if (0 != create_selectable_pipe (&p1[0], &p1[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) + { + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + if (0 != create_selectable_pipe (&p2[0], &p2[1], &sa, 1024, FILE_FLAG_OVERLAPPED, FILE_FLAG_OVERLAPPED)) + { + CloseHandle (p1[0]); + CloseHandle (p1[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + + memset (&startup, 0, sizeof (STARTUPINFO)); + + if (!DuplicateHandle (GetCurrentProcess (), p1[0], GetCurrentProcess (), + &p10_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS) + || !DuplicateHandle (GetCurrentProcess (), p2[1], GetCurrentProcess (), + &p21_os_inh, 0, TRUE, DUPLICATE_SAME_ACCESS)) + { + if (p10_os_inh != INVALID_HANDLE_VALUE) + CloseHandle (p10_os_inh); + if (p21_os_inh != INVALID_HANDLE_VALUE) + CloseHandle (p21_os_inh); + CloseHandle (p1[0]); + CloseHandle (p1[1]); + CloseHandle (p2[0]); + CloseHandle (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + + /* TODO: write our own plugin-hosting executable? rundll32, for once, has smaller than usual stack size. 
+ * Also, users might freak out seeing over 9000 rundll32 processes (seeing over 9000 processes named + * "libextractor_plugin_helper" is probably less confusing). + */ + snprintf(cmd, MAX_PATH + 1, + "rundll32.exe libextractor-3.dll,RundllEntryPoint@16 %lu %lu", + p10_os_inh, p21_os_inh); + cmd[MAX_PATH] = '\0'; + if (CreateProcessA (NULL, cmd, NULL, NULL, TRUE, 0, NULL, NULL, + &startup, &proc)) + { + plugin->hProcess = proc.hProcess; + CloseHandle (proc.hThread); + } + else + { + CloseHandle (p1[0]); + CloseHandle (p1[1]); + CloseHandle (p2[0]); + CloseHandle (p2[1]); + plugin->flags = EXTRACTOR_OPTION_DISABLED; + return; + } + CloseHandle (p1[0]); + CloseHandle (p2[1]); + CloseHandle (p10_os_inh); + CloseHandle (p21_os_inh); + + plugin->cpipe_in = p1[1]; + plugin->cpipe_out = p2[0]; + + memset (&plugin->ov_read, 0, sizeof (OVERLAPPED)); + memset (&plugin->ov_write, 0, sizeof (OVERLAPPED)); + + plugin->ov_write_buffer = NULL; + + plugin->ov_write.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); + plugin->ov_read.hEvent = CreateEvent (NULL, TRUE, TRUE, NULL); +} + + +/** + * Stop the child process of this plugin. 
+ */ +static void +stop_process (struct EXTRACTOR_PluginList *plugin) +{ + int status; + HANDLE process; + +#if DEBUG + if (plugin->hProcess == INVALID_HANDLE_VALUE) + fprintf (stderr, + "Plugin `%s' choked on this input\n", + plugin->short_libname); +#endif + if (plugin->hProcess == INVALID_HANDLE_VALUE || + plugin->hProcess == NULL) + return; + TerminateProcess (plugin->hProcess, 0); + CloseHandle (plugin->hProcess); + plugin->hProcess = INVALID_HANDLE_VALUE; + CloseHandle (plugin->cpipe_out); + CloseHandle (plugin->cpipe_in); + plugin->cpipe_out = INVALID_HANDLE_VALUE; + plugin->cpipe_in = INVALID_HANDLE_VALUE; + CloseHandle (plugin->ov_read.hEvent); + CloseHandle (plugin->ov_write.hEvent); + if (plugin->ov_write_buffer != NULL) + { + free (plugin->ov_write_buffer); + plugin->ov_write_buffer = NULL; + } + + if (plugin->flags != EXTRACTOR_OPTION_DEFAULT_POLICY) + plugin->flags = EXTRACTOR_OPTION_DISABLED; + + plugin->seek_request = -1; +} + + + +/** + * Setup a shared memory segment. + * + * @param ptr set to the location of the map segment + * @param map where to store the map handle + * @param fn name of the mapping + * @param fn_size size available in fn + * @param size number of bytes to allocated for the mapping + * @return 0 on success + */ +static int +make_shm_w32 (void **ptr, + HANDLE *map, + char *fn, + size_t fn_size, size_t size) +{ + const char *tpath = "Local\\"; + + snprintf (fn, fn_size, + "%slibextractor-shm-%u-%u", + tpath, getpid(), + (unsigned int) RANDOM()); + *map = CreateFileMapping (INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, size, fn); + *ptr = MapViewOfFile (*map, FILE_MAP_WRITE, 0, 0, size); + if (*ptr == NULL) + { + CloseHandle (*map); + return 1; + } + return 0; +} + +/** + * Setup a file-backed shared memory segment. 
+ * + * @param map where to store the map handle + * @param file handle of the file to back the shm + * @param fn name of the mapping + * @param fn_size size available in fn + * @param size number of bytes to allocated for the mapping + * @return 0 on success + */ +static int +make_file_backed_shm_w32 (HANDLE *map, HANDLE file, char *fn, size_t fn_size) +{ + const char *tpath = "Local\\"; + snprintf (fn, fn_size, "%slibextractor-shm-%u-%u", tpath, getpid(), + (unsigned int) RANDOM()); + *map = CreateFileMapping (file, NULL, PAGE_READONLY, 0, 0, fn); + if (*map == NULL) + { + DWORD err = GetLastError (); + return 1; + } + return 0; +} + + +static void +destroy_shm_w32 (void *ptr, HANDLE map) +{ + UnmapViewOfFile (ptr); + CloseHandle (map); +} + + +static void +destroy_file_backed_shm_w32 (HANDLE map) +{ + CloseHandle (map); +} + + + +#define plugin_write(plug, buf, size) write_all (fileno (plug->cpipe_in), buf, size) + + +void CALLBACK +RundllEntryPoint (HWND hwnd, + HINSTANCE hinst, + LPSTR lpszCmdLine, + int nCmdShow) +{ + intptr_t in_h; + intptr_t out_h; + int in; + int out; + + sscanf (lpszCmdLine, "%lu %lu", &in_h, &out_h); + in = _open_osfhandle (in_h, _O_RDONLY); + out = _open_osfhandle (out_h, 0); + setmode (in, _O_BINARY); + setmode (out, _O_BINARY); + plugin_main (read_plugin_data (in), + in, out); +} + + +void CALLBACK +RundllEntryPointA (HWND hwnd, + HINSTANCE hinst, + LPSTR lpszCmdLine, + int nCmdShow) +{ + return RundllEntryPoint (hwnd, hinst, lpszCmdLine, nCmdShow); +} + + + +/** + * Receive 'size' bytes from plugin, store them in 'buf' + * + * @param plugin plugin context + * @param buf buffer to fill + * @param size number of bytes to read + * @return number of bytes read, 0 on EOS, < 0 on error + */ +static int +plugin_read (struct EXTRACTOR_PluginList *plugin, + void *buf, size_t size) +{ + char *rb = buf; + DWORD bytes_read; + size_t read_count = 0; + + while (read_count < size) + { + if (! 
ReadFile (plugin->cpipe_out, + &rb[read_count], size - read_count, + &bytes_read, NULL)) + return -1; + read_count += bytes_read; + } + return read_count; +} + + +/** + * Wait for one of the plugins to reply. + * Selects on plugin output pipes, runs receive_reply() + * on each activated pipe until it gets a seek request + * or a done message. Called repeatedly by the user until all pipes are dry or + * broken. + * This W32 version of wait_for_reply() can't select on more than 64 plugins + * at once (returns -1 if there are more than 64 plugins). + * + * @param plugins to select upon + * @param proc metadata callback + * @param proc_cls callback cls + * @return number of dry/broken pipes since last call, -1 on error or if no + * plugins reply in 10 seconds. + */ +static int +wait_for_reply (struct EXTRACTOR_PluginList *plugins, + EXTRACTOR_MetaDataProcessor proc, void *proc_cls) +{ + int result; + DWORD ms; + DWORD first_ready; + DWORD dwresult; + DWORD bytes_read; + BOOL bresult; + unsigned int i; + HANDLE events[MAXIMUM_WAIT_OBJECTS]; + struct EXTRACTOR_PluginList *ppos; + + i = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + if (i == MAXIMUM_WAIT_OBJECTS) + return -1; + if (ppos->seek_request == -1) + continue; + switch (ppos->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (WaitForSingleObject (ppos->ov_read.hEvent, 0) == WAIT_OBJECT_0) + { + ResetEvent (ppos->ov_read.hEvent); + bresult = ReadFile (ppos->cpipe_out, &i, 0, &bytes_read, &ppos->ov_read); + if (bresult == TRUE) + { + SetEvent (ppos->ov_read.hEvent); + } + else + { + DWORD err = GetLastError (); + if (err != ERROR_IO_PENDING) + SetEvent (ppos->ov_read.hEvent); + } + } + events[i] = ppos->ov_read.hEvent; + i++; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } + } + + ms = 10000; + first_ready = WaitForMultipleObjects (i, events, FALSE, ms); + if (first_ready == WAIT_TIMEOUT 
|| first_ready == WAIT_FAILED) + /* an error or timeout -> something's wrong or all plugins hung up */ + return -1; + + i = 0; + result = 0; + for (ppos = plugins; NULL != ppos; ppos = ppos->next) + { + int read_result; + switch (ppos->flags) + { + case EXTRACTOR_OPTION_DEFAULT_POLICY: + case EXTRACTOR_OPTION_OUT_OF_PROCESS_NO_RESTART: + if (ppos->seek_request == -1) + continue; + if (i < first_ready) + { + i += 1; + continue; + } + dwresult = WaitForSingleObject (ppos->ov_read.hEvent, 0); + read_result = 0; + if (dwresult == WAIT_OBJECT_0) + { + read_result = receive_reply (ppos, proc, proc_cls); + result += 1; + } + if (dwresult == WAIT_FAILED || read_result < 0) + { + stop_process (ppos); + if (dwresult == WAIT_FAILED) + result += 1; + } + i++; + break; + case EXTRACTOR_OPTION_IN_PROCESS: + break; + case EXTRACTOR_OPTION_DISABLED: + break; + } + } + return result; +} + + diff --git a/src/main/extractor_plugins.c b/src/main/extractor_plugins.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. - (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -17,38 +17,11 @@ Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - #include "extractor_plugins.h" #include "extractor_plugpath.h" /** - * Load the default set of plugins. The default can be changed - * by setting the LIBEXTRACTOR_LIBRARIES environment variable. - * If it is set to "env", then this function will return - * EXTRACTOR_plugin_add_config (NULL, env, flags). Otherwise, - * it will load all of the installed plugins and return them. 
- * - * @param flags options for all of the plugins loaded - * @return the default set of plugins, NULL if no plugins were found - */ -struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags) -{ - struct DefaultLoaderContext dlc; - char *env; - - env = getenv ("LIBEXTRACTOR_LIBRARIES"); - if (env != NULL) - return EXTRACTOR_plugin_add_config (NULL, env, flags); - dlc.res = NULL; - dlc.flags = flags; - get_installation_paths (&load_plugins_from_dir, - &dlc); - return dlc.res; -} - -/** * Try to resolve a plugin function. * * @param lib_handle library to search for the symbol @@ -58,10 +31,10 @@ EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags) * @return NULL on error, otherwise pointer to the symbol */ static void * -get_symbol_with_prefix(void *lib_handle, - const char *template, - const char *prefix, - const char **options) +get_symbol_with_prefix (void *lib_handle, + const char *template, + const char *prefix, + const char **options) { char *name; void *symbol; @@ -70,19 +43,17 @@ get_symbol_with_prefix(void *lib_handle, char *dot; const char *(*opt_fun)(void); - if (NULL != options) *options = NULL; - sym_name = strrchr (prefix, '_'); - if (sym_name == NULL) + if (NULL != options) + *options = NULL; + if (NULL == (sym_name = strrchr (prefix, '_'))) return NULL; sym_name++; - sym = strdup (sym_name); - if (sym == NULL) + if (NULL == (sym = strdup (sym_name))) return NULL; dot = strchr (sym, '.'); - if (dot != NULL) + if (NULL != dot) *dot = '\0'; - name = malloc(strlen(sym) + strlen(template) + 1); - if (name == NULL) + if (NULL == (name = malloc(strlen(sym) + strlen(template) + 1))) { free (sym); return NULL; @@ -91,32 +62,32 @@ get_symbol_with_prefix(void *lib_handle, template, sym); /* try without '_' first */ - symbol = lt_dlsym(lib_handle, name + 1); - if (symbol==NULL) + symbol = lt_dlsym (lib_handle, name + 1); + if (NULL == symbol) { /* now try with the '_' */ #if DEBUG - char *first_error = strdup 
(lt_dlerror()); + char *first_error = strdup (lt_dlerror ()); #endif - symbol = lt_dlsym(lib_handle, name); + symbol = lt_dlsym (lib_handle, name); #if DEBUG if (NULL == symbol) { - fprintf(stderr, - "Resolving symbol `%s' failed, " - "so I tried `%s', but that failed also. Errors are: " - "`%s' and `%s'.\n", - name+1, - name, - first_error == NULL ? "out of memory" : first_error, - lt_dlerror()); + fprintf (stderr, + "Resolving symbol `%s' failed, " + "so I tried `%s', but that failed also. Errors are: " + "`%s' and `%s'.\n", + name+1, + name, + first_error == NULL ? "out of memory" : first_error, + lt_dlerror()); } - if (first_error != NULL) + if (NULL != first_error) free(first_error); #endif } - if ( (symbol != NULL) && + if ( (NULL != symbol) && (NULL != options) ) { /* get special options */ @@ -124,15 +95,14 @@ get_symbol_with_prefix(void *lib_handle, "_EXTRACTOR_%s_options", sym); /* try without '_' first */ - opt_fun = lt_dlsym(lib_handle, name + 1); - if (opt_fun == NULL) - opt_fun = lt_dlsym(lib_handle, name); - if (opt_fun != NULL) + opt_fun = lt_dlsym (lib_handle, name + 1); + if (NULL == opt_fun) + opt_fun = lt_dlsym (lib_handle, name); + if (NULL != opt_fun) *options = opt_fun (); } free (sym); - free(name); - + free (name); return symbol; } @@ -144,7 +114,7 @@ get_symbol_with_prefix(void *lib_handle, * @return 0 on success, -1 on error */ int -plugin_load (struct EXTRACTOR_PluginList *plugin) +EXTRACTOR_plugin_load_ (struct EXTRACTOR_PluginList *plugin) { #if WINDOWS wchar_t wlibname[4097]; @@ -152,9 +122,11 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) #endif lt_dladvise advise; - if (plugin->libname == NULL) - plugin->libname = find_plugin (plugin->short_libname); - if (plugin->libname == NULL) + if (EXTRACTOR_OPTION_DISABLED == plugin->flags) + return -1; + if (NULL == plugin->libname) + plugin->libname = EXTRACTOR_find_plugin_ (plugin->short_libname); + if (NULL == plugin->libname) { #if DEBUG fprintf (stderr, @@ -170,9 +142,11 @@ 
plugin_load (struct EXTRACTOR_PluginList *plugin) #if WINDOWS wlibname[0] = L'\0'; llibname[0] = '\0'; - if (MultiByteToWideChar (CP_UTF8, 0, plugin->libname, -1, wlibname, 4097) <= 0 - || WideCharToMultiByte (CP_ACP, 0, wlibname, -1, llibname, 4097, NULL, NULL) < 0) - { + if ( (MultiByteToWideChar (CP_UTF8, 0, plugin->libname, -1, + wlibname, sizeof (wlibname)) <= 0) || + (WideCharToMultiByte (CP_ACP, 0, wlibname, -1, + llibname, sizeof (llibname), NULL, NULL) < 0) ) + { #if DEBUG fprintf (stderr, "Loading `%s' plugin failed: %s\n", @@ -185,13 +159,13 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) #endif } plugin->libraryHandle = lt_dlopenadvise (llibname, - advise); + advise); #else plugin->libraryHandle = lt_dlopenadvise (plugin->libname, - advise); + advise); #endif - lt_dladvise_destroy(&advise); - if (plugin->libraryHandle == NULL) + lt_dladvise_destroy (&advise); + if (NULL == plugin->libraryHandle) { #if DEBUG fprintf (stderr, @@ -205,10 +179,10 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) return -1; } plugin->extract_method = get_symbol_with_prefix (plugin->libraryHandle, - "_EXTRACTOR_%s_extract_method", - plugin->libname, - &plugin->specials); - if (plugin->extract_method == NULL) + "_EXTRACTOR_%s_extract_method", + plugin->libname, + &plugin->specials); + if (NULL == plugin->extract_method) { #if DEBUG fprintf (stderr, @@ -226,46 +200,39 @@ plugin_load (struct EXTRACTOR_PluginList *plugin) } - - /** * Add a library for keyword extraction. 
* * @param prev the previous list of libraries, may be NULL * @param library the name of the library + * @param options options to pass to the plugin * @param flags options to use * @return the new list of libraries, equal to prev iff an error occured */ struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, +EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList *prev, const char *library, const char *options, enum EXTRACTOR_Options flags) { struct EXTRACTOR_PluginList *result; - struct EXTRACTOR_PluginList *i; + struct EXTRACTOR_PluginList *pos; char *libname; - for (i = prev; i != NULL; i = i->next) - { - if (strcmp (i->short_libname, library) == 0) - return prev; - } - - libname = find_plugin (library); - if (libname == NULL) + for (pos = prev; NULL != pos; pos = pos->next) + if (0 == strcmp (pos->short_libname, library)) + return prev; /* no change, library already loaded */ + if (NULL == (libname = find_plugin (library))) { fprintf (stderr, "Could not load `%s'\n", library); return prev; } - result = calloc (1, sizeof (struct EXTRACTOR_PluginList)); - if (result == NULL) + if (NULL == (result = malloc (sizeof (struct EXTRACTOR_PluginList)))) return prev; result->next = prev; - result->short_libname = strdup (library); - if (result->short_libname == NULL) + if (NULL == (result->short_libname = strdup (library))) { free (result); return NULL; @@ -310,7 +277,7 @@ EXTRACTOR_plugin_add (struct EXTRACTOR_PluginList * prev, * or if config was empty (or NULL). 
*/ struct EXTRACTOR_PluginList * -EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, +EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList *prev, const char *config, enum EXTRACTOR_Options flags) { @@ -320,37 +287,49 @@ EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, ssize_t lastconf; size_t len; - if (config == NULL) + if (NULL == config) return prev; - len = strlen(config); - cpy = strdup(config); - if (cpy == NULL) + if (NULL == (cpy = strdup (config))) return prev; + len = strlen (config); pos = 0; last = 0; lastconf = 0; while (pos < len) { - while ((cpy[pos] != ':') && (cpy[pos] != '\0') && - (cpy[pos] != '(')) + while ( (':' != cpy[pos]) && + ('\0' != cpy[pos]) && + ('(' != cpy[pos]) ) pos++; - if( cpy[pos] == '(' ) { - cpy[pos++] = '\0'; /* replace '(' by termination */ - lastconf = pos; /* start config from here, after (. */ - while ((cpy[pos] != '\0') && (cpy[pos] != ')')) - pos++; /* config until ) or EOS. */ - if( cpy[pos] == ')' ) { - cpy[pos++] = '\0'; /* write end of config here. */ - while ((cpy[pos] != ':') && (cpy[pos] != '\0')) - pos++; /* forward until real end of string found. */ - cpy[pos++] = '\0'; - } else { - cpy[pos++] = '\0'; /* end of string. */ + switch (cpy[pos]) + { + case '(': + cpy[pos++] = '\0'; /* replace '(' by termination */ + lastconf = pos; /* start config from here, after (. */ + while ( ('\0' != cpy[pos]) && + (')' != cpy[pos])) + pos++; /* config until ) or EOS. */ + if (')' == cpy[pos]) + { + cpy[pos++] = '\0'; /* write end of config here. */ + while ( (':' != cpy[pos]) && + ('\0' != cpy[pos]) ) + pos++; /* forward until real end of string found. */ + cpy[pos++] = '\0'; + } + else + { + cpy[pos++] = '\0'; /* end of string. */ + } + break; + case ':': + case '\0': + lastconf = -1; /* NULL config when no (). */ + cpy[pos++] = '\0'; /* replace ':' by termination */ + break; + default: + abort (); } - } else { - lastconf = -1; /* NULL config when no (). 
*/ - cpy[pos++] = '\0'; /* replace ':' by termination */ - } if (cpy[last] == '-') { last++; @@ -361,7 +340,7 @@ EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, { prev = EXTRACTOR_plugin_add (prev, &cpy[last], - (lastconf != -1) ? &cpy[lastconf] : NULL, + (-1 != lastconf) ? &cpy[lastconf] : NULL, flags); } last = pos; @@ -369,3 +348,67 @@ EXTRACTOR_plugin_add_config (struct EXTRACTOR_PluginList * prev, free (cpy); return prev; } + + +/** + * Remove a plugin from a list. + * + * @param prev the current list of plugins + * @param library the name of the plugin to remove + * @return the reduced list, unchanged if the plugin was not loaded + */ +struct EXTRACTOR_PluginList * +EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList * prev, + const char * library) +{ + struct EXTRACTOR_PluginList *pos; + struct EXTRACTOR_PluginList *first; + + pos = prev; + first = prev; + while ( (NULL != pos) && + (0 != strcmp (pos->short_libname, library)) ) + { + prev = pos; + pos = pos->next; + } + if (NULL == pos) + { +#if DEBUG + fprintf(stderr, + "Unloading plugin `%s' failed!\n", + library); +#endif + return first; + } + /* found, close library */ + if (first == pos) + first = pos->next; + else + prev->next = pos->next; + /* found */ + stop_process (pos); + free (pos->short_libname); + free (pos->libname); + free (pos->plugin_options); + if (NULL != pos->libraryHandle) + lt_dlclose (pos->libraryHandle); + free (pos); + return first; +} + + +/** + * Remove all plugins from the given list (destroys the list). + * + * @param plugin the list of plugins + */ +void +EXTRACTOR_plugin_remove_all (struct EXTRACTOR_PluginList *plugins) +{ + while (NULL != plugins) + plugins = EXTRACTOR_plugin_remove (plugins, plugins->short_libname); +} + + +/* end of extractor_plugins.c */ diff --git a/src/main/extractor_plugins.h b/src/main/extractor_plugins.h @@ -1,6 +1,6 @@ /* This file is part of libextractor. 
- (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -18,14 +18,12 @@ Boston, MA 02111-1307, USA. */ -#if !defined (EXTRACTOR_PLUGINS_H) +#ifndef EXTRACTOR_PLUGINS_H #define EXTRACTOR_PLUGINS_H #include "platform.h" #include "plibc.h" #include "extractor.h" -//#include <dirent.h> -//#include <sys/types.h> #ifndef WINDOWS #include <sys/wait.h> #include <sys/shm.h> @@ -57,7 +55,7 @@ struct EXTRACTOR_PluginList char *libname; /** - * Name of the library (i.e., 'libextractor_foo.so') + * Short name of the plugin (i.e., 'foo') */ char *short_libname; @@ -69,7 +67,7 @@ struct EXTRACTOR_PluginList /** * Options for the plugin. */ - char * plugin_options; + char *plugin_options; /** * Special options for the plugin @@ -211,6 +209,7 @@ struct EXTRACTOR_PluginList int waiting_for_update; }; + /** * Load a plugin. * @@ -218,18 +217,6 @@ struct EXTRACTOR_PluginList * @return 0 on success, -1 on error */ int -plugin_load (struct EXTRACTOR_PluginList *plugin); - -int64_t -pl_read (struct EXTRACTOR_PluginList *plugin, unsigned char **data, size_t count); - -int64_t -pl_seek (struct EXTRACTOR_PluginList *plugin, int64_t pos, int whence); - -int64_t -pl_get_fsize (struct EXTRACTOR_PluginList *plugin); - -int64_t -pl_get_pos (struct EXTRACTOR_PluginList *plugin); +EXTRACTOR_plugin_load_ (struct EXTRACTOR_PluginList *plugin); #endif /* EXTRACTOR_PLUGINS_H */ diff --git a/src/main/extractor_plugpath.c b/src/main/extractor_plugpath.c @@ -1,6 +1,6 @@ /* This file is part of libextractor. 
- (C) 2002, 2003, 2004, 2005, 2006, 2009 Vidyut Samanta and Christian Grothoff + (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published @@ -33,25 +33,36 @@ #include "extractor_plugpath.h" /** - * Remove a trailing '/bin' from in (if present). + * Remove a trailing '/bin/' from 'in' (if present). + * + * @param in input string, modified + * @return NULL if 'in' is NULL, otherwise 'in' with '/bin/' removed */ static char * -cut_bin(char * in) { +cut_bin (char * in) +{ size_t p; - if (in == NULL) + if (NULL == in) return NULL; - p = strlen(in); - if (p > 4) { - if ( (in[p-1] == '/') || - (in[p-1] == '\\') ) - in[--p] = '\0'; - if (0 == strcmp(&in[p-3], - "bin")) { - in[p-3] = '\0'; - p -= 3; + p = strlen (in); + if (p < 4) + return in; + if ( ('/' == in[p-1]) || + ('\\' == in[p-1]) ) + in[--p] = '\0'; + if (0 == strcmp (&in[p-4], + "/bin")) + { + in[p-4] = '\0'; + p -= 4; + } + else if (0 == strcmp (&in[p-4], + "\bin")) + { + in[p-4] = '\0'; + p -= 4; } - } return in; } @@ -64,208 +75,269 @@ cut_bin(char * in) { * and the binary linking against it sits elsewhere. 
*/ static char * -get_path_from_proc_exe() { +get_path_from_proc_exe () +{ char fn[64]; char line[1024]; char dir[1024]; - char * lnk; - char * ret; - char * lestr; + char *lnk; + char *ret; + char *lestr; ssize_t size; - FILE * f; - - snprintf(fn, - sizeof (fn), - "/proc/%u/maps", - getpid()); - f = FOPEN(fn, "r"); - if (f != NULL) { - while (NULL != fgets(line, 1024, f)) { - if ( (1 == sscanf(line, - "%*x-%*x %*c%*c%*c%*c %*x %*2x:%*2x %*u%*[ ]%s", - dir)) && - (NULL != (lestr = strstr(dir, - "libextractor")) ) ) { - lestr[0] = '\0'; - fclose(f); - return strdup(dir); - } + FILE *f; + + snprintf (fn, + sizeof (fn), + "/proc/%u/maps", + getpid ()); + if (NULL != (f = FOPEN (fn, "r"))) + { + while (NULL != fgets (line, 1024, f)) + { + if ( (1 == sscanf (line, + "%*x-%*x %*c%*c%*c%*c %*x %*2x:%*2x %*u%*[ ]%s", + dir)) && + (NULL != (lestr = strstr (dir, + "libextractor")) ) ) + { + lestr[0] = '\0'; + fclose (f); + return strdup (dir); + } + } + fclose (f); } - fclose(f); - } - snprintf(fn, - sizeof (fn), - "/proc/%u/exe", - getpid()); - lnk = malloc(1029); /* 1024 + 5 for "lib/" catenation */ - if (lnk == NULL) - return NULL; - size = readlink(fn, lnk, 1023); - if ( (size <= 0) || (size >= 1024) ) { - free(lnk); + snprintf (fn, + sizeof (fn), + "/proc/%u/exe", + getpid ()); + if (NULL == (lnk = malloc (1029))) /* 1024 + 6 for "/lib/" catenation */ return NULL; - } + size = readlink (fn, lnk, 1023); + if ( (size <= 0) || (size >= 1024) ) + { + free (lnk); + return NULL; + } lnk[size] = '\0'; - while ( (lnk[size] != '/') && + while ( ('/' != lnk[size]) && (size > 0) ) size--; if ( (size < 4) || - (lnk[size-4] != '/') ) { - /* not installed in "/bin/" -- binary path probably useless */ - free(lnk); - return NULL; - } + ('/' != lnk[size-4]) ) + { + /* not installed in "/bin/" -- binary path probably useless */ + free (lnk); + return NULL; + } lnk[size] = '\0'; - lnk = cut_bin(lnk); - ret = realloc(lnk, strlen(lnk) + 5); - if (ret == NULL) + lnk = cut_bin (lnk); + if 
(NULL == (ret = realloc (lnk, strlen(lnk) + 6))) { free (lnk); return NULL; } - strcat(ret, "lib/"); /* guess "lib/" as the library dir */ + strcat (ret, "/lib/"); /* guess "lib/" as the library dir */ return ret; } #endif + #if WINDOWS /** * Try to determine path with win32-specific function */ static char * -get_path_from_module_filename() { - char * path; - char * ret; - char * idx; +get_path_from_module_filename () +{ + char *path; + char *ret; + char *idx; - path = malloc(4103); /* 4096+nil+6 for "/lib/" catenation */ - if (path == NULL) + if (NULL == (path = malloc (4103))) /* 4096+nil+6 for "/lib/" catenation */ return NULL; - GetModuleFileName(NULL, path, 4096); - idx = path + strlen(path); + GetModuleFileName (NULL, path, 4096); + idx = path + strlen (path); while ( (idx > path) && - (*idx != '\\') && - (*idx != '/') ) + ('\\' != *idx) && + ('/' != *idx) ) idx--; *idx = '\0'; - path = cut_bin(path); - ret = realloc(path, strlen(path) + 6); - if (ret == NULL) + path = cut_bin (path); + if (NULL == (ret = realloc (path, strlen(path) + 6))) { free (path); return NULL; } - strcat(ret, "/lib/"); /* guess "lib/" as the library dir */ + strcat (ret, "/lib/"); /* guess "lib/" as the library dir */ return ret; } #endif + #if DARWIN -static char * get_path_from_dyld_image() { - const char * path; - char * p, * s; - int i; +/** + * Signature of the '_NSGetExecutablePath" function. + * + * @param buf where to write the path + * @param number of bytes available in 'buf' + * @return 0 on success, otherwise desired number of bytes is stored in 'bufsize' + */ +typedef int (*MyNSGetExecutablePathProto) (char *buf, + size_t *bufsize); + + +/** + * Try to obtain the path of our executable using '_NSGetExecutablePath'. 
+ * + * @return NULL on error + */ +static char * +get_path_from_NSGetExecutablePath () +{ + static char zero; + char *path; + size_t len; + MyNSGetExecutablePathProto func; + + path = NULL; + if (NULL == (func = + (MyNSGetExecutablePathProto) dlsym (RTLD_DEFAULT, + "_NSGetExecutablePath"))) + return NULL; + path = &zero; + len = 0; + /* get the path len, including the trailing \0 */ + (void) func (path, &len); + if (0 == len) + return NULL; + path = GNUNET_malloc (len); + if (0 != func (path, &len)) + { + GNUNET_free (path); + return NULL; + } + len = strlen (path); + while ((path[len] != '/') && (len > 0)) + len--; + path[len] = '\0'; + return path; +} + + +/** + * Try to obtain the path of our executable using '_dyld_image' API. + * + * @return NULL on error + */ +static char * +get_path_from_dyld_image () +{ + const char *path; + char *s; + unsigned int i; int c; - p = NULL; - c = _dyld_image_count(); - for (i = 0; i < c; i++) { - if (_dyld_get_image_header(i) == &_mh_dylib_header) { - path = _dyld_get_image_name(i); - if (path != NULL && strlen(path) > 0) { - p = strdup(path); - if (p == NULL) - return NULL; - s = p + strlen(p); - while ( (s > p) && (*s != '/') ) - s--; - s++; - *s = '\0'; - } - break; + c = _dyld_image_count (); + for (i = 0; i < c; i++) + { + if (_dyld_get_image_header (i) != &_mh_dylib_header) + continue; + path = _dyld_get_image_name (i); + if ( (NULL == path) || (0 == strlen (path)) ) + continue; + if (NULL == (p = strdup (path))) + return NULL; + s = p + strlen (p); + while ( (s > p) && ('/' != *s) ) + s--; + s++; + *s = '\0'; + return p; } - } - return p; + return NULL; } #endif + /** - * This may also fail -- for example, if extract - * is not also installed. + * Return the actual path to a file found in the current + * PATH environment variable. 
+ * + * @return path to binary, NULL if not found */ static char * get_path_from_PATH() { struct stat sbuf; - char * path; - char * pos; - char * end; - char * buf; - char * ret; - const char * p; - - p = getenv("PATH"); - if (p == NULL) + char *path; + char *pos; + char *end; + char *buf; + char *ret; + const char *p; + + if (NULL == (p = getenv ("PATH"))) return NULL; - path = strdup(p); /* because we write on it */ - if (path == NULL) + if (NULL == (path = strdup (p))) /* because we write on it */ return NULL; - buf = malloc(strlen(path) + 20); - if (buf == NULL) + if (NULL == (buf = malloc (strlen(path) + 20))) { free (path); return NULL; } pos = path; - - while (NULL != (end = strchr(pos, ':'))) { - *end = '\0'; - sprintf(buf, "%s/%s", pos, "extract"); - if (0 == stat(buf, &sbuf)) { - pos = strdup(pos); - free(buf); - free(path); - if (pos == NULL) + while (NULL != (end = strchr(pos, ':'))) + { + *end = '\0'; + sprintf(buf, "%s/%s", pos, "extract"); + if (0 == stat(buf, &sbuf)) + { + pos = strdup(pos); + free (buf); + free (path); + if (NULL == pos) + return NULL; + pos = cut_bin (pos); + if (NULL == (ret = realloc (pos, strlen(pos) + 5))) + { + free (pos); + return NULL; + } + strcat (ret, "lib/"); + return ret; + } + pos = end + 1; + } + sprintf(buf, "%s/%s", pos, "extract"); + if (0 == stat (buf, &sbuf)) + { + pos = strdup (pos); + free (buf); + free (path); + if (NULL == pos) return NULL; - pos = cut_bin(pos); - ret = realloc(pos, strlen(pos) + 5); - if (ret == NULL) + pos = cut_bin (pos); + ret = realloc (pos, strlen(pos) + 5); + if (NULL == ret) { free (pos); return NULL; } - strcat(ret, "lib/"); + strcat (ret, "lib/"); return ret; } - pos = end + 1; - } - sprintf(buf, "%s/%s", pos, "extract"); - if (0 == stat(buf, &sbuf)) { - pos = strdup(pos); - free(buf); - free(path); - if (pos == NULL) - return NULL; - pos = cut_bin(pos); - ret = realloc(pos, strlen(pos) + 5); - if (ret == NULL) - { - free (pos); - return NULL; - } - strcat(ret, "lib/"); - return 
ret; - } free(buf); free(path); return NULL; } + /** * Create a filename by appending 'fname' to 'path'. * * @param path the base path * @param fname the filename to append - * @return '$path/$fname' + * @return '$path/$fname', NULL on error */ static char * append_to_dir (const char *path, @@ -274,16 +346,15 @@ append_to_dir (const char *path, char *ret; size_t slen; - slen = strlen (path); - if (slen == 0) + if (0 == (slen = strlen (path))) return NULL; - if (fname[0] == DIR_SEPARATOR) + if (DIR_SEPARATOR == fname[0]) fname++; ret = malloc (slen + strlen(fname) + 2); - if (ret == NULL) + if (NULL == ret) return NULL; #ifdef MINGW - if (path[slen-1] == '\\') + if ('\\' == path[slen-1]) sprintf (ret, "%s%s", path, @@ -294,16 +365,16 @@ append_to_dir (const char *path, path, fname); #else - if (path[slen-1] == '/') + if ('/' == path[slen-1]) sprintf (ret, - "%s%s", - path, - fname); + "%s%s", + path, + fname); else sprintf (ret, - "%s/%s", - path, - fname); + "%s/%s", + path, + fname); #endif return ret; } @@ -317,49 +388,47 @@ append_to_dir (const char *path, * @param pp_cls cls argument for pp. 
*/ void -get_installation_paths (PathProcessor pp, - void *pp_cls) +EXTRACTOR_get_installation_paths_ (EXTRACTOR_PathProcessor pp, + void *pp_cls) { const char *p; - char * path; - char * prefix; - char * d; + char *path; + char *prefix; + char *d; prefix = NULL; - p = getenv("LIBEXTRACTOR_PREFIX"); - if (p != NULL) + if (NULL != (p = getenv ("LIBEXTRACTOR_PREFIX"))) { - d = strdup (p); - if (d == NULL) + if (NULL == (d = strdup (p))) return; - prefix = strtok (d, PATH_SEPARATOR_STR); - while (NULL != prefix) - { - pp (pp_cls, prefix); - prefix = strtok (NULL, PATH_SEPARATOR_STR); - } + for (prefix = strtok (d, PATH_SEPARATOR_STR); + NULL != prefix; + prefix = strtok (NULL, PATH_SEPARATOR_STR)) + pp (pp_cls, prefix); free (d); return; } #if LINUX - if (prefix == NULL) - prefix = get_path_from_proc_exe(); + if (NULL == prefix) + prefix = get_path_from_proc_exe (); #endif #if WINDOWS - if (prefix == NULL) - prefix = get_path_from_module_filename(); + if (NULL == prefix) + prefix = get_path_from_module_filename (); #endif #if DARWIN - if (prefix == NULL) - prefix = get_path_from_dyld_image(); + if (NULL == prefix) + prefix = get_path_from_NSGetExecutablePath (); + if (NULL == prefix) + prefix = get_path_from_dyld_image (); #endif - if (prefix == NULL) - prefix = get_path_from_PATH(); + if (NULL == prefix) + prefix = get_path_from_PATH (); pp (pp_cls, PLUGININSTDIR); - if (prefix == NULL) + if (NULL == prefix) return; path = append_to_dir (prefix, PLUGINDIR); - if (path != NULL) + if (NULL != path) { if (0 != strcmp (path, PLUGININSTDIR)) @@ -370,9 +439,19 @@ get_installation_paths (PathProcessor pp, } +/** + * Closure for 'find_plugin_in_path'. + */ struct SearchContext { + /** + * Name of the plugin we are looking for. + */ const char *short_name; + + /** + * Location for storing the path to the plugin upon success. 
+ */ char *path; }; @@ -395,30 +474,28 @@ find_plugin_in_path (void *cls, char *sym; char *dot; - if (sc->path != NULL) + if (NULL != sc->path) return; - dir = OPENDIR (path); - if (NULL == dir) + if (NULL == (dir = OPENDIR (path))) return; while (NULL != (ent = READDIR (dir))) { - if (ent->d_name[0] == '.') + if ('.' == ent->d_name[0]) continue; if ( (NULL != (la = strstr (ent->d_name, ".la"))) && - (la[3] == '\0') ) + ('\0' == la[3]) ) continue; /* only load '.so' and '.dll' */ - sym_name = strrchr (ent->d_name, '_'); - if (sym_name == NULL) + if (NULL == (sym_name = strrchr (ent->d_name, '_'))) continue; sym_name++; sym = strdup (sym_name); - if (sym == NULL) + if (NULL == sym) { CLOSEDIR (dir); return; } dot = strchr (sym, '.'); - if (dot != NULL) + if (NULL != dot) *dot = '\0'; if (0 == strcmp (sym, sc->short_name)) { @@ -428,13 +505,6 @@ find_plugin_in_path (void *cls, } free (sym); } -#if DEBUG - if (sc->path == NULL) - fprintf (stderr, - "Failed to find plugin `%s' in `%s'\n", - sc->short_name, - path); -#endif CLOSEDIR (dir); } @@ -444,7 +514,7 @@ find_plugin_in_path (void *cls, * the full path of the respective plugin. */ char * -find_plugin (const char *short_name) +EXTRACTOR_find_plugin_ (const char *short_name) { struct SearchContext sc; @@ -456,13 +526,30 @@ find_plugin (const char *short_name) } +/** + * Closure for 'load_plugins_from_dir'. + */ +struct DefaultLoaderContext +{ + /** + * Accumulated result list. + */ + struct EXTRACTOR_PluginList *res; + + /** + * Flags to use for all plugins. + */ + enum EXTRACTOR_Options flags; +}; + + /** * Load all plugins from the given directory. * * @param cls pointer to the "struct EXTRACTOR_PluginList*" to extend * @param path path to a directory with plugins */ -void +static void load_plugins_from_dir (void *cls, const char *path) { @@ -514,3 +601,32 @@ load_plugins_from_dir (void *cls, closedir (dir); } + +/** + * Load the default set of plugins. 
The default can be changed + * by setting the LIBEXTRACTOR_LIBRARIES environment variable. + * If it is set to "env", then this function will return + * EXTRACTOR_plugin_add_config (NULL, env, flags). Otherwise, + * it will load all of the installed plugins and return them. + * + * @param flags options for all of the plugins loaded + * @return the default set of plugins, NULL if no plugins were found + */ +struct EXTRACTOR_PluginList * +EXTRACTOR_plugin_add_defaults (enum EXTRACTOR_Options flags) +{ + struct DefaultLoaderContext dlc; + char *env; + + env = getenv ("LIBEXTRACTOR_LIBRARIES"); + if (NULL != env) + return EXTRACTOR_plugin_add_config (NULL, env, flags); + dlc.res = NULL; + dlc.flags = flags; + get_installation_paths (&load_plugins_from_dir, + &dlc); + return dlc.res; +} + + +/* end of extractor_plugpath.c */ diff --git a/src/main/extractor_plugpath.h b/src/main/extractor_plugpath.h @@ -1,20 +1,15 @@ -#if !defined(EXTRACTOR_PLUGPATH_H) +#ifndef EXTRACTOR_PLUGPATH_H #define EXTRACTOR_PLUGPATH_H -struct DefaultLoaderContext -{ - struct EXTRACTOR_PluginList *res; - enum EXTRACTOR_Options flags; -}; - /** * Function to call on paths. * * @param cls closure * @param path a directory path */ -typedef void (*PathProcessor)(void *cls, - const char *path); +typedef void (*EXTRACTOR_PathProcessor)(void *cls, + const char *path); + /** * Iterate over all paths where we expect to find GNU libextractor @@ -24,24 +19,17 @@ typedef void (*PathProcessor)(void *cls, * @param pp_cls cls argument for pp. */ void -get_installation_paths (PathProcessor pp, - void *pp_cls); +EXTRACTOR_get_installation_paths_ (EXTRACTOR_PathProcessor pp, + void *pp_cls); + /** * Given a short name of a library (i.e. "mime"), find * the full path of the respective plugin. */ -char * -find_plugin (const char *short_name); +char * +EXTRACTOR_find_plugin_ (const char *short_name); -/** - * Load all plugins from the given directory. 
- * - * @param cls pointer to the "struct EXTRACTOR_PluginList*" to extend - * @param path path to a directory with plugins - */ -void -load_plugins_from_dir (void *cls, - const char *path); -#endif /* EXTRACTOR_PLUGPATH_H */ +#endif +/* EXTRACTOR_PLUGPATH_H */ diff --git a/src/main/extractor_print.c b/src/main/extractor_print.c @@ -40,44 +40,43 @@ * @return non-zero if printing failed, otherwise 0. */ int -EXTRACTOR_meta_data_print(void * handle, - const char *plugin_name, - enum EXTRACTOR_MetaType type, - enum EXTRACTOR_MetaFormat format, - const char *data_mime_type, - const char *data, - size_t data_len) +EXTRACTOR_meta_data_print (void *handle, + const char *plugin_name, + enum EXTRACTOR_MetaType type, + enum EXTRACTOR_MetaFormat format, + const char *data_mime_type, + const char *data, + size_t data_len) { iconv_t cd; char * buf; int ret; const char *mt; - if (format != EXTRACTOR_METAFORMAT_UTF8) + if (EXTRACTOR_METAFORMAT_UTF8 != format) return 0; - cd = iconv_open(nl_langinfo(CODESET), - "UTF-8"); - if (cd == (iconv_t) -1) + cd = iconv_open (nl_langinfo(CODESET), + "UTF-8"); + if (((iconv_t) -1) == cd) return 1; buf = iconv_helper(cd, data); - if (buf != NULL) + if (NULL == buf) { - mt = EXTRACTOR_metatype_to_string (type); - ret = fprintf(handle, - "%s - %s\n", - (mt == NULL) ? _("unknown") : dgettext ("libextractor", - mt), - buf); - free(buf); + ret = -1; } else { - ret = -1; + mt = EXTRACTOR_metatype_to_string (type); + ret = fprintf (handle, + "%s - %s\n", + (NULL == mt) + ? dgettext ("libextractor", gettext_noop ("unknown")) + : dgettext ("libextractor", mt), + buf); + free(buf); } iconv_close(cd); - if (ret < 0) - return 1; - return 0; + return (ret < 0) ? 1 : 0; } /* end of extractor_print.c */