[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r2048 - Extractor/src/plugins/ole2
From: |
grothoff |
Subject: |
[GNUnet-SVN] r2048 - Extractor/src/plugins/ole2 |
Date: |
Sun, 4 Sep 2005 00:48:01 -0700 (PDT) |
Author: grothoff
Date: 2005-09-04 00:47:58 -0700 (Sun, 04 Sep 2005)
New Revision: 2048
Modified:
Extractor/src/plugins/ole2/Makefile.am
Extractor/src/plugins/ole2/gsf-infile-msole.c
Extractor/src/plugins/ole2/gsf-infile-msole.h
Extractor/src/plugins/ole2/gsf-input.c
Extractor/src/plugins/ole2/gsf-utils.c
Extractor/src/plugins/ole2/gsf-utils.h
Extractor/src/plugins/ole2/ole2extractor.c
Log:
rewrite
Modified: Extractor/src/plugins/ole2/Makefile.am
===================================================================
--- Extractor/src/plugins/ole2/Makefile.am 2005-09-03 21:12:54 UTC (rev
2047)
+++ Extractor/src/plugins/ole2/Makefile.am 2005-09-04 07:47:58 UTC (rev
2048)
@@ -1,6 +1,7 @@
INCLUDES = \
-I$(top_srcdir)/src/include
+EXTRA_DIST = SYMBOLS
LIBS = \
@LTLIBINTL@ @LIBS@
@@ -8,25 +9,20 @@
# install plugins under:
plugindir = $(libdir)/libextractor
-PLUGINFLAGS = -export-dynamic -avoid-version -module
+PLUGINFLAGS = -Wl,-Bsymbolic -avoid-version -module -no-undefined
+
plugin_LTLIBRARIES = \
libextractor_ole2.la
AM_CFLAGS = $(GLIB_CFLAGS)
libextractor_ole2_la_CFLAGS = \
- $(GLIB_CFLAGS)
+ $(GLIB_CFLAGS)
libextractor_ole2_la_LIBADD = \
$(LIBADD) $(GLIB_LIBS) -lgobject-2.0 \
$(top_builddir)/src/main/libextractor.la
libextractor_ole2_la_LDFLAGS = \
- $(PLUGINFLAGS)
+ $(PLUGINFLAGS) -Wl,--retain-symbols-file -Wl,SYMBOLS
libextractor_ole2_la_SOURCES = \
- ole2extractor.c \
- gsf-utils.c \
- gsf-utils.h \
- gsf-input.c \
- gsf-input.h \
- gsf-infile-msole.c \
- gsf-infile-msole.h
+ ole2extractor.c
Modified: Extractor/src/plugins/ole2/gsf-infile-msole.c
===================================================================
--- Extractor/src/plugins/ole2/gsf-infile-msole.c 2005-09-03 21:12:54 UTC
(rev 2047)
+++ Extractor/src/plugins/ole2/gsf-infile-msole.c 2005-09-04 07:47:58 UTC
(rev 2048)
@@ -21,871 +21,3 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*/
-
-#include "platform.h"
-#include <glib-object.h>
-#include "gsf-input.h"
-#include "gsf-infile-msole.h"
-#include "gsf-utils.h"
-
-#include <string.h>
-#include <stdio.h>
-
-#define OLE_HEADER_SIZE 0x200 /* independent of big block
size size */
-#define OLE_HEADER_SIGNATURE 0x00
-#define OLE_HEADER_CLSID 0x08 /* See ReadClassStg */
-#define OLE_HEADER_MINOR_VER 0x18 /* 0x33 and 0x3e have been seen */
-#define OLE_HEADER_MAJOR_VER 0x1a /* 0x3 been seen in wild */
-#define OLE_HEADER_BYTE_ORDER 0x1c /* 0xfe 0xff == Intel Little Endian */
-#define OLE_HEADER_BB_SHIFT 0x1e
-#define OLE_HEADER_SB_SHIFT 0x20
-/* 0x22..0x27 reserved == 0 */
-#define OLE_HEADER_CSECTDIR 0x28
-#define OLE_HEADER_NUM_BAT 0x2c
-#define OLE_HEADER_DIRENT_START 0x30
-/* 0x34..0x37 transacting signature must be 0 */
-#define OLE_HEADER_THRESHOLD 0x38
-#define OLE_HEADER_SBAT_START 0x3c
-#define OLE_HEADER_NUM_SBAT 0x40
-#define OLE_HEADER_METABAT_BLOCK 0x44
-#define OLE_HEADER_NUM_METABAT 0x48
-#define OLE_HEADER_START_BAT 0x4c
-#define BAT_INDEX_SIZE 4
-#define OLE_HEADER_METABAT_SIZE ((OLE_HEADER_SIZE -
OLE_HEADER_START_BAT) / BAT_INDEX_SIZE)
-
-#define DIRENT_MAX_NAME_SIZE 0x40
-#define DIRENT_DETAILS_SIZE 0x40
-#define DIRENT_SIZE (DIRENT_MAX_NAME_SIZE + DIRENT_DETAILS_SIZE)
-#define DIRENT_NAME_LEN 0x40 /* length in bytes incl 0
terminator */
-#define DIRENT_TYPE 0x42
-#define DIRENT_COLOUR 0x43
-#define DIRENT_PREV 0x44
-#define DIRENT_NEXT 0x48
-#define DIRENT_CHILD 0x4c
-#define DIRENT_CLSID 0x50 /* only for dirs */
-#define DIRENT_USERFLAGS 0x60 /* only for dirs */
-#define DIRENT_CREATE_TIME 0x64 /* for files */
-#define DIRENT_MODIFY_TIME 0x6c /* for files */
-#define DIRENT_FIRSTBLOCK 0x74
-#define DIRENT_FILE_SIZE 0x78
-/* 0x7c..0x7f reserved == 0 */
-
-#define DIRENT_TYPE_INVALID 0
-#define DIRENT_TYPE_DIR 1
-#define DIRENT_TYPE_FILE 2
-#define DIRENT_TYPE_LOCKBYTES 3 /* ? */
-#define DIRENT_TYPE_PROPERTY 4 /* ? */
-#define DIRENT_TYPE_ROOTDIR 5
-#define DIRENT_MAGIC_END 0xffffffff
-
-/* flags in the block allocation list to denote special blocks */
-#define BAT_MAGIC_UNUSED 0xffffffff /* -1 */
-#define BAT_MAGIC_END_OF_CHAIN 0xfffffffe /* -2 */
-#define BAT_MAGIC_BAT 0xfffffffd /* a bat block, -3 */
-#define BAT_MAGIC_METABAT 0xfffffffc /* a metabat block -4 */
-
-
-
-
-typedef struct {
- guint32 *block;
- guint32 num_blocks;
-} MSOleBAT;
-
-typedef struct {
- char *name;
- char *collation_name;
- int index;
- size_t size;
- gboolean use_sb;
- guint32 first_block;
- gboolean is_directory;
- GList *children;
- unsigned char clsid[16]; /* 16 byte GUID used by some apps */
-} MSOleDirent;
-
-typedef struct {
- struct {
- MSOleBAT bat;
- unsigned shift;
- unsigned filter;
- size_t size;
- } bb, sb;
- off_t max_block;
- guint32 threshold; /* transition between small and big blocks */
- guint32 sbat_start, num_sbat;
-
- MSOleDirent *root_dir;
- struct GsfInput *sb_file;
-
- int ref_count;
-} MSOleInfo;
-
-typedef struct GsfInfileMSOle {
- off_t size;
- off_t cur_offset;
- struct GsfInput *input;
- MSOleInfo *info;
- MSOleDirent *dirent;
- MSOleBAT bat;
- off_t cur_block;
-
- struct {
- guint8 *buf;
- size_t buf_size;
- } stream;
-} GsfInfileMSOle;
-
-/* utility macros */
-#define OLE_BIG_BLOCK(index, ole) ((index) >> ole->info->bb.shift)
-
-static struct GsfInput *gsf_infile_msole_new_child (GsfInfileMSOle *parent,
- MSOleDirent *dirent);
-
-/**
- * ole_get_block :
- * @ole : the infile
- * @block :
- * @buffer : optionally NULL
- *
- * Read a block of data from the underlying input.
- * Be really anal.
- **/
-static const guint8 *
-ole_get_block (const GsfInfileMSOle *ole, guint32 block, guint8 *buffer)
-{
- g_return_val_if_fail (block < ole->info->max_block, NULL);
-
- /* OLE_HEADER_SIZE is fixed at 512, but the sector containing the
- * header is padded out to bb.size (sector size) when bb.size > 512. */
- if (gsf_input_seek (ole->input,
- (off_t)(MAX (OLE_HEADER_SIZE, ole->info->bb.size) + (block <<
ole->info->bb.shift)),
- SEEK_SET) < 0)
- return NULL;
-
- return gsf_input_read (ole->input, ole->info->bb.size, buffer);
-}
-
-/**
- * ole_make_bat :
- * @metabat : a meta bat to connect to the raw blocks (small or large)
- * @size_guess : An optional guess as to how many blocks are in the file
- * @block : The first block in the list.
- * @res : where to store the result.
- *
- * Walk the linked list of the supplied block allocation table and build up a
- * table for the list starting in @block.
- *
- * Returns TRUE on error.
- */
-static gboolean
-ole_make_bat (MSOleBAT const *metabat, size_t size_guess, guint32 block,
- MSOleBAT *res)
-{
- /* NOTE : Only use size as a suggestion, sometimes it is wrong */
- GArray *bat = g_array_sized_new (FALSE, FALSE,
- sizeof (guint32), size_guess);
-
- guint8 *used = (guint8*)g_alloca (1 + metabat->num_blocks / 8);
- memset (used, 0, 1 + metabat->num_blocks / 8);
-
- if (block < metabat->num_blocks)
- do {
- /* Catch cycles in the bat list */
- g_return_val_if_fail (0 == (used[block/8] & (1 <<
(block & 0x7))), TRUE);
- used[block/8] |= 1 << (block & 0x7);
-
- g_array_append_val (bat, block);
- block = metabat->block [block];
- } while (block < metabat->num_blocks);
-
- res->block = NULL;
-
- res->num_blocks = bat->len;
- res->block = (guint32 *) (gpointer) g_array_free (bat, FALSE);
-
- if (block != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
- g_warning ("This OLE2 file is invalid.\n"
- "The Block Allocation Table for one of the streams
had %x instead of a terminator (%x).\n"
- "We might still be able to extract some data, but
you'll want to check the file.",
- block, BAT_MAGIC_END_OF_CHAIN);
-#endif
- }
-
- return FALSE;
-}
-
-static void
-ols_bat_release (MSOleBAT *bat)
-{
- if (bat->block != NULL) {
- g_free (bat->block);
- bat->block = NULL;
- bat->num_blocks = 0;
- }
-}
-
-/**
- * ole_info_read_metabat :
- * @ole :
- * @bats :
- *
- * A small utility routine to read a set of references to bat blocks
- * either from the OLE header, or a meta-bat block.
- *
- * Returns a pointer to the element after the last position filled.
- **/
-static guint32 *
-ole_info_read_metabat (GsfInfileMSOle *ole, guint32 *bats, guint32 max,
- guint32 const *metabat, guint32 const *metabat_end)
-{
- guint8 const *bat, *end;
-
- for (; metabat < metabat_end; metabat++) {
- bat = ole_get_block (ole, *metabat, NULL);
- if (bat == NULL)
- return NULL;
- end = bat + ole->info->bb.size;
- for ( ; bat < end ; bat += BAT_INDEX_SIZE, bats++) {
- *bats = GSF_LE_GET_GUINT32 (bat);
- g_return_val_if_fail (*bats < max ||
- *bats >= BAT_MAGIC_METABAT, NULL);
- }
- }
- return bats;
-}
-
-/**
- * gsf_ole_get_guint32s :
- * @dst :
- * @src :
- * @num_bytes :
- *
- * Copy some some raw data into an array of guint32.
- **/
-static void
-gsf_ole_get_guint32s (guint32 *dst, guint8 const *src, int num_bytes)
-{
- for (; (num_bytes -= BAT_INDEX_SIZE) >= 0 ; src += BAT_INDEX_SIZE)
- *dst++ = GSF_LE_GET_GUINT32 (src);
-}
-
-static struct GsfInput *
-ole_info_get_sb_file (GsfInfileMSOle *parent)
-{
- MSOleBAT meta_sbat;
-
- if (parent->info->sb_file != NULL)
- return parent->info->sb_file;
-
- parent->info->sb_file = gsf_infile_msole_new_child (parent,
- parent->info->root_dir);
-
- if (NULL == parent->info->sb_file)
- return NULL;
-
- g_return_val_if_fail (parent->info->sb.bat.block == NULL, NULL);
-
- if (ole_make_bat (&parent->info->bb.bat,
- parent->info->num_sbat,
- parent->info->sbat_start,
- &meta_sbat)) {
- return NULL;
- }
-
- parent->info->sb.bat.num_blocks = meta_sbat.num_blocks *
(parent->info->bb.size / BAT_INDEX_SIZE);
- parent->info->sb.bat.block = g_new0 (guint32,
parent->info->sb.bat.num_blocks);
- ole_info_read_metabat (parent, parent->info->sb.bat.block,
- parent->info->sb.bat.num_blocks,
- meta_sbat.block, meta_sbat.block + meta_sbat.num_blocks);
- ols_bat_release (&meta_sbat);
-
- return parent->info->sb_file;
-}
-
-static gint
-ole_dirent_cmp (const MSOleDirent *a, const MSOleDirent *b)
-{
- g_return_val_if_fail (a, 0);
- g_return_val_if_fail (b, 0);
-
- g_return_val_if_fail (a->collation_name, 0);
- g_return_val_if_fail (b->collation_name, 0);
-
- return strcmp (b->collation_name, a->collation_name);
-}
-
-/**
- * ole_dirent_new :
- * @ole :
- * @entry :
- * @parent : optional
- *
- * Parse dirent number @entry and recursively handle its siblings and children.
- **/
-static MSOleDirent *
-ole_dirent_new (GsfInfileMSOle *ole, guint32 entry, MSOleDirent *parent)
-{
- MSOleDirent *dirent;
- guint32 block, next, prev, child, size;
- guint8 const *data;
- guint8 type;
- guint16 name_len;
-
- if (entry >= DIRENT_MAGIC_END)
- return NULL;
-
- block = OLE_BIG_BLOCK (entry * DIRENT_SIZE, ole);
-
- g_return_val_if_fail (block < ole->bat.num_blocks, NULL);
- data = ole_get_block (ole, ole->bat.block [block], NULL);
- if (data == NULL)
- return NULL;
- data += (DIRENT_SIZE * entry) % ole->info->bb.size;
-
- type = GSF_LE_GET_GUINT8 (data + DIRENT_TYPE);
- if (type != DIRENT_TYPE_DIR &&
- type != DIRENT_TYPE_FILE &&
- type != DIRENT_TYPE_ROOTDIR) {
-#if 0
- g_warning ("Unknown stream type 0x%x", type);
-#endif
- return NULL;
- }
-
- /* It looks like directory (and root directory) sizes are sometimes
bogus */
- size = GSF_LE_GET_GUINT32 (data + DIRENT_FILE_SIZE);
- g_return_val_if_fail (type == DIRENT_TYPE_DIR || type ==
DIRENT_TYPE_ROOTDIR ||
- size <= (guint32)gsf_input_size(ole->input),
NULL);
-
- dirent = g_new0 (MSOleDirent, 1);
- dirent->index = entry;
- dirent->size = size;
- /* Store the class id which is 16 byte identifier used by some apps */
- memcpy(dirent->clsid, data + DIRENT_CLSID, sizeof(dirent->clsid));
-
- /* root dir is always big block */
- dirent->use_sb = parent && (size < ole->info->threshold);
- dirent->first_block = (GSF_LE_GET_GUINT32 (data + DIRENT_FIRSTBLOCK));
- dirent->is_directory = (type != DIRENT_TYPE_FILE);
- dirent->children = NULL;
- prev = GSF_LE_GET_GUINT32 (data + DIRENT_PREV);
- next = GSF_LE_GET_GUINT32 (data + DIRENT_NEXT);
- child = GSF_LE_GET_GUINT32 (data + DIRENT_CHILD);
- name_len = GSF_LE_GET_GUINT16 (data + DIRENT_NAME_LEN);
- dirent->name = NULL;
- if (0 < name_len && name_len <= DIRENT_MAX_NAME_SIZE) {
- gunichar2 uni_name [DIRENT_MAX_NAME_SIZE+1];
- gchar const *end;
- int i;
-
- /* address@hidden
- * Sometimes, rarely, people store the stream name as ascii
- * rather than utf16. Do a validation first just in case.
- */
- if (!g_utf8_validate (data, -1, &end) ||
- ((guint8 const *)end - data + 1) != name_len) {
- /* be wary about endianness */
- for (i = 0 ; i < name_len ; i += 2)
- uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
- uni_name [i/2] = 0;
-
- dirent->name = g_utf16_to_utf8 (uni_name, -1, NULL,
NULL, NULL);
- } else
- dirent->name = g_strndup ((gchar *)data,
(gsize)((guint8 const *)end - data + 1));
- }
- /* be really anal in the face of screwups */
- if (dirent->name == NULL)
- dirent->name = g_strdup ("");
- dirent->collation_name = g_utf8_collate_key (dirent->name, -1);
-
- if (parent != NULL)
- parent->children = g_list_insert_sorted (parent->children,
- dirent, (GCompareFunc)ole_dirent_cmp);
-
- /* NOTE : These links are a tree, not a linked list */
- if (prev != entry)
- ole_dirent_new (ole, prev, parent);
- if (next != entry)
- ole_dirent_new (ole, next, parent);
-
- if (dirent->is_directory)
- ole_dirent_new (ole, child, dirent);
- return dirent;
-}
-
-static void
-ole_dirent_free (MSOleDirent *dirent)
-{
- GList *tmp;
- g_return_if_fail (dirent != NULL);
-
- g_free (dirent->name);
- g_free (dirent->collation_name);
-
- for (tmp = dirent->children; tmp; tmp = tmp->next)
- ole_dirent_free ((MSOleDirent *)tmp->data);
- g_list_free (dirent->children);
- g_free (dirent);
-}
-
-/*****************************************************************************/
-
-static void
-ole_info_unref (MSOleInfo *info)
-{
- if (info->ref_count-- != 1)
- return;
-
- ols_bat_release (&info->bb.bat);
- ols_bat_release (&info->sb.bat);
- if (info->root_dir != NULL) {
- ole_dirent_free (info->root_dir);
- info->root_dir = NULL;
- }
- if (info->sb_file != NULL) {
- gsf_input_finalize(info->sb_file);
- info->sb_file = NULL;
- }
- g_free (info);
-}
-
-static MSOleInfo *
-ole_info_ref (MSOleInfo *info)
-{
- info->ref_count++;
- return info;
-}
-
-static void
-gsf_infile_msole_init (GsfInfileMSOle * ole)
-{
- ole->cur_offset = 0;
- ole->size = 0;
- ole->input = NULL;
- ole->info = NULL;
- ole->bat.block = NULL;
- ole->bat.num_blocks = 0;
- ole->cur_block = BAT_MAGIC_UNUSED;
- ole->stream.buf = NULL;
- ole->stream.buf_size = 0;
-}
-
-/**
- * ole_dup :
- * @src :
- *
- * Utility routine to _partially_ replicate a file. It does NOT copy the bat
- * blocks, or init the dirent.
- *
- * Return value: the partial duplicate.
- **/
-static GsfInfileMSOle *
-ole_dup (GsfInfileMSOle const * src)
-{
- GsfInfileMSOle *dst;
- struct GsfInput *input;
-
- g_return_val_if_fail (src != NULL, NULL);
-
- dst = malloc(sizeof(GsfInfileMSOle));
- if (dst == NULL)
- return NULL;
- gsf_infile_msole_init(dst);
- input = gsf_input_dup (src->input);
- if (input == NULL) {
- gsf_infile_msole_finalize(dst);
- return NULL;
- }
- dst->input = input;
- dst->info = ole_info_ref (src->info);
-
- /* buf and buf_size are initialized to NULL */
-
- return dst;
-}
-
-/**
- * ole_init_info :
- * @ole :
- *
- * Read an OLE header and do some sanity checking
- * along the way.
- *
- * Return value: TRUE on error
- **/
-static gboolean
-ole_init_info (GsfInfileMSOle *ole)
-{
- static guint8 const signature[] =
- { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
- guint8 const *header, *tmp;
- guint32 *metabat = NULL;
- MSOleInfo *info;
- guint32 bb_shift, sb_shift, num_bat, num_metabat, last, dirent_start;
- guint32 metabat_block, *ptr;
-
- /* check the header */
- if (gsf_input_seek (ole->input, (off_t) 0, SEEK_SET) ||
- NULL == (header = gsf_input_read (ole->input, OLE_HEADER_SIZE,
NULL)) ||
- 0 != memcmp (header, signature, sizeof (signature))) {
- return TRUE;
- }
-
- bb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_BB_SHIFT);
- sb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_SB_SHIFT);
- num_bat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_BAT);
- dirent_start = GSF_LE_GET_GUINT32 (header + OLE_HEADER_DIRENT_START);
- metabat_block = GSF_LE_GET_GUINT32 (header + OLE_HEADER_METABAT_BLOCK);
- num_metabat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_METABAT);
-
- /* Some sanity checks
- * 1) There should always be at least 1 BAT block
- * 2) It makes no sense to have a block larger than 2^31 for now.
- * Maybe relax this later, but not much.
- */
- if (6 > bb_shift || bb_shift >= 31 || sb_shift > bb_shift) {
- return TRUE;
- }
-
- info = g_new0 (MSOleInfo, 1);
- ole->info = info;
-
- info->ref_count = 1;
- info->bb.shift = bb_shift;
- info->bb.size = 1 << info->bb.shift;
- info->bb.filter = info->bb.size - 1;
- info->sb.shift = sb_shift;
- info->sb.size = 1 << info->sb.shift;
- info->sb.filter = info->sb.size - 1;
- info->threshold = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_THRESHOLD);
- info->sbat_start = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_SBAT_START);
- info->num_sbat = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_NUM_SBAT);
- info->max_block = (gsf_input_size (ole->input) - OLE_HEADER_SIZE)
/ info->bb.size;
- info->sb_file = NULL;
-
- if (info->num_sbat == 0 && info->sbat_start != BAT_MAGIC_END_OF_CHAIN) {
-#if 0
- g_warning ("There is are not supposed to be any blocks in the
small block allocation table, yet there is a link to some. Ignoring it.");
-#endif
- }
-
- /* very rough heuristic, just in case */
- if (num_bat < info->max_block) {
- info->bb.bat.num_blocks = num_bat * (info->bb.size /
BAT_INDEX_SIZE);
- info->bb.bat.block = g_new0 (guint32,
info->bb.bat.num_blocks);
-
- metabat = (guint32 *)g_alloca (MAX (info->bb.size,
OLE_HEADER_SIZE));
-
- /* Reading the elements invalidates this memory, make copy */
- gsf_ole_get_guint32s (metabat, header + OLE_HEADER_START_BAT,
- OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
- last = num_bat;
- if (last > OLE_HEADER_METABAT_SIZE)
- last = OLE_HEADER_METABAT_SIZE;
-
- ptr = ole_info_read_metabat (ole, info->bb.bat.block,
- info->bb.bat.num_blocks, metabat, metabat + last);
- num_bat -= last;
- } else
- ptr = NULL;
-
- last = (info->bb.size - BAT_INDEX_SIZE) / BAT_INDEX_SIZE;
- while (ptr != NULL && num_metabat-- > 0) {
- tmp = ole_get_block (ole, metabat_block, NULL);
- if (tmp == NULL) {
- ptr = NULL;
- break;
- }
-
- /* Reading the elements invalidates this memory, make copy */
- gsf_ole_get_guint32s (metabat, tmp, (int)info->bb.size);
-
- if (num_metabat == 0) {
- if (last < num_bat) {
- /* there should be less that a full metabat
block
- * remaining */
- ptr = NULL;
- break;
- }
- last = num_bat;
- } else if (num_metabat > 0) {
- metabat_block = metabat[last];
- num_bat -= last;
- }
-
- ptr = ole_info_read_metabat (ole, ptr,
- info->bb.bat.num_blocks, metabat, metabat + last);
- }
-
- if (ptr == NULL) {
- return TRUE;
- }
-
- /* Read the directory's bat, we do not know the size */
- if (ole_make_bat (&info->bb.bat, 0, dirent_start, &ole->bat)) {
- return TRUE;
- }
-
- /* Read the directory */
- ole->dirent = info->root_dir = ole_dirent_new (ole, 0, NULL);
- if (ole->dirent == NULL) {
- return TRUE;
- }
-
- return FALSE;
-}
-
-void
-gsf_infile_msole_finalize (GsfInfileMSOle * ole)
-{
- if (ole->input != NULL) {
- gsf_input_finalize(ole->input);
- ole->input = NULL;
- }
- if (ole->info != NULL) {
- ole_info_unref (ole->info);
- ole->info = NULL;
- }
- ols_bat_release (&ole->bat);
-
- g_free (ole->stream.buf);
- free(ole);
-}
-
-static guint8 const *
-gsf_infile_msole_read (GsfInfileMSOle *ole, size_t num_bytes, guint8 *buffer)
-{
- off_t first_block, last_block, raw_block, offset, i;
- guint8 const *data;
- guint8 *ptr;
- size_t count;
-
- /* small block files are preload */
- if (ole->dirent != NULL && ole->dirent->use_sb) {
- if (buffer != NULL) {
- memcpy (buffer, ole->stream.buf + ole->cur_offset,
num_bytes);
- ole->cur_offset += num_bytes;
- return buffer;
- }
- data = ole->stream.buf + ole->cur_offset;
- ole->cur_offset += num_bytes;
- return data;
- }
-
- /* GsfInput guarantees that num_bytes > 0 */
- first_block = OLE_BIG_BLOCK (ole->cur_offset, ole);
- last_block = OLE_BIG_BLOCK (ole->cur_offset + num_bytes - 1, ole);
- offset = ole->cur_offset & ole->info->bb.filter;
-
- /* optimization : are all the raw blocks contiguous */
- i = first_block;
- raw_block = ole->bat.block [i];
- while (++i <= last_block && ++raw_block == ole->bat.block [i])
- ;
- if (i > last_block) {
- /* optimization don't seek if we don't need to */
- if (ole->cur_block != first_block) {
- if (gsf_input_seek (ole->input,
- (off_t)(MAX (OLE_HEADER_SIZE,
ole->info->bb.size) + (ole->bat.block [first_block] << ole->info->bb.shift) +
offset),
- SEEK_SET) < 0)
- return NULL;
- }
- ole->cur_block = last_block;
- return gsf_input_read (ole->input, num_bytes, buffer);
- }
-
- /* damn, we need to copy it block by block */
- if (buffer == NULL) {
- if (ole->stream.buf_size < num_bytes) {
- if (ole->stream.buf != NULL)
- g_free (ole->stream.buf);
- ole->stream.buf_size = num_bytes;
- ole->stream.buf = g_new (guint8, num_bytes);
- }
- buffer = ole->stream.buf;
- }
-
- ptr = buffer;
- for (i = first_block ; i <= last_block ; i++ , ptr += count, num_bytes
-= count) {
- count = ole->info->bb.size - offset;
- if (count > num_bytes)
- count = num_bytes;
- data = ole_get_block (ole, ole->bat.block [i], NULL);
- if (data == NULL)
- return NULL;
-
- /* TODO : this could be optimized to avoid the copy */
- memcpy (ptr, data + offset, count);
- offset = 0;
- }
- ole->cur_block = BAT_MAGIC_UNUSED;
- ole->cur_offset += num_bytes;
- return buffer;
-}
-
-static struct GsfInput *
-gsf_infile_msole_new_child (GsfInfileMSOle *parent,
- MSOleDirent *dirent)
-{
- GsfInfileMSOle * child;
- MSOleInfo *info;
- MSOleBAT const *metabat;
- struct GsfInput *sb_file = NULL;
- size_t size_guess;
- char * buf;
-
-
- if ( (dirent->index != 0) &&
- (dirent->is_directory) ) {
- /* be wary. It seems as if some implementations pretend that
the
- * directories contain data */
- return gsf_input_new("",
- (off_t) 0,
- 0);
- }
- child = ole_dup (parent);
- if (child == NULL)
- return NULL;
- child->dirent = dirent;
- child->size = (off_t) dirent->size;
-
- info = parent->info;
-
- if (dirent->use_sb) { /* build the bat */
- metabat = &info->sb.bat;
- size_guess = dirent->size >> info->sb.shift;
- sb_file = ole_info_get_sb_file (parent);
- } else {
- metabat = &info->bb.bat;
- size_guess = dirent->size >> info->bb.shift;
- }
- if (ole_make_bat (metabat, size_guess + 1, dirent->first_block,
&child->bat)) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
-
- if (dirent->use_sb) {
- unsigned i;
- guint8 const *data;
-
- if (sb_file == NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
-
- child->stream.buf_size = info->threshold;
- child->stream.buf = g_new (guint8, info->threshold);
-
- for (i = 0 ; i < child->bat.num_blocks; i++)
- if (gsf_input_seek (sb_file,
- (off_t)(child->bat.block [i] <<
info->sb.shift), SEEK_SET) < 0 ||
- (data = gsf_input_read (sb_file,
- info->sb.size,
- child->stream.buf + (i << info->sb.shift))) ==
NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- }
- buf = malloc(child->size);
- if (buf == NULL) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- if (NULL == gsf_infile_msole_read(child,
- child->size,
- buf)) {
- gsf_infile_msole_finalize(child);
- return NULL;
- }
- gsf_infile_msole_finalize(child);
- return gsf_input_new(buf,
- (off_t) dirent->size,
- 1);
-}
-
-
-struct GsfInput *
-gsf_infile_msole_child_by_index (GsfInfileMSOle * ole, int target)
-{
- GList *p;
-
- for (p = ole->dirent->children; p != NULL ; p = p->next)
- if (target-- <= 0)
- return gsf_infile_msole_new_child (ole,
- (MSOleDirent *)p->data);
- return NULL;
-}
-
-char const *
-gsf_infile_msole_name_by_index (GsfInfileMSOle * ole, int target)
-{
- GList *p;
-
- for (p = ole->dirent->children; p != NULL ; p = p->next)
- if (target-- <= 0)
- return ((MSOleDirent *)p->data)->name;
- return NULL;
-}
-
-int
-gsf_infile_msole_num_children (GsfInfileMSOle * ole)
-{
- g_return_val_if_fail (ole->dirent != NULL, -1);
-
- if (!ole->dirent->is_directory)
- return -1;
- return g_list_length (ole->dirent->children);
-}
-
-
-/**
- * gsf_infile_msole_new :
- * @source :
- *
- * Opens the root directory of an MS OLE file.
- * NOTE : adds a reference to @source
- *
- * Returns : the new ole file handler
- **/
-GsfInfileMSOle *
-gsf_infile_msole_new (struct GsfInput *source)
-{
- GsfInfileMSOle * ole;
-
- ole = malloc(sizeof(GsfInfileMSOle));
- if (ole == NULL)
- return NULL;
- gsf_infile_msole_init(ole);
- ole->input = source;
- ole->size = (off_t) 0;
-
- if (ole_init_info (ole)) {
- gsf_infile_msole_finalize(ole);
- return NULL;
- }
-
- return ole;
-}
-
-/**
- * gsf_infile_msole_get_class_id :
- * @ole: a #GsfInfileMSOle
- * @res: 16 byte identifier (often a GUID in MS Windows apps)
- *
- * Retrieves the 16 byte indentifier (often a GUID in MS Windows apps)
- * stored within the directory associated with @ole and stores it in @res.
- *
- * Returns TRUE on success
- **/
-int
-gsf_infile_msole_get_class_id (const GsfInfileMSOle *ole,
- unsigned char * res)
-{
- g_return_val_if_fail (ole != NULL && ole->dirent != NULL, 0);
-
- memcpy (res, ole->dirent->clsid,
- sizeof(ole->dirent->clsid));
- return 1;
-}
Modified: Extractor/src/plugins/ole2/gsf-infile-msole.h
===================================================================
--- Extractor/src/plugins/ole2/gsf-infile-msole.h 2005-09-03 21:12:54 UTC
(rev 2047)
+++ Extractor/src/plugins/ole2/gsf-infile-msole.h 2005-09-04 07:47:58 UTC
(rev 2048)
@@ -27,8 +27,6 @@
struct GsfInfileMSOle;
struct GsfInfileMSOle * gsf_infile_msole_new (struct GsfInput *source);
-int gsf_infile_msole_get_class_id (const struct GsfInfileMSOle * ole,
- unsigned char * res);
int
gsf_infile_msole_num_children (struct GsfInfileMSOle *infile);
Modified: Extractor/src/plugins/ole2/gsf-input.c
===================================================================
--- Extractor/src/plugins/ole2/gsf-input.c 2005-09-03 21:12:54 UTC (rev
2047)
+++ Extractor/src/plugins/ole2/gsf-input.c 2005-09-04 07:47:58 UTC (rev
2048)
@@ -24,236 +24,5 @@
#include "gsf-utils.h"
#include <string.h>
-typedef struct GsfInput {
- off_t size;
- off_t cur_offset;
- char * name;
- const unsigned char * buf;
- int needs_free;
-} GsfInput;
-static void
-gsf_input_init (GsfInput * input)
-{
- input->size = 0;
- input->cur_offset = 0;
- input->name = NULL;
- input->buf = NULL;
-}
-
-/**
- * gsf_input_memory_new:
- * @buf: The input bytes
- * @length: The length of @buf
- * @needs_free: Whether you want this memory to be free'd at object destruction
- *
- * Returns: A new #GsfInputMemory
- */
-GsfInput *
-gsf_input_new (const unsigned char * buf,
- off_t length,
- int needs_free)
-{
- GsfInput *mem = malloc(sizeof(GsfInput));
- if (mem == NULL)
- return NULL;
- gsf_input_init(mem);
- mem->buf = buf;
- mem->size = length;
- mem->needs_free = needs_free;
- return mem;
-}
-
-void
-gsf_input_finalize (GsfInput * input)
-{
- if (input->name != NULL) {
- free (input->name);
- input->name = NULL;
- }
- if ( (input->buf) && input->needs_free)
- free((void*) input->buf);
- free(input);
-}
-
-GsfInput *
-gsf_input_dup (GsfInput *src)
-{
- GsfInput * dst = malloc(sizeof(GsfInput));
- if (dst == NULL)
- return NULL;
- gsf_input_init(dst);
- dst->buf = src->buf;
- dst->needs_free = 0;
- dst->size = src->size;
- if (src->name != NULL)
- gsf_input_set_name (dst, src->name);
- dst->cur_offset = src->cur_offset;
- return dst;
-}
-
-const unsigned char *
-gsf_input_read (GsfInput * mem, size_t num_bytes, unsigned char *
optional_buffer)
-{
- const unsigned char *src = mem->buf;
- if (src == NULL)
- return NULL;
- if (optional_buffer) {
- memcpy (optional_buffer, src + mem->cur_offset, num_bytes);
- mem->cur_offset += num_bytes;
-
- return optional_buffer;
- } else {
- const unsigned char * ret = src + mem->cur_offset;
- mem->cur_offset += num_bytes;
- return ret;
- }
-}
-
-/**
- * gsf_input_name :
- * @input :
- *
- * Returns @input's name in utf8 form, DO NOT FREE THIS STRING
- **/
-const char *
-gsf_input_name (GsfInput *input)
-{
- return input->name;
-}
-
-/**
- * gsf_input_size :
- * @input : The input
- *
- * Looks up and caches the number of bytes in the input
- *
- * Returns : the size or -1 on error
- **/
-off_t
-gsf_input_size (GsfInput *input)
-{
- g_return_val_if_fail (input != NULL, -1);
- return input->size;
-}
-
-/**
- * gsf_input_eof :
- * @input : the input
- *
- * Are we at the end of the file ?
- *
- * Returns : TRUE if the input is at the eof.
- **/
-int
-gsf_input_eof (GsfInput *input)
-{
- g_return_val_if_fail (input != NULL, 0);
-
- return input->cur_offset >= input->size;
-}
-
-/**
- * gsf_input_remaining :
- * @input :
- *
- * Returns the number of bytes left in the file.
- **/
-off_t
-gsf_input_remaining (GsfInput *input)
-{
- g_return_val_if_fail (input != NULL, 0);
-
- return input->size - input->cur_offset;
-}
-
-/**
- * gsf_input_tell :
- * @input :
- *
- * Returns the current offset in the file.
- **/
-off_t
-gsf_input_tell (GsfInput *input)
-{
- g_return_val_if_fail (input != NULL, 0);
-
- return input->cur_offset;
-}
-
-/**
- * gsf_input_seek :
- * @input :
- * @offset :
- * @whence :
- *
- * Returns TRUE on error.
- **/
-int
-gsf_input_seek (GsfInput *input, off_t offset, int whence)
-{
- off_t pos = offset;
-
- g_return_val_if_fail (input != NULL, 1);
-
- switch (whence) {
- case SEEK_SET : break;
- case SEEK_CUR : pos += input->cur_offset; break;
- case SEEK_END : pos += input->size; break;
- default : return 1;
- }
-
- if (pos < 0 || pos > input->size)
- return 1;
-
- /*
- * If we go nowhere, just return. This in particular handles null
- * seeks for streams with no seek method.
- */
- if (pos == input->cur_offset)
- return 0;
-
- input->cur_offset = pos;
- return 0;
-}
-
-/**
- * gsf_input_set_name :
- * @input :
- * @name :
- *
- * protected.
- *
- * Returns : TRUE if the assignment was ok.
- **/
-int
-gsf_input_set_name (GsfInput *input, char const *name)
-{
- char *buf;
-
- g_return_val_if_fail (input != NULL, 0);
-
- buf = strdup (name);
- if (input->name != NULL)
- free (input->name);
- input->name = buf;
- return 1;
-}
-
-/**
- * gsf_input_set_size :
- * @input :
- * @size :
- *
- * Returns : TRUE if the assignment was ok.
- */
-int
-gsf_input_set_size (GsfInput *input, off_t size)
-{
- g_return_val_if_fail (input != NULL, 0);
-
- input->size = size;
- return 1;
-}
-
Modified: Extractor/src/plugins/ole2/gsf-utils.c
===================================================================
--- Extractor/src/plugins/ole2/gsf-utils.c 2005-09-03 21:12:54 UTC (rev
2047)
+++ Extractor/src/plugins/ole2/gsf-utils.c 2005-09-04 07:47:58 UTC (rev
2048)
@@ -22,233 +22,3 @@
#include "platform.h"
#include "gsf-utils.h"
#include "gsf-input.h"
-
-#include <ctype.h>
-#include <stdio.h>
-#include <string.h>
-
-/*
- * Glib gets this wrong, really. ARM's floating point format is a weird
- * mixture.
- */
-#define G_ARMFLOAT_ENDIAN 56781234
-#if defined(__arm__) && !defined(__vfp__) && (G_BYTE_ORDER == G_LITTLE_ENDIAN)
-#define G_FLOAT_BYTE_ORDER G_ARMFLOAT_ENDIAN
-#else
-#define G_FLOAT_BYTE_ORDER G_BYTE_ORDER
-#endif
-
-guint64
-gsf_le_get_guint64 (void const *p)
-{
-#if G_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (guint64) == 8) {
- guint64 li;
- int i;
- guint8 *t = (guint8 *)&li;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (li);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return li;
- } else {
- g_error ("Big endian machine, but weird size of guint64");
- }
-#elif G_BYTE_ORDER == G_LITTLE_ENDIAN
- if (sizeof (guint64) == 8) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- guint64 data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of guint64");
- }
-#else
-#error "Byte order not recognised -- out of luck"
-#endif
-}
-
-float
-gsf_le_get_float (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (float) == 4) {
- float f;
- int i;
- guint8 *t = (guint8 *)&f;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (f);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return f;
- } else {
- g_error ("Big endian machine, but weird size of floats");
- }
-#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER ==
G_ARMFLOAT_ENDIAN)
- if (sizeof (float) == 4) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- float data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of floats");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-void
-gsf_le_set_float (void *p, float d)
-{
-#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (float) == 4) {
- int i;
- guint8 *t = (guint8 *)&d;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (d);
-
- for (i = 0; i < sd; i++)
- p2[sd - 1 - i] = t[i];
- } else {
- g_error ("Big endian machine, but weird size of floats");
- }
-#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER ==
G_ARMFLOAT_ENDIAN)
- if (sizeof (float) == 4) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- memcpy (p, &d, sizeof (d));
- } else {
- g_error ("Little endian machine, but weird size of floats");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-double
-gsf_le_get_double (void const *p)
-{
-#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
- double data;
- memcpy ((char *)&data + 4, p, 4);
- memcpy ((char *)&data, (const char *)p + 4, 4);
- return data;
-#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (double) == 8) {
- double d;
- int i;
- guint8 *t = (guint8 *)&d;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (d);
-
- for (i = 0; i < sd; i++)
- t[i] = p2[sd - 1 - i];
-
- return d;
- } else {
- g_error ("Big endian machine, but weird size of doubles");
- }
-#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
- if (sizeof (double) == 8) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- double data;
- memcpy (&data, p, sizeof (data));
- return data;
- } else {
- g_error ("Little endian machine, but weird size of doubles");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-void
-gsf_le_set_double (void *p, double d)
-{
-#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
- memcpy (p, (const char *)&d + 4, 4);
- memcpy ((char *)p + 4, &d, 4);
-#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
- if (sizeof (double) == 8) {
- int i;
- guint8 *t = (guint8 *)&d;
- guint8 *p2 = (guint8 *)p;
- int sd = sizeof (d);
-
- for (i = 0; i < sd; i++)
- p2[sd - 1 - i] = t[i];
- } else {
- g_error ("Big endian machine, but weird size of doubles");
- }
-#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
- if (sizeof (double) == 8) {
- /*
- * On i86, we could access directly, but Alphas require
- * aligned access.
- */
- memcpy (p, &d, sizeof (d));
- } else {
- g_error ("Little endian machine, but weird size of doubles");
- }
-#else
-#error "Floating-point byte order not recognised -- out of luck"
-#endif
-}
-
-/**
- * gsf_extension_pointer:
- * @path: A filename or file path.
- *
- * Extracts the extension from the end of a filename (the part after the final
- * '.' in the filename).
- *
- * Returns: A pointer to the extension part of the filename, or a
- * pointer to the end of the string if the filename does not
- * have an extension.
- */
-char const *
-gsf_extension_pointer (char const *path)
-{
- char *s, *t;
-
- g_return_val_if_fail (path != NULL, NULL);
-
- t = strrchr (path, G_DIR_SEPARATOR);
- s = strrchr ((t != NULL) ? t : path, '.');
- if (s != NULL)
- return s + 1;
- return path + strlen(path);
-}
-
-/**
- * gsf_iconv_close : A utility wrapper to safely close an iconv handle
- * @handle :
- **/
-void
-gsf_iconv_close (GIConv handle)
-{
- if (handle != NULL && handle != ((GIConv)-1))
- g_iconv_close (handle);
-}
-
-/* FIXME: what about translations? */
-#ifndef _
-#define _(x) x
-#endif
-
Modified: Extractor/src/plugins/ole2/gsf-utils.h
===================================================================
--- Extractor/src/plugins/ole2/gsf-utils.h 2005-09-03 21:12:54 UTC (rev
2047)
+++ Extractor/src/plugins/ole2/gsf-utils.h 2005-09-04 07:47:58 UTC (rev
2048)
@@ -23,42 +23,6 @@
#define GSF_UTILS_H
#include <glib-object.h>
-
-/* Do this the ugly way so that we don't have to worry about alignment */
-#define GSF_LE_GET_GUINT8(p) (*(guint8 const *)(p))
-#define GSF_LE_GET_GUINT16(p) \
- (guint16)((((guint8 const *)(p))[0] << 0) | \
- (((guint8 const *)(p))[1] << 8))
-#define GSF_LE_GET_GUINT32(p) \
- (guint32)((((guint8 const *)(p))[0] << 0) | \
- (((guint8 const *)(p))[1] << 8) | \
- (((guint8 const *)(p))[2] << 16) | \
- (((guint8 const *)(p))[3] << 24))
-#define GSF_LE_GET_GUINT64(p) (gsf_le_get_guint64 (p))
-#define GSF_LE_GET_GINT8(p) ((gint8)GSF_LE_GET_GUINT8(p))
-#define GSF_LE_GET_GINT16(p) ((gint16)GSF_LE_GET_GUINT16(p))
-#define GSF_LE_GET_GINT32(p) ((gint32)GSF_LE_GET_GUINT32(p))
-#define GSF_LE_GET_FLOAT(p) (gsf_le_get_float (p))
-#define GSF_LE_GET_DOUBLE(p) (gsf_le_get_double (p))
-guint64 gsf_le_get_guint64 (void const *p);
-float gsf_le_get_float (void const *p);
-double gsf_le_get_double (void const *p);
-
-#define GSF_LE_SET_GUINT8(p, dat) \
- (*((guint8 *)(p)) = ((dat) & 0xff))
-#define GSF_LE_SET_GUINT16(p, dat) \
- ((*((guint8 *)(p) + 0) = ((dat) & 0xff)),\
- (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff))
-#define GSF_LE_SET_GUINT32(p, dat) \
- ((*((guint8 *)(p) + 0) = ((dat)) & 0xff), \
- (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff), \
- (*((guint8 *)(p) + 2) = ((dat) >> 16) & 0xff), \
- (*((guint8 *)(p) + 3) = ((dat) >> 24) & 0xff))
-#define GSF_LE_SET_GINT8(p,dat) GSF_LE_SET_GUINT8((p),(dat))
-#define GSF_LE_SET_GINT16(p,dat) GSF_LE_SET_GUINT16((p),(dat))
-#define GSF_LE_SET_GINT32(p,dat) GSF_LE_SET_GUINT32((p),(dat))
-#define GSF_LE_SET_FLOAT(p,dat) gsf_le_set_float((p),(dat))
-#define GSF_LE_SET_DOUBLE(p,dat) gsf_le_set_double((p),(dat))
void gsf_le_set_float (void *p, float f);
void gsf_le_set_double (void *p, double d);
Modified: Extractor/src/plugins/ole2/ole2extractor.c
===================================================================
--- Extractor/src/plugins/ole2/ole2extractor.c 2005-09-03 21:12:54 UTC (rev
2047)
+++ Extractor/src/plugins/ole2/ole2extractor.c 2005-09-04 07:47:58 UTC (rev
2048)
@@ -21,9 +21,9 @@
#include "platform.h"
#include "extractor.h"
#include <glib-object.h>
-#include "gsf-infile-msole.h"
-#include "gsf-input.h"
-#include "gsf-utils.h"
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
#define DEBUG_OLE2 0
@@ -35,7 +35,1352 @@
static void warning(const char * format, ...) {}
#endif
+/* *********************** formerly gsf-input.c ************* */
+typedef struct GsfInput {
+ off_t size;
+ off_t cur_offset;
+ char * name;
+ const unsigned char * buf;
+ int needs_free;
+} GsfInput;
+
+
+static void
+gsf_input_init (GsfInput * input)
+{
+ input->size = 0;
+ input->cur_offset = 0;
+ input->name = NULL;
+ input->buf = NULL;
+}
+
+/**
+ * gsf_input_memory_new:
+ * @buf: The input bytes
+ * @length: The length of @buf
+ * @needs_free: Whether you want this memory to be free'd at object destruction
+ *
+ * Returns: A new #GsfInputMemory
+ */
+static GsfInput *
+gsf_input_new (const unsigned char * buf,
+ off_t length,
+ int needs_free)
+{
+ GsfInput *mem = malloc(sizeof(GsfInput));
+ if (mem == NULL)
+ return NULL;
+ gsf_input_init(mem);
+ mem->buf = buf;
+ mem->size = length;
+ mem->needs_free = needs_free;
+ return mem;
+}
+
+static void
+gsf_input_finalize (GsfInput * input)
+{
+ if (input->name != NULL) {
+ free (input->name);
+ input->name = NULL;
+ }
+ if ( (input->buf) && input->needs_free)
+ free((void*) input->buf);
+ free(input);
+}
+
+/**
+ * gsf_input_set_name :
+ * @input :
+ * @name :
+ *
+ * protected.
+ *
+ * Returns : TRUE if the assignment was ok.
+ **/
+static int
+gsf_input_set_name (GsfInput *input, char const *name)
+{
+ char *buf;
+
+ g_return_val_if_fail (input != NULL, 0);
+
+ buf = strdup (name);
+ if (input->name != NULL)
+ free (input->name);
+ input->name = buf;
+ return 1;
+}
+
+
+
+static GsfInput *
+gsf_input_dup (GsfInput *src)
+{
+ GsfInput * dst = malloc(sizeof(GsfInput));
+ if (dst == NULL)
+ return NULL;
+ gsf_input_init(dst);
+ dst->buf = src->buf;
+ dst->needs_free = 0;
+ dst->size = src->size;
+ if (src->name != NULL)
+ gsf_input_set_name (dst, src->name);
+ dst->cur_offset = src->cur_offset;
+ return dst;
+}
+
+static const unsigned char *
+gsf_input_read (GsfInput * mem, size_t num_bytes, unsigned char *
optional_buffer)
+{
+ const unsigned char *src = mem->buf;
+ if (src == NULL)
+ return NULL;
+ if (optional_buffer) {
+ memcpy (optional_buffer, src + mem->cur_offset, num_bytes);
+ mem->cur_offset += num_bytes;
+
+ return optional_buffer;
+ } else {
+ const unsigned char * ret = src + mem->cur_offset;
+ mem->cur_offset += num_bytes;
+ return ret;
+ }
+}
+
+/**
+ * gsf_input_name :
+ * @input :
+ *
+ * Returns @input's name in utf8 form, DO NOT FREE THIS STRING
+ **/
+static const char *
+gsf_input_name (GsfInput *input)
+{
+ return input->name;
+}
+
+/**
+ * gsf_input_size :
+ * @input : The input
+ *
+ * Looks up and caches the number of bytes in the input
+ *
+ * Returns : the size or -1 on error
+ **/
+static off_t
+gsf_input_size (GsfInput *input)
+{
+ g_return_val_if_fail (input != NULL, -1);
+ return input->size;
+}
+
+/**
+ * gsf_input_eof :
+ * @input : the input
+ *
+ * Are we at the end of the file ?
+ *
+ * Returns : TRUE if the input is at the eof.
+ **/
+static int
+gsf_input_eof (GsfInput *input)
+{
+ g_return_val_if_fail (input != NULL, 0);
+
+ return input->cur_offset >= input->size;
+}
+
+/**
+ * gsf_input_remaining :
+ * @input :
+ *
+ * Returns the number of bytes left in the file.
+ **/
+static off_t
+gsf_input_remaining (GsfInput *input)
+{
+ g_return_val_if_fail (input != NULL, 0);
+
+ return input->size - input->cur_offset;
+}
+
+/**
+ * gsf_input_tell :
+ * @input :
+ *
+ * Returns the current offset in the file.
+ **/
+static off_t
+gsf_input_tell (GsfInput *input)
+{
+ g_return_val_if_fail (input != NULL, 0);
+
+ return input->cur_offset;
+}
+
+/**
+ * gsf_input_seek :
+ * @input :
+ * @offset :
+ * @whence :
+ *
+ * Returns TRUE on error.
+ **/
+static int
+gsf_input_seek (GsfInput *input, off_t offset, int whence)
+{
+ off_t pos = offset;
+
+ g_return_val_if_fail (input != NULL, 1);
+
+ switch (whence) {
+ case SEEK_SET : break;
+ case SEEK_CUR : pos += input->cur_offset; break;
+ case SEEK_END : pos += input->size; break;
+ default : return 1;
+ }
+
+ if (pos < 0 || pos > input->size)
+ return 1;
+
+ /*
+ * If we go nowhere, just return. This in particular handles null
+ * seeks for streams with no seek method.
+ */
+ if (pos == input->cur_offset)
+ return 0;
+
+ input->cur_offset = pos;
+ return 0;
+}
+
+/**
+ * gsf_input_set_size :
+ * @input :
+ * @size :
+ *
+ * Returns : TRUE if the assignment was ok.
+ */
+static int
+gsf_input_set_size (GsfInput *input, off_t size)
+{
+ g_return_val_if_fail (input != NULL, 0);
+
+ input->size = size;
+ return 1;
+}
+
+
+
+
+/* ******************** formerly gsf-utils.c **************** */
+
+
+/* Do this the ugly way so that we don't have to worry about alignment */
+#define GSF_LE_GET_GUINT8(p) (*(guint8 const *)(p))
+#define GSF_LE_GET_GUINT16(p) \
+ (guint16)((((guint8 const *)(p))[0] << 0) | \
+ (((guint8 const *)(p))[1] << 8))
+#define GSF_LE_GET_GUINT32(p) \
+ (guint32)((((guint8 const *)(p))[0] << 0) | \
+ (((guint8 const *)(p))[1] << 8) | \
+ (((guint8 const *)(p))[2] << 16) | \
+ (((guint8 const *)(p))[3] << 24))
+
+#define GSF_LE_GET_GUINT64(p) (gsf_le_get_guint64 (p))
+#define GSF_LE_GET_GINT64(p) ((gint64)GSF_LE_GET_GUINT64(p))
+#define GSF_LE_GET_GINT8(p) ((gint8)GSF_LE_GET_GUINT8(p))
+#define GSF_LE_GET_GINT16(p) ((gint16)GSF_LE_GET_GUINT16(p))
+#define GSF_LE_GET_GINT32(p) ((gint32)GSF_LE_GET_GUINT32(p))
+#define GSF_LE_GET_FLOAT(p) (gsf_le_get_float (p))
+#define GSF_LE_GET_DOUBLE(p) (gsf_le_get_double (p))
+#define GSF_LE_SET_GUINT8(p, dat) \
+ (*((guint8 *)(p)) = ((dat) & 0xff))
+#define GSF_LE_SET_GUINT16(p, dat) \
+ ((*((guint8 *)(p) + 0) = ((dat) & 0xff)),\
+ (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff))
+#define GSF_LE_SET_GUINT32(p, dat) \
+ ((*((guint8 *)(p) + 0) = ((dat)) & 0xff), \
+ (*((guint8 *)(p) + 1) = ((dat) >> 8) & 0xff), \
+ (*((guint8 *)(p) + 2) = ((dat) >> 16) & 0xff), \
+ (*((guint8 *)(p) + 3) = ((dat) >> 24) & 0xff))
+#define GSF_LE_SET_GINT8(p,dat) GSF_LE_SET_GUINT8((p),(dat))
+#define GSF_LE_SET_GINT16(p,dat) GSF_LE_SET_GUINT16((p),(dat))
+#define GSF_LE_SET_GINT32(p,dat) GSF_LE_SET_GUINT32((p),(dat))
+#define GSF_LE_SET_FLOAT(p,dat) gsf_le_set_float((p),(dat))
+#define GSF_LE_SET_DOUBLE(p,dat) gsf_le_set_double((p),(dat))
+
+
+/*
+ * Glib gets this wrong, really. ARM's floating point format is a weird
+ * mixture.
+ */
+#define G_ARMFLOAT_ENDIAN 56781234
+#if defined(__arm__) && !defined(__vfp__) && (G_BYTE_ORDER == G_LITTLE_ENDIAN)
+#define G_FLOAT_BYTE_ORDER G_ARMFLOAT_ENDIAN
+#else
+#define G_FLOAT_BYTE_ORDER G_BYTE_ORDER
+#endif
+
+static guint64
+gsf_le_get_guint64 (void const *p)
+{
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+ if (sizeof (guint64) == 8) {
+ guint64 li;
+ int i;
+ guint8 *t = (guint8 *)&li;
+ guint8 *p2 = (guint8 *)p;
+ int sd = sizeof (li);
+
+ for (i = 0; i < sd; i++)
+ t[i] = p2[sd - 1 - i];
+
+ return li;
+ } else {
+ g_error ("Big endian machine, but weird size of guint64");
+ }
+#elif G_BYTE_ORDER == G_LITTLE_ENDIAN
+ if (sizeof (guint64) == 8) {
+ /*
+ * On i86, we could access directly, but Alphas require
+ * aligned access.
+ */
+ guint64 data;
+ memcpy (&data, p, sizeof (data));
+ return data;
+ } else {
+ g_error ("Little endian machine, but weird size of guint64");
+ }
+#else
+#error "Byte order not recognised -- out of luck"
+#endif
+}
+
+static float
+gsf_le_get_float (void const *p)
+{
+#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
+ if (sizeof (float) == 4) {
+ float f;
+ int i;
+ guint8 *t = (guint8 *)&f;
+ guint8 *p2 = (guint8 *)p;
+ int sd = sizeof (f);
+
+ for (i = 0; i < sd; i++)
+ t[i] = p2[sd - 1 - i];
+
+ return f;
+ } else {
+ g_error ("Big endian machine, but weird size of floats");
+ }
+#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER ==
G_ARMFLOAT_ENDIAN)
+ if (sizeof (float) == 4) {
+ /*
+ * On i86, we could access directly, but Alphas require
+ * aligned access.
+ */
+ float data;
+ memcpy (&data, p, sizeof (data));
+ return data;
+ } else {
+ g_error ("Little endian machine, but weird size of floats");
+ }
+#else
+#error "Floating-point byte order not recognised -- out of luck"
+#endif
+}
+
+static void
+gsf_le_set_float (void *p, float d)
+{
+#if G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
+ if (sizeof (float) == 4) {
+ int i;
+ guint8 *t = (guint8 *)&d;
+ guint8 *p2 = (guint8 *)p;
+ int sd = sizeof (d);
+
+ for (i = 0; i < sd; i++)
+ p2[sd - 1 - i] = t[i];
+ } else {
+ g_error ("Big endian machine, but weird size of floats");
+ }
+#elif (G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN) || (G_FLOAT_BYTE_ORDER ==
G_ARMFLOAT_ENDIAN)
+ if (sizeof (float) == 4) {
+ /*
+ * On i86, we could access directly, but Alphas require
+ * aligned access.
+ */
+ memcpy (p, &d, sizeof (d));
+ } else {
+ g_error ("Little endian machine, but weird size of floats");
+ }
+#else
+#error "Floating-point byte order not recognised -- out of luck"
+#endif
+}
+
+static double
+gsf_le_get_double (void const *p)
+{
+#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
+ double data;
+ memcpy ((char *)&data + 4, p, 4);
+ memcpy ((char *)&data, (const char *)p + 4, 4);
+ return data;
+#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
+ if (sizeof (double) == 8) {
+ double d;
+ int i;
+ guint8 *t = (guint8 *)&d;
+ guint8 *p2 = (guint8 *)p;
+ int sd = sizeof (d);
+
+ for (i = 0; i < sd; i++)
+ t[i] = p2[sd - 1 - i];
+
+ return d;
+ } else {
+ g_error ("Big endian machine, but weird size of doubles");
+ }
+#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
+ if (sizeof (double) == 8) {
+ /*
+ * On i86, we could access directly, but Alphas require
+ * aligned access.
+ */
+ double data;
+ memcpy (&data, p, sizeof (data));
+ return data;
+ } else {
+ g_error ("Little endian machine, but weird size of doubles");
+ }
+#else
+#error "Floating-point byte order not recognised -- out of luck"
+#endif
+}
+
+static void
+gsf_le_set_double (void *p, double d)
+{
+#if G_FLOAT_BYTE_ORDER == G_ARMFLOAT_ENDIAN
+ memcpy (p, (const char *)&d + 4, 4);
+ memcpy ((char *)p + 4, &d, 4);
+#elif G_FLOAT_BYTE_ORDER == G_BIG_ENDIAN
+ if (sizeof (double) == 8) {
+ int i;
+ guint8 *t = (guint8 *)&d;
+ guint8 *p2 = (guint8 *)p;
+ int sd = sizeof (d);
+
+ for (i = 0; i < sd; i++)
+ p2[sd - 1 - i] = t[i];
+ } else {
+ g_error ("Big endian machine, but weird size of doubles");
+ }
+#elif G_FLOAT_BYTE_ORDER == G_LITTLE_ENDIAN
+ if (sizeof (double) == 8) {
+ /*
+ * On i86, we could access directly, but Alphas require
+ * aligned access.
+ */
+ memcpy (p, &d, sizeof (d));
+ } else {
+ g_error ("Little endian machine, but weird size of doubles");
+ }
+#else
+#error "Floating-point byte order not recognised -- out of luck"
+#endif
+}
+
+/**
+ * gsf_extension_pointer:
+ * @path: A filename or file path.
+ *
+ * Extracts the extension from the end of a filename (the part after the final
+ * '.' in the filename).
+ *
+ * Returns: A pointer to the extension part of the filename, or a
+ * pointer to the end of the string if the filename does not
+ * have an extension.
+ */
+static char const *
+gsf_extension_pointer (char const *path)
+{
+ char *s, *t;
+
+ g_return_val_if_fail (path != NULL, NULL);
+
+ t = strrchr (path, G_DIR_SEPARATOR);
+ s = strrchr ((t != NULL) ? t : path, '.');
+ if (s != NULL)
+ return s + 1;
+ return path + strlen(path);
+}
+
+/**
+ * gsf_iconv_close : A utility wrapper to safely close an iconv handle
+ * @handle :
+ **/
+static void
+gsf_iconv_close (GIConv handle)
+{
+ if (handle != NULL && handle != ((GIConv)-1))
+ g_iconv_close (handle);
+}
+
+
+/* ***************************** formerly gsf-infile-msole.c
********************* */
+
+#define OLE_HEADER_SIZE 0x200 /* independent of big block
size size */
+#define OLE_HEADER_SIGNATURE 0x00
+#define OLE_HEADER_CLSID 0x08 /* See ReadClassStg */
+#define OLE_HEADER_MINOR_VER 0x18 /* 0x33 and 0x3e have been seen */
+#define OLE_HEADER_MAJOR_VER 0x1a /* 0x3 been seen in wild */
+#define OLE_HEADER_BYTE_ORDER 0x1c /* 0xfe 0xff == Intel Little Endian */
+#define OLE_HEADER_BB_SHIFT 0x1e
+#define OLE_HEADER_SB_SHIFT 0x20
+/* 0x22..0x27 reserved == 0 */
+#define OLE_HEADER_CSECTDIR 0x28
+#define OLE_HEADER_NUM_BAT 0x2c
+#define OLE_HEADER_DIRENT_START 0x30
+/* 0x34..0x37 transacting signature must be 0 */
+#define OLE_HEADER_THRESHOLD 0x38
+#define OLE_HEADER_SBAT_START 0x3c
+#define OLE_HEADER_NUM_SBAT 0x40
+#define OLE_HEADER_METABAT_BLOCK 0x44
+#define OLE_HEADER_NUM_METABAT 0x48
+#define OLE_HEADER_START_BAT 0x4c
+#define BAT_INDEX_SIZE 4
+#define OLE_HEADER_METABAT_SIZE ((OLE_HEADER_SIZE -
OLE_HEADER_START_BAT) / BAT_INDEX_SIZE)
+
+#define DIRENT_MAX_NAME_SIZE 0x40
+#define DIRENT_DETAILS_SIZE 0x40
+#define DIRENT_SIZE (DIRENT_MAX_NAME_SIZE + DIRENT_DETAILS_SIZE)
+#define DIRENT_NAME_LEN 0x40 /* length in bytes incl 0
terminator */
+#define DIRENT_TYPE 0x42
+#define DIRENT_COLOUR 0x43
+#define DIRENT_PREV 0x44
+#define DIRENT_NEXT 0x48
+#define DIRENT_CHILD 0x4c
+#define DIRENT_CLSID 0x50 /* only for dirs */
+#define DIRENT_USERFLAGS 0x60 /* only for dirs */
+#define DIRENT_CREATE_TIME 0x64 /* for files */
+#define DIRENT_MODIFY_TIME 0x6c /* for files */
+#define DIRENT_FIRSTBLOCK 0x74
+#define DIRENT_FILE_SIZE 0x78
+/* 0x7c..0x7f reserved == 0 */
+
+#define DIRENT_TYPE_INVALID 0
+#define DIRENT_TYPE_DIR 1
+#define DIRENT_TYPE_FILE 2
+#define DIRENT_TYPE_LOCKBYTES 3 /* ? */
+#define DIRENT_TYPE_PROPERTY 4 /* ? */
+#define DIRENT_TYPE_ROOTDIR 5
+#define DIRENT_MAGIC_END 0xffffffff
+
+/* flags in the block allocation list to denote special blocks */
+#define BAT_MAGIC_UNUSED 0xffffffff /* -1 */
+#define BAT_MAGIC_END_OF_CHAIN 0xfffffffe /* -2 */
+#define BAT_MAGIC_BAT 0xfffffffd /* a bat block, -3 */
+#define BAT_MAGIC_METABAT 0xfffffffc /* a metabat block -4 */
+
+
+
+
+typedef struct {
+ guint32 *block;
+ guint32 num_blocks;
+} MSOleBAT;
+
+typedef struct {
+ char *name;
+ char *collation_name;
+ int index;
+ size_t size;
+ gboolean use_sb;
+ guint32 first_block;
+ gboolean is_directory;
+ GList *children;
+ unsigned char clsid[16]; /* 16 byte GUID used by some apps */
+} MSOleDirent;
+
+typedef struct {
+ struct {
+ MSOleBAT bat;
+ unsigned shift;
+ unsigned filter;
+ size_t size;
+ } bb, sb;
+ off_t max_block;
+ guint32 threshold; /* transition between small and big blocks */
+ guint32 sbat_start, num_sbat;
+
+ MSOleDirent *root_dir;
+ struct GsfInput *sb_file;
+
+ int ref_count;
+} MSOleInfo;
+
+typedef struct GsfInfileMSOle {
+ off_t size;
+ off_t cur_offset;
+ struct GsfInput *input;
+ MSOleInfo *info;
+ MSOleDirent *dirent;
+ MSOleBAT bat;
+ off_t cur_block;
+
+ struct {
+ guint8 *buf;
+ size_t buf_size;
+ } stream;
+} GsfInfileMSOle;
+
+/* utility macros */
+#define OLE_BIG_BLOCK(index, ole) ((index) >> ole->info->bb.shift)
+
+static struct GsfInput *gsf_infile_msole_new_child (GsfInfileMSOle *parent,
+ MSOleDirent *dirent);
+
+/**
+ * ole_get_block :
+ * @ole : the infile
+ * @block :
+ * @buffer : optionally NULL
+ *
+ * Read a block of data from the underlying input.
+ * Be really anal.
+ **/
+static const guint8 *
+ole_get_block (const GsfInfileMSOle *ole, guint32 block, guint8 *buffer)
+{
+ g_return_val_if_fail (block < ole->info->max_block, NULL);
+
+ /* OLE_HEADER_SIZE is fixed at 512, but the sector containing the
+ * header is padded out to bb.size (sector size) when bb.size > 512. */
+ if (gsf_input_seek (ole->input,
+ (off_t)(MAX (OLE_HEADER_SIZE, ole->info->bb.size) + (block <<
ole->info->bb.shift)),
+ SEEK_SET) < 0)
+ return NULL;
+
+ return gsf_input_read (ole->input, ole->info->bb.size, buffer);
+}
+
+/**
+ * ole_make_bat :
+ * @metabat : a meta bat to connect to the raw blocks (small or large)
+ * @size_guess : An optional guess as to how many blocks are in the file
+ * @block : The first block in the list.
+ * @res : where to store the result.
+ *
+ * Walk the linked list of the supplied block allocation table and build up a
+ * table for the list starting in @block.
+ *
+ * Returns TRUE on error.
+ */
+static gboolean
+ole_make_bat (MSOleBAT const *metabat, size_t size_guess, guint32 block,
+ MSOleBAT *res)
+{
+ /* NOTE : Only use size as a suggestion, sometimes it is wrong */
+ GArray *bat = g_array_sized_new (FALSE, FALSE,
+ sizeof (guint32), size_guess);
+
+ guint8 *used = (guint8*)g_alloca (1 + metabat->num_blocks / 8);
+ memset (used, 0, 1 + metabat->num_blocks / 8);
+
+ if (block < metabat->num_blocks)
+ do {
+ /* Catch cycles in the bat list */
+ g_return_val_if_fail (0 == (used[block/8] & (1 <<
(block & 0x7))), TRUE);
+ used[block/8] |= 1 << (block & 0x7);
+
+ g_array_append_val (bat, block);
+ block = metabat->block [block];
+ } while (block < metabat->num_blocks);
+
+ res->block = NULL;
+
+ res->num_blocks = bat->len;
+ res->block = (guint32 *) (gpointer) g_array_free (bat, FALSE);
+
+ if (block != BAT_MAGIC_END_OF_CHAIN) {
+#if 0
+ g_warning ("This OLE2 file is invalid.\n"
+ "The Block Allocation Table for one of the streams
had %x instead of a terminator (%x).\n"
+ "We might still be able to extract some data, but
you'll want to check the file.",
+ block, BAT_MAGIC_END_OF_CHAIN);
+#endif
+ }
+
+ return FALSE;
+}
+
+static void
+ols_bat_release (MSOleBAT *bat)
+{
+ if (bat->block != NULL) {
+ g_free (bat->block);
+ bat->block = NULL;
+ bat->num_blocks = 0;
+ }
+}
+
+/**
+ * ole_info_read_metabat :
+ * @ole :
+ * @bats :
+ *
+ * A small utility routine to read a set of references to bat blocks
+ * either from the OLE header, or a meta-bat block.
+ *
+ * Returns a pointer to the element after the last position filled.
+ **/
+static guint32 *
+ole_info_read_metabat (GsfInfileMSOle *ole, guint32 *bats, guint32 max,
+ guint32 const *metabat, guint32 const *metabat_end)
+{
+ guint8 const *bat, *end;
+
+ for (; metabat < metabat_end; metabat++) {
+ bat = ole_get_block (ole, *metabat, NULL);
+ if (bat == NULL)
+ return NULL;
+ end = bat + ole->info->bb.size;
+ for ( ; bat < end ; bat += BAT_INDEX_SIZE, bats++) {
+ *bats = GSF_LE_GET_GUINT32 (bat);
+ g_return_val_if_fail (*bats < max ||
+ *bats >= BAT_MAGIC_METABAT, NULL);
+ }
+ }
+ return bats;
+}
+
+/**
+ * gsf_ole_get_guint32s :
+ * @dst :
+ * @src :
+ * @num_bytes :
+ *
+ * Copy some some raw data into an array of guint32.
+ **/
+static void
+gsf_ole_get_guint32s (guint32 *dst, guint8 const *src, int num_bytes)
+{
+ for (; (num_bytes -= BAT_INDEX_SIZE) >= 0 ; src += BAT_INDEX_SIZE)
+ *dst++ = GSF_LE_GET_GUINT32 (src);
+}
+
+static struct GsfInput *
+ole_info_get_sb_file (GsfInfileMSOle *parent)
+{
+ MSOleBAT meta_sbat;
+
+ if (parent->info->sb_file != NULL)
+ return parent->info->sb_file;
+
+ parent->info->sb_file = gsf_infile_msole_new_child (parent,
+ parent->info->root_dir);
+
+ if (NULL == parent->info->sb_file)
+ return NULL;
+
+ g_return_val_if_fail (parent->info->sb.bat.block == NULL, NULL);
+
+ if (ole_make_bat (&parent->info->bb.bat,
+ parent->info->num_sbat,
+ parent->info->sbat_start,
+ &meta_sbat)) {
+ return NULL;
+ }
+
+ parent->info->sb.bat.num_blocks = meta_sbat.num_blocks *
(parent->info->bb.size / BAT_INDEX_SIZE);
+ parent->info->sb.bat.block = g_new0 (guint32,
parent->info->sb.bat.num_blocks);
+ ole_info_read_metabat (parent, parent->info->sb.bat.block,
+ parent->info->sb.bat.num_blocks,
+ meta_sbat.block, meta_sbat.block + meta_sbat.num_blocks);
+ ols_bat_release (&meta_sbat);
+
+ return parent->info->sb_file;
+}
+
+static gint
+ole_dirent_cmp (const MSOleDirent *a, const MSOleDirent *b)
+{
+ g_return_val_if_fail (a, 0);
+ g_return_val_if_fail (b, 0);
+
+ g_return_val_if_fail (a->collation_name, 0);
+ g_return_val_if_fail (b->collation_name, 0);
+
+ return strcmp (b->collation_name, a->collation_name);
+}
+
+/**
+ * ole_dirent_new :
+ * @ole :
+ * @entry :
+ * @parent : optional
+ *
+ * Parse dirent number @entry and recursively handle its siblings and children.
+ **/
+static MSOleDirent *
+ole_dirent_new (GsfInfileMSOle *ole, guint32 entry, MSOleDirent *parent)
+{
+ MSOleDirent *dirent;
+ guint32 block, next, prev, child, size;
+ guint8 const *data;
+ guint8 type;
+ guint16 name_len;
+
+ if (entry >= DIRENT_MAGIC_END)
+ return NULL;
+
+ block = OLE_BIG_BLOCK (entry * DIRENT_SIZE, ole);
+
+ g_return_val_if_fail (block < ole->bat.num_blocks, NULL);
+ data = ole_get_block (ole, ole->bat.block [block], NULL);
+ if (data == NULL)
+ return NULL;
+ data += (DIRENT_SIZE * entry) % ole->info->bb.size;
+
+ type = GSF_LE_GET_GUINT8 (data + DIRENT_TYPE);
+ if (type != DIRENT_TYPE_DIR &&
+ type != DIRENT_TYPE_FILE &&
+ type != DIRENT_TYPE_ROOTDIR) {
+#if 0
+ g_warning ("Unknown stream type 0x%x", type);
+#endif
+ return NULL;
+ }
+
+ /* It looks like directory (and root directory) sizes are sometimes
bogus */
+ size = GSF_LE_GET_GUINT32 (data + DIRENT_FILE_SIZE);
+ g_return_val_if_fail (type == DIRENT_TYPE_DIR || type ==
DIRENT_TYPE_ROOTDIR ||
+ size <= (guint32)gsf_input_size(ole->input),
NULL);
+
+ dirent = g_new0 (MSOleDirent, 1);
+ dirent->index = entry;
+ dirent->size = size;
+ /* Store the class id which is 16 byte identifier used by some apps */
+ memcpy(dirent->clsid, data + DIRENT_CLSID, sizeof(dirent->clsid));
+
+ /* root dir is always big block */
+ dirent->use_sb = parent && (size < ole->info->threshold);
+ dirent->first_block = (GSF_LE_GET_GUINT32 (data + DIRENT_FIRSTBLOCK));
+ dirent->is_directory = (type != DIRENT_TYPE_FILE);
+ dirent->children = NULL;
+ prev = GSF_LE_GET_GUINT32 (data + DIRENT_PREV);
+ next = GSF_LE_GET_GUINT32 (data + DIRENT_NEXT);
+ child = GSF_LE_GET_GUINT32 (data + DIRENT_CHILD);
+ name_len = GSF_LE_GET_GUINT16 (data + DIRENT_NAME_LEN);
+ dirent->name = NULL;
+ if (0 < name_len && name_len <= DIRENT_MAX_NAME_SIZE) {
+ gunichar2 uni_name [DIRENT_MAX_NAME_SIZE+1];
+ gchar const *end;
+ int i;
+
+ /* address@hidden
+ * Sometimes, rarely, people store the stream name as ascii
+ * rather than utf16. Do a validation first just in case.
+ */
+ if (!g_utf8_validate (data, -1, &end) ||
+ ((guint8 const *)end - data + 1) != name_len) {
+ /* be wary about endianness */
+ for (i = 0 ; i < name_len ; i += 2)
+ uni_name [i/2] = GSF_LE_GET_GUINT16 (data + i);
+ uni_name [i/2] = 0;
+
+ dirent->name = g_utf16_to_utf8 (uni_name, -1, NULL,
NULL, NULL);
+ } else
+ dirent->name = g_strndup ((gchar *)data,
(gsize)((guint8 const *)end - data + 1));
+ }
+ /* be really anal in the face of screwups */
+ if (dirent->name == NULL)
+ dirent->name = g_strdup ("");
+ dirent->collation_name = g_utf8_collate_key (dirent->name, -1);
+
+ if (parent != NULL)
+ parent->children = g_list_insert_sorted (parent->children,
+ dirent, (GCompareFunc)ole_dirent_cmp);
+
+ /* NOTE : These links are a tree, not a linked list */
+ if (prev != entry)
+ ole_dirent_new (ole, prev, parent);
+ if (next != entry)
+ ole_dirent_new (ole, next, parent);
+
+ if (dirent->is_directory)
+ ole_dirent_new (ole, child, dirent);
+ return dirent;
+}
+
+static void
+ole_dirent_free (MSOleDirent *dirent)
+{
+ GList *tmp;
+ g_return_if_fail (dirent != NULL);
+
+ g_free (dirent->name);
+ g_free (dirent->collation_name);
+
+ for (tmp = dirent->children; tmp; tmp = tmp->next)
+ ole_dirent_free ((MSOleDirent *)tmp->data);
+ g_list_free (dirent->children);
+ g_free (dirent);
+}
+
+/*****************************************************************************/
+
+static void
+ole_info_unref (MSOleInfo *info)
+{
+ if (info->ref_count-- != 1)
+ return;
+
+ ols_bat_release (&info->bb.bat);
+ ols_bat_release (&info->sb.bat);
+ if (info->root_dir != NULL) {
+ ole_dirent_free (info->root_dir);
+ info->root_dir = NULL;
+ }
+ if (info->sb_file != NULL) {
+ gsf_input_finalize(info->sb_file);
+ info->sb_file = NULL;
+ }
+ g_free (info);
+}
+
+static MSOleInfo *
+ole_info_ref (MSOleInfo *info)
+{
+ info->ref_count++;
+ return info;
+}
+
+static void
+gsf_infile_msole_init (GsfInfileMSOle * ole)
+{
+ ole->cur_offset = 0;
+ ole->size = 0;
+ ole->input = NULL;
+ ole->info = NULL;
+ ole->bat.block = NULL;
+ ole->bat.num_blocks = 0;
+ ole->cur_block = BAT_MAGIC_UNUSED;
+ ole->stream.buf = NULL;
+ ole->stream.buf_size = 0;
+}
+
+static void
+gsf_infile_msole_finalize (GsfInfileMSOle * ole)
+{
+ if (ole->input != NULL) {
+ gsf_input_finalize(ole->input);
+ ole->input = NULL;
+ }
+ if (ole->info != NULL) {
+ ole_info_unref (ole->info);
+ ole->info = NULL;
+ }
+ ols_bat_release (&ole->bat);
+
+ g_free (ole->stream.buf);
+ free(ole);
+}
+
+/**
+ * ole_dup :
+ * @src :
+ *
+ * Utility routine to _partially_ replicate a file. It does NOT copy the bat
+ * blocks, or init the dirent.
+ *
+ * Return value: the partial duplicate.
+ **/
+static GsfInfileMSOle *
+ole_dup (GsfInfileMSOle const * src)
+{
+ GsfInfileMSOle *dst;
+ struct GsfInput *input;
+
+ g_return_val_if_fail (src != NULL, NULL);
+
+ dst = malloc(sizeof(GsfInfileMSOle));
+ if (dst == NULL)
+ return NULL;
+ gsf_infile_msole_init(dst);
+ input = gsf_input_dup (src->input);
+ if (input == NULL) {
+ gsf_infile_msole_finalize(dst);
+ return NULL;
+ }
+ dst->input = input;
+ dst->info = ole_info_ref (src->info);
+
+ /* buf and buf_size are initialized to NULL */
+
+ return dst;
+}
+
+/**
+ * ole_init_info :
+ * @ole :
+ *
+ * Read an OLE header and do some sanity checking
+ * along the way.
+ *
+ * Return value: TRUE on error
+ **/
+static gboolean
+ole_init_info (GsfInfileMSOle *ole)
+{
+ static guint8 const signature[] =
+ { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
+ guint8 const *header, *tmp;
+ guint32 *metabat = NULL;
+ MSOleInfo *info;
+ guint32 bb_shift, sb_shift, num_bat, num_metabat, last, dirent_start;
+ guint32 metabat_block, *ptr;
+
+ /* check the header */
+ if (gsf_input_seek (ole->input, (off_t) 0, SEEK_SET) ||
+ NULL == (header = gsf_input_read (ole->input, OLE_HEADER_SIZE,
NULL)) ||
+ 0 != memcmp (header, signature, sizeof (signature))) {
+ return TRUE;
+ }
+
+ bb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_BB_SHIFT);
+ sb_shift = GSF_LE_GET_GUINT16 (header + OLE_HEADER_SB_SHIFT);
+ num_bat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_BAT);
+ dirent_start = GSF_LE_GET_GUINT32 (header + OLE_HEADER_DIRENT_START);
+ metabat_block = GSF_LE_GET_GUINT32 (header + OLE_HEADER_METABAT_BLOCK);
+ num_metabat = GSF_LE_GET_GUINT32 (header + OLE_HEADER_NUM_METABAT);
+
+ /* Some sanity checks
+ * 1) There should always be at least 1 BAT block
+ * 2) It makes no sense to have a block larger than 2^31 for now.
+ * Maybe relax this later, but not much.
+ */
+ if (6 > bb_shift || bb_shift >= 31 || sb_shift > bb_shift) {
+ return TRUE;
+ }
+
+ info = g_new0 (MSOleInfo, 1);
+ ole->info = info;
+
+ info->ref_count = 1;
+ info->bb.shift = bb_shift;
+ info->bb.size = 1 << info->bb.shift;
+ info->bb.filter = info->bb.size - 1;
+ info->sb.shift = sb_shift;
+ info->sb.size = 1 << info->sb.shift;
+ info->sb.filter = info->sb.size - 1;
+ info->threshold = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_THRESHOLD);
+ info->sbat_start = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_SBAT_START);
+ info->num_sbat = GSF_LE_GET_GUINT32 (header +
OLE_HEADER_NUM_SBAT);
+ info->max_block = (gsf_input_size (ole->input) - OLE_HEADER_SIZE)
/ info->bb.size;
+ info->sb_file = NULL;
+
+ if (info->num_sbat == 0 && info->sbat_start != BAT_MAGIC_END_OF_CHAIN) {
+#if 0
+ g_warning ("There is are not supposed to be any blocks in the
small block allocation table, yet there is a link to some. Ignoring it.");
+#endif
+ }
+
+ /* very rough heuristic, just in case */
+ if (num_bat < info->max_block) {
+ info->bb.bat.num_blocks = num_bat * (info->bb.size /
BAT_INDEX_SIZE);
+ info->bb.bat.block = g_new0 (guint32,
info->bb.bat.num_blocks);
+
+ metabat = (guint32 *)g_alloca (MAX (info->bb.size,
OLE_HEADER_SIZE));
+
+ /* Reading the elements invalidates this memory, make copy */
+ gsf_ole_get_guint32s (metabat, header + OLE_HEADER_START_BAT,
+ OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
+ last = num_bat;
+ if (last > OLE_HEADER_METABAT_SIZE)
+ last = OLE_HEADER_METABAT_SIZE;
+
+ ptr = ole_info_read_metabat (ole, info->bb.bat.block,
+ info->bb.bat.num_blocks, metabat, metabat + last);
+ num_bat -= last;
+ } else
+ ptr = NULL;
+
+ last = (info->bb.size - BAT_INDEX_SIZE) / BAT_INDEX_SIZE;
+ while (ptr != NULL && num_metabat-- > 0) {
+ tmp = ole_get_block (ole, metabat_block, NULL);
+ if (tmp == NULL) {
+ ptr = NULL;
+ break;
+ }
+
+ /* Reading the elements invalidates this memory, make copy */
+ gsf_ole_get_guint32s (metabat, tmp, (int)info->bb.size);
+
+ if (num_metabat == 0) {
+ if (last < num_bat) {
+ /* there should be less that a full metabat
block
+ * remaining */
+ ptr = NULL;
+ break;
+ }
+ last = num_bat;
+ } else if (num_metabat > 0) {
+ metabat_block = metabat[last];
+ num_bat -= last;
+ }
+
+ ptr = ole_info_read_metabat (ole, ptr,
+ info->bb.bat.num_blocks, metabat, metabat + last);
+ }
+
+ if (ptr == NULL) {
+ return TRUE;
+ }
+
+ /* Read the directory's bat, we do not know the size */
+ if (ole_make_bat (&info->bb.bat, 0, dirent_start, &ole->bat)) {
+ return TRUE;
+ }
+
+ /* Read the directory */
+ ole->dirent = info->root_dir = ole_dirent_new (ole, 0, NULL);
+ if (ole->dirent == NULL) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static guint8 const *
+gsf_infile_msole_read (GsfInfileMSOle *ole, size_t num_bytes, guint8 *buffer)
+{
+ off_t first_block, last_block, raw_block, offset, i;
+ guint8 const *data;
+ guint8 *ptr;
+ size_t count;
+
+ /* small block files are preload */
+ if (ole->dirent != NULL && ole->dirent->use_sb) {
+ if (buffer != NULL) {
+ memcpy (buffer, ole->stream.buf + ole->cur_offset,
num_bytes);
+ ole->cur_offset += num_bytes;
+ return buffer;
+ }
+ data = ole->stream.buf + ole->cur_offset;
+ ole->cur_offset += num_bytes;
+ return data;
+ }
+
+ /* GsfInput guarantees that num_bytes > 0 */
+ first_block = OLE_BIG_BLOCK (ole->cur_offset, ole);
+ last_block = OLE_BIG_BLOCK (ole->cur_offset + num_bytes - 1, ole);
+ offset = ole->cur_offset & ole->info->bb.filter;
+
+ /* optimization : are all the raw blocks contiguous */
+ i = first_block;
+ raw_block = ole->bat.block [i];
+ while (++i <= last_block && ++raw_block == ole->bat.block [i])
+ ;
+ if (i > last_block) {
+ /* optimization don't seek if we don't need to */
+ if (ole->cur_block != first_block) {
+ if (gsf_input_seek (ole->input,
+ (off_t)(MAX (OLE_HEADER_SIZE,
ole->info->bb.size) + (ole->bat.block [first_block] << ole->info->bb.shift) +
offset),
+ SEEK_SET) < 0)
+ return NULL;
+ }
+ ole->cur_block = last_block;
+ return gsf_input_read (ole->input, num_bytes, buffer);
+ }
+
+ /* damn, we need to copy it block by block */
+ if (buffer == NULL) {
+ if (ole->stream.buf_size < num_bytes) {
+ if (ole->stream.buf != NULL)
+ g_free (ole->stream.buf);
+ ole->stream.buf_size = num_bytes;
+ ole->stream.buf = g_new (guint8, num_bytes);
+ }
+ buffer = ole->stream.buf;
+ }
+
+ ptr = buffer;
+ for (i = first_block ; i <= last_block ; i++ , ptr += count, num_bytes
-= count) {
+ count = ole->info->bb.size - offset;
+ if (count > num_bytes)
+ count = num_bytes;
+ data = ole_get_block (ole, ole->bat.block [i], NULL);
+ if (data == NULL)
+ return NULL;
+
+ /* TODO : this could be optimized to avoid the copy */
+ memcpy (ptr, data + offset, count);
+ offset = 0;
+ }
+ ole->cur_block = BAT_MAGIC_UNUSED;
+ ole->cur_offset += num_bytes;
+ return buffer;
+}
+
+static struct GsfInput *
+gsf_infile_msole_new_child (GsfInfileMSOle *parent,
+ MSOleDirent *dirent)
+{
+ GsfInfileMSOle * child;
+ MSOleInfo *info;
+ MSOleBAT const *metabat;
+ struct GsfInput *sb_file = NULL;
+ size_t size_guess;
+ char * buf;
+
+
+ if ( (dirent->index != 0) &&
+ (dirent->is_directory) ) {
+ /* be wary. It seems as if some implementations pretend that
the
+ * directories contain data */
+ return gsf_input_new("",
+ (off_t) 0,
+ 0);
+ }
+ child = ole_dup (parent);
+ if (child == NULL)
+ return NULL;
+ child->dirent = dirent;
+ child->size = (off_t) dirent->size;
+
+ info = parent->info;
+
+ if (dirent->use_sb) { /* build the bat */
+ metabat = &info->sb.bat;
+ size_guess = dirent->size >> info->sb.shift;
+ sb_file = ole_info_get_sb_file (parent);
+ } else {
+ metabat = &info->bb.bat;
+ size_guess = dirent->size >> info->bb.shift;
+ }
+ if (ole_make_bat (metabat, size_guess + 1, dirent->first_block,
&child->bat)) {
+ gsf_infile_msole_finalize(child);
+ return NULL;
+ }
+
+ if (dirent->use_sb) {
+ unsigned i;
+ guint8 const *data;
+
+ if (sb_file == NULL) {
+ gsf_infile_msole_finalize(child);
+ return NULL;
+ }
+
+ child->stream.buf_size = info->threshold;
+ child->stream.buf = g_new (guint8, info->threshold);
+
+ for (i = 0 ; i < child->bat.num_blocks; i++)
+ if (gsf_input_seek (sb_file,
+ (off_t)(child->bat.block [i] <<
info->sb.shift), SEEK_SET) < 0 ||
+ (data = gsf_input_read (sb_file,
+ info->sb.size,
+ child->stream.buf + (i << info->sb.shift))) ==
NULL) {
+ gsf_infile_msole_finalize(child);
+ return NULL;
+ }
+ }
+ buf = malloc(child->size);
+ if (buf == NULL) {
+ gsf_infile_msole_finalize(child);
+ return NULL;
+ }
+ if (NULL == gsf_infile_msole_read(child,
+ child->size,
+ buf)) {
+ gsf_infile_msole_finalize(child);
+ return NULL;
+ }
+ gsf_infile_msole_finalize(child);
+ return gsf_input_new(buf,
+ (off_t) dirent->size,
+ 1);
+}
+
+
+static struct GsfInput *
+gsf_infile_msole_child_by_index (GsfInfileMSOle * ole, int target)
+{
+ GList *p;
+
+ for (p = ole->dirent->children; p != NULL ; p = p->next)
+ if (target-- <= 0)
+ return gsf_infile_msole_new_child (ole,
+ (MSOleDirent *)p->data);
+ return NULL;
+}
+
+static char const *
+gsf_infile_msole_name_by_index (GsfInfileMSOle * ole, int target)
+{
+ GList *p;
+
+ for (p = ole->dirent->children; p != NULL ; p = p->next)
+ if (target-- <= 0)
+ return ((MSOleDirent *)p->data)->name;
+ return NULL;
+}
+
+static int
+gsf_infile_msole_num_children (GsfInfileMSOle * ole)
+{
+ g_return_val_if_fail (ole->dirent != NULL, -1);
+
+ if (!ole->dirent->is_directory)
+ return -1;
+ return g_list_length (ole->dirent->children);
+}
+
+
+/**
+ * gsf_infile_msole_new :
+ * @source :
+ *
+ * Opens the root directory of an MS OLE file.
+ * NOTE : adds a reference to @source
+ *
+ * Returns : the new ole file handler
+ **/
+static GsfInfileMSOle *
+gsf_infile_msole_new (struct GsfInput *source)
+{
+ GsfInfileMSOle * ole;
+
+ ole = malloc(sizeof(GsfInfileMSOle));
+ if (ole == NULL)
+ return NULL;
+ gsf_infile_msole_init(ole);
+ ole->input = source;
+ ole->size = (off_t) 0;
+
+ if (ole_init_info (ole)) {
+ gsf_infile_msole_finalize(ole);
+ return NULL;
+ }
+
+ return ole;
+}
+
+
+
+
+
+
+/* ******************************** main extraction code
************************ */
+
/* using libgobject, needs init! */
void __attribute__ ((constructor)) ole_gobject_init(void) {
g_type_init();
@@ -45,8 +1390,8 @@
addKeyword(EXTRACTOR_KeywordList *oldhead,
const char *phrase,
EXTRACTOR_KeywordType type) {
+ EXTRACTOR_KeywordList * keyword;
- EXTRACTOR_KeywordList * keyword;
if (strlen(phrase) == 0)
return oldhead;
if (0 == strcmp(phrase, "\"\""))
@@ -279,6 +1624,7 @@
guint32 len;
gsize gslen;
gboolean const is_vector = type & LE_VT_VECTOR;
+ GError * error;
g_return_val_if_fail (!(type & (unsigned)(~0x1fff)), NULL); /* not valid in
a prop set */
@@ -346,6 +1692,11 @@
break;
case LE_VT_CY : d (puts ("VT_CY"););
+ /* 8-byte two's complement integer (scaled by 10,000) */
+ /* CHEAT : just store as an int64 for now */
+ g_return_val_if_fail (*data + 8 <= data_end, NULL);
+ g_value_init (res, G_TYPE_INT64);
+ g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
break;
case LE_VT_DATE : d (puts ("VT_DATE"););
@@ -394,53 +1745,82 @@
case LE_VT_I8 : d (puts ("VT_I8"););
g_return_val_if_fail (*data + 8 <= data_end, NULL);
g_value_init (res, G_TYPE_INT64);
- *data += 8;
+ g_value_set_int64 (res, GSF_LE_GET_GINT64 (*data));
+ *data += 8;
break;
case LE_VT_UI8 : d (puts ("VT_UI8"););
g_return_val_if_fail (*data + 8 <= data_end, NULL);
g_value_init (res, G_TYPE_UINT64);
+ g_value_set_uint64 (res, GSF_LE_GET_GUINT64 (*data));
*data += 8;
break;
case LE_VT_LPSTR : d (puts ("VT_LPSTR"););
+ /*
+ * This is the representation of many strings. It is stored in
+ * the same representation as VT_BSTR. Note that the serialized
+ * representation of VP_LPSTR has a preceding byte count, whereas
+ * the in-memory representation does not.
+ */
/* be anal and safe */
g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
+
len = GSF_LE_GET_GUINT32 (*data);
-
+
g_return_val_if_fail (len < 0x10000, NULL);
g_return_val_if_fail (*data + 4 + len*section->char_size <= data_end,
NULL);
-
- gslen = 0;
+
+ error = NULL;
+ d (gsf_mem_dump (*data + 4, len * section->char_size););
str = g_convert_with_iconv (*data + 4,
len * section->char_size,
- section->iconv_handle, &gslen, NULL, NULL);
- len = (guint32)gslen;
-
+ section->iconv_handle, NULL, NULL, &error);
+
g_value_init (res, G_TYPE_STRING);
- g_value_set_string (res, str);
- g_free (str);
- *data += 4 + len;
+ if (NULL != str) {
+ g_value_set_string (res, str);
+ g_free (str);
+ } else if (NULL != error) {
+ g_warning ("error: %s", error->message);
+ g_error_free (error);
+ } else {
+ g_warning ("unknown error converting string property, using blank");
+ }
+ *data += 4 + len * section->char_size;
break;
case LE_VT_LPWSTR : d (puts ("VT_LPWSTR"););
+ /*
+ * A counted and null-terminated Unicode string; a DWORD character
+ * count (where the count includes the terminating null) followed
+ * by that many Unicode (16-bit) characters. Note that the count
+ * is character count, not byte count.
+ */
/* be anal and safe */
g_return_val_if_fail (*data + 4 <= data_end, NULL);
-
+
len = GSF_LE_GET_GUINT32 (*data);
-
+
g_return_val_if_fail (len < 0x10000, NULL);
g_return_val_if_fail (*data + 4 + len <= data_end, NULL);
-
+
+ error = NULL;
+ d (gsf_mem_dump (*data + 4, len*2););
str = g_convert (*data + 4, len*2,
- "UTF-8", "UTF-16LE", &gslen, NULL, NULL);
- len = (guint32)gslen;
-
+ "UTF-8", "UTF-16LE", NULL, NULL, &error);
+
g_value_init (res, G_TYPE_STRING);
- g_value_set_string (res, str);
- g_free (str);
- *data += 4 + len;
+ if (NULL != str) {
+ g_value_set_string (res, str);
+ g_free (str);
+ } else if (NULL != error) {
+ g_warning ("error: %s", error->message);
+ g_error_free (error);
+ } else {
+ g_warning ("unknown error converting string property, using blank");
+ }
+ *data += 4 + len*2;
break;
case LE_VT_FILETIME : d (puts ("VT_FILETIME"););
@@ -467,21 +1847,35 @@
break;
}
case LE_VT_BLOB : d (puts ("VT_BLOB"););
+ g_free (res);
+ res = NULL;
break;
case LE_VT_STREAM : d (puts ("VT_STREAM"););
- break;
+ g_free (res);
+ res = NULL;
+ break;
case LE_VT_STORAGE : d (puts ("VT_STORAGE"););
+ g_free (res);
+ res = NULL;
break;
case LE_VT_STREAMED_OBJECT: d (puts ("VT_STREAMED_OBJECT"););
+ g_free (res);
+ res = NULL;
break;
case LE_VT_STORED_OBJECT : d (puts ("VT_STORED_OBJECT"););
+ g_free (res);
+ res = NULL;
break;
case LE_VT_BLOB_OBJECT : d (puts ("VT_BLOB_OBJECT"););
+ g_free (res);
+ res = NULL;
break;
case LE_VT_CF : d (puts ("VT_CF"););
break;
case LE_VT_CLSID : d (puts ("VT_CLSID"););
*data += 16;
+ g_free (res);
+ res = NULL;
break;
case LE_VT_ERROR :
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r2048 - Extractor/src/plugins/ole2,
grothoff <=