709 lines
17 KiB
C
709 lines
17 KiB
C
/*
|
|
pptHtml - Format a PowerPoint Presentation into Html
|
|
Copyright 2002 Charles N Wyble <jackshck@thewybles.com>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
/* The code used to extract title, author, subject and keywords was based
|
|
on original code from libcole2 written by
|
|
|
|
Michael Meeks (michael@helixcode.com)
|
|
Arturo Tena (arturo@directmail.org)
|
|
*/
|
|
|
|
#if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))
|
|
#include "config.h" /* Created by ./configure script */
|
|
#include "support.h" /* Needs to be before internal.h */
|
|
#include "internal.h" /* Needs to be before cole */
|
|
#include "cole.h"
|
|
#else
|
|
#include "config.h.in" /* Created by ./configure script */
|
|
#include "support.h" /* Needs to be before internal.h */
|
|
#include "internal.h" /* Needs to be before cole */
|
|
#include "cole.h.in"
|
|
#include <dir.h>
|
|
#endif
|
|
|
|
#include "vector.h"
|
|
#include <stdio.h>
|
|
#include <string.h> /* For strcpy() */
|
|
#include <ctype.h> /* For isprint */
|
|
#include <stdlib.h> /* For exit() */
|
|
#include <limits.h> /* For MAX_PATH */
|
|
#include "ppthtml.h" /* For SummaryInfo things*/
|
|
|
|
|
|
/* Program name used in usage and error messages. */
#define PRGNAME "pptHtml"

/* Program version: hard-coded for Win32/Borland builds, otherwise taken
   from the VERSION macro (expected to come from the configure headers). */
#if defined( __WIN32__ ) || defined( __BORLANDC__ )
#define PRGVER "0.4"
#else
#define PRGVER VERSION
#endif

/* Size in bytes of the buffer that collects one text atom. */
#define WORK_SIZE 8192

/* General purpose scratch buffer size. */
#define BUFFER_SIZE 128

/* Names of the streams inside the compound file that main() tries,
   in this order, until one of them can be located. */
static char FileName[2][32] =
{
	"/PowerPoint Document",		/* Power Point 97 & 2000 */
	"/PP40"				/* Everything else ? */
};
|
|
|
|
|
|
/* Function Prototypes */
|
|
COLE_LOCATE_ACTION_FUNC dump_file;
|
|
static void container_processor(int);
|
|
static void atom_processor(int, int, int, unsigned char);
|
|
static void print_unicode(unsigned char *, int);
|
|
static void print_utf8(unsigned short c);
|
|
static void put_utf8(unsigned short c);
|
|
static OleSummary *summary_open_stream(COLEFILE *cf, const PropertySetID psid);
|
|
static void close_summary(OleSummary *si);
|
|
static gboolean read_items(OleSummary *si, PropertySetID ps_id);
|
|
static char *summary_get_string (OleSummary *si,OleSummaryPID id,gboolean *available);
|
|
|
|
/* Global data */
|
|
static char filename[128];
|
|
static unsigned char working_buffer[WORK_SIZE];
|
|
static int buf_idx=0;
|
|
static int output_this_container = 0;
|
|
static int past_first_slide = 0;
|
|
static int last_container = 0;
|
|
static char *title;
|
|
static char *author;
|
|
static char *subject;
|
|
static char *keywords;
|
|
|
|
/*
 * Program entry point.
 *
 * Expects the path of a PowerPoint file as argv[1].  The file is mounted
 * with libcole, the title/subject/author/keywords strings are read from
 * the "\005SummaryInformation" stream, and then each stream name in
 * FileName[] is tried in turn; dump_file() is invoked by
 * cole_locate_filename() for the one that is found and writes the HTML
 * to stdout.
 *
 * Returns 0 on success and 1 when neither stream could be located.
 * Usage errors, mount errors and summary errors terminate with exit(1).
 */
int main (int argc, char **argv)
{
	int f_ptr = 0;		/* index into FileName[] */
	int status = 0;		/* process exit status */
	COLEFS *cfs;
	COLEFILE *cf;
	COLERRNO colerrno;
	gboolean ok;
	OleSummary *si;

	if (argc < 2)
	{
		fprintf (stderr, "pptHtml - Outputs Power Point files as Html.\n"
				 "Usage: "PRGNAME" <FILE>\n");
		exit (1);
	}

	/* Refuse paths that do not fit instead of silently truncating them
	   and then opening some other (or no) file. */
	if (strlen(argv[1]) >= sizeof filename)
	{
		fprintf (stderr, PRGNAME": file name too long\n");
		exit (1);
	}
	strcpy(filename, argv[1]);

	cfs = cole_mount (filename, &colerrno);
	if (cfs == NULL)
	{
		cole_perror (PRGNAME, colerrno);
		exit (1);
	}

	cf = cole_fopen (cfs, "/\005SummaryInformation", &colerrno);
	if (!cf) {
		printf("ERROR-No Summary Information available for this document\n");
		cole_umount (cfs, NULL);
		exit (1);
	}

	si = summary_open_stream(cf, OLE_PS_SUMMARY_INFO);
	if (!si) {
		printf ("Could not open SummaryInformation\n");
		/* Release what was acquired so far before bailing out. */
		cole_fclose (cf, &colerrno);
		cole_umount (cfs, NULL);
		exit(1);
	}

	/* Each of these may be NULL when the property is missing; `ok'
	   mirrors that but is not consulted. */
	title = summary_get_string (si, OLE_SUMMARY_TITLE, &ok);
	subject = summary_get_string (si, OLE_SUMMARY_SUBJECT, &ok);
	author = summary_get_string (si, OLE_SUMMARY_AUTHOR, &ok);
	keywords = summary_get_string (si, OLE_SUMMARY_KEYWORDS, &ok);

	/* NOTE(review): `si' itself is not released here; it lives until
	   the process exits. */
	cole_fclose (cf, &colerrno);

	/* cole_locate_filename() returns non-zero on failure: try the
	   second stream name once, then give up. */
	while (cole_locate_filename (cfs, FileName[f_ptr], NULL, dump_file, &colerrno))
	{
		if (f_ptr)
		{	/* Two strikes...we're out! */
			cole_perror (PRGNAME, colerrno);
			if (colerrno == COLE_EFILENOTFOUND)
				fprintf(stderr, "Section: PowerPoint Document\n");
			status = 1;
			break;
		}
		f_ptr++;
	}

	free (title);
	free (subject);
	free (author);
	free (keywords);

	if (cole_umount (cfs, &colerrno))
	{
		cole_perror (PRGNAME, colerrno);
		exit (1);
	}

	return status;
}
|
|
|
|
/*
 * Callback for cole_locate_filename(): converts one PowerPoint stream to
 * HTML on stdout.
 *
 * cde   - directory entry of the stream to read.
 * _info - unused.
 *
 * The stream is consumed one byte at a time.  The first eight bytes of
 * every record form its header:
 *   bytes 0-1  low 4 bits = version, remaining 12 bits = instance
 *   bytes 2-3  record type (only the low 12 bits are kept)
 *   bytes 4-7  payload length, little endian
 * A record whose version is 0x0F is a container: its header is passed to
 * container_processor() and its contents are parsed as further records.
 * Any other record is an atom whose payload bytes are fed one by one to
 * atom_processor().
 */
void dump_file(COLEDIRENT *cde, void *_info)
{
	unsigned long version = 0, instance = 0, type = 0, length = 0, target = 0, count = 0;
	unsigned char byte;
	COLEFILE *cf;
	COLERRNO err;

	(void)_info;

	cf = cole_fopen_direntry(cde, &err);
	if (cf == NULL)
	{
		cole_perror(PRGNAME, err);
		return;
	}

	/* Output Header */
	printf("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n");
	/* title is NULL when the document has no title property */
	printf("<HTML><HEAD><TITLE>%s</TITLE>\n", title ? title : "");
	if (subject)
		printf("<meta name=\"description\" content=\"%s\">\n", subject);
	if (author)
		printf("<meta name=\"author\" content=\"%s\">\n", author);
	if (keywords)
		printf("<meta name=\"keywords\" content=\"%s\">\n", keywords);
	printf("</HEAD><BODY>\n");

	/* Output body; `count' is the offset of `byte' inside the current record */
	while (cole_fread(cf, &byte, 1, &err))
	{
		/* widen first so that the shifts below happen in unsigned long */
		const unsigned long value = byte;

		switch (count)
		{
		case 0:
			instance = value;
			type = 0;
			length = 0;
			target = 80;	/* fictitious number, replaced at offset 7 */
			break;
		case 1:
			instance |= (value << 8);
			version = instance & 0x000F;
			instance = (instance >> 4);
			break;
		case 2:
			type = value;
			break;
		case 3:
			type |= (value << 8) & 0x00000FFFUL;
			break;
		case 4:
			length = value;
			break;
		case 5:
			length |= (value << 8);
			break;
		case 6:
			length |= (value << 16);
			break;
		case 7:
			length |= (value << 24);
			target = length;
			if (version == 0x0F)
			{	/* Container: its payload is a sequence of records,
				   so the next byte starts a new header. */
				container_processor((int)type);
				count = 0;
				continue;
			}
			break;
		default:
			/* Payload byte of an atom */
			atom_processor((int)type, (int)(count - 8), (int)(target - 1), byte);
			break;
		}

		/* Last byte of the record reached: start over with a new header */
		if (count == (target + 7))
			count = 0;
		else
			count++;
	}

	if (past_first_slide)
		printf("<HR>");

	printf(" <br>\n");

	/* Output Credit */
	printf("<hr><FONT SIZE=-1>Created with <a href=\"http://chicago.sf.net/xlhtml\">pptHtml</a></FONT><br>\n" );

	/* Output Tail */
	printf("</BODY></HTML>\n");
	cole_fclose(cf, &err);
}
|
|
|
|
static void container_processor(int type)
|
|
{
|
|
if (type == 0x03EE)
|
|
{
|
|
if (past_first_slide)
|
|
printf("<BR><HR><BR>\n");
|
|
else
|
|
past_first_slide = 1;
|
|
}
|
|
switch (type)
|
|
{
|
|
case 0x000D:
|
|
if (last_container == 0x11) /* suppress notes info */
|
|
output_this_container = 0;
|
|
else
|
|
output_this_container = 1;
|
|
break;
|
|
case 0x0FF0:
|
|
output_this_container = 1;
|
|
break;
|
|
default:
|
|
/* printf("Cont:%x|\n", type); */
|
|
output_this_container = 0;
|
|
break;
|
|
}
|
|
last_container = type;
|
|
}
|
|
|
|
static void atom_processor(int type, int count, int buf_last, unsigned char data)
|
|
{
|
|
if ((buf_idx >= WORK_SIZE)||(output_this_container == 0))
|
|
return;
|
|
|
|
if (count == 0)
|
|
{
|
|
memset(working_buffer, 0, WORK_SIZE);
|
|
buf_idx = 0;
|
|
}
|
|
|
|
switch (type)
|
|
{
|
|
case 0x0FA0: /* Text String in unicode */
|
|
working_buffer[buf_idx++] = data;
|
|
if (count == buf_last)
|
|
{
|
|
/* printf("Atom:%x|\n", type); */
|
|
/* working_buffer[buf_idx++] = 0; */
|
|
/* printf("%s<BR>\n", working_buffer); */
|
|
print_unicode(working_buffer, buf_idx);
|
|
printf("<BR>\n");
|
|
}
|
|
break;
|
|
case 0x0FA8: /* Text String in ASCII */
|
|
working_buffer[buf_idx++] = data;
|
|
if (count == buf_last)
|
|
{
|
|
int i;
|
|
/* working_buffer[buf_idx++] = 0; */
|
|
/* printf("Atom:%x|\n", type); */
|
|
for (i=0;i<buf_idx; i++)
|
|
{
|
|
/* printf("%02X ", (int)working_buffer[i]); */ /* Debug */
|
|
if (working_buffer[i] == 0x0D)
|
|
printf("<BR>\n");
|
|
else
|
|
putchar(working_buffer[i]);
|
|
}
|
|
printf("<BR>\n");
|
|
}
|
|
break;
|
|
case 0x0FBA: /* CString - unicode... */
|
|
working_buffer[buf_idx++] = data;
|
|
if (count == buf_last)
|
|
{
|
|
/* working_buffer[buf_idx++] = 0; */
|
|
/* printf("%s<BR>\n", working_buffer); */
|
|
/* printf("Atom:%x|\n", type); */
|
|
print_unicode(working_buffer, buf_idx);
|
|
printf("<BR>\n");
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
 * Prints a buffer of 16 bit little endian characters via print_utf8().
 *
 * ucs - the bytes; two per character, low byte first.
 * len - number of valid bytes in `ucs'.
 *
 * When `len' is odd the final byte has no partner; it is printed as a
 * character with a zero high byte rather than pairing it with the byte
 * beyond the end of the data.
 */
static void print_unicode(unsigned char *ucs, int len)
{
	int i;

	for (i = 0; i + 1 < len; i += 2)
		print_utf8((unsigned short)(ucs[i] | (ucs[i + 1] << 8)));

	if (i < len)
		print_utf8(ucs[i]);
}
|
|
|
|
/*
 * Returns the HTML text that has to be written in place of byte `c', or
 * NULL when the byte can be written unchanged.
 *
 * Covers carriage return, the four characters with a special meaning in
 * HTML, and the 0x80-0x9F range that MS uses for printable characters.
 * The latter are given as numeric character references so the output
 * does not depend on the encoding of this source file or of the page.
 */
static const char *html_replacement(unsigned char c)
{
	static const char *const ms_range[32] =
	{
		"&#8364;",	/* 0x80 Euro Symbol */
		NULL,		/* 0x81 */
		"&#8218;",	/* 0x82 baseline single quote */
		"&#402;",	/* 0x83 florin */
		"&#8222;",	/* 0x84 baseline double quote */
		"&#8230;",	/* 0x85 ellipsis */
		"&#8224;",	/* 0x86 dagger */
		"&#8225;",	/* 0x87 double dagger */
		"&#710;",	/* 0x88 circumflex accent */
		"&#8240;",	/* 0x89 permile */
		"&#352;",	/* 0x8A S Hacek */
		"&#8249;",	/* 0x8B left single guillemet */
		"&#338;",	/* 0x8C OE ligature */
		NULL,		/* 0x8D */
		"&#381;",	/* 0x8E Z with caron */
		NULL,		/* 0x8F */
		NULL,		/* 0x90 */
		"&#8216;",	/* 0x91 left single quote */
		"&#8217;",	/* 0x92 right single quote */
		"&#8220;",	/* 0x93 left double quote */
		"&#8221;",	/* 0x94 right double quote */
		"&#8226;",	/* 0x95 bullet */
		"&#8211;",	/* 0x96 endash */
		"&#8212;",	/* 0x97 emdash */
		"&#732;",	/* 0x98 tilde accent */
		"&#8482;",	/* 0x99 trademark ligature */
		"&#353;",	/* 0x9A s Hacek */
		"&#8250;",	/* 0x9B right single guillemet */
		"&#339;",	/* 0x9C oe ligature */
		NULL,		/* 0x9D */
		NULL,		/* 0x9E */
		"&#376;"	/* 0x9F Y Dieresis */
	};

	switch (c)
	{
	case '\r':
		return "<BR>\n";
	case 0x3C:
		return "&lt;";
	case 0x3E:
		return "&gt;";
	case 0x26:
		return "&amp;";
	case 0x22:
		return "&quot;";
	default:
		break;
	}

	if (c >= 0x80 && c <= 0x9F)
		return ms_range[c - 0x80];

	return NULL;
}

/*
 * Writes one character to stdout in a form that is safe inside HTML.
 *
 * c - the character.  Carriage return becomes a line break, `<', `>',
 *     `&' and `"' become entities, mapped bytes in 0x80-0x9F become
 *     numeric character references, everything else is written as is.
 */
static void OutputCharCorrected(unsigned char c)
{
	const char *replacement = html_replacement(c);

	if (replacement != NULL)
		fputs(replacement, stdout);
	else
		putchar(c);
}
|
|
|
|
/*
 * Writes one 16 bit character to stdout.
 *
 * c - the character.  Zero is skipped.  Values below 0x80 are passed to
 *     OutputCharCorrected(); values below 0x800 are written as a two
 *     byte sequence and all others as a three byte sequence, the
 *     trailing bytes being produced by put_utf8().
 */
static void print_utf8(unsigned short c)
{
	if (c == 0)
		return;

	if (c >= 0x800)
	{	/* three bytes */
		putchar(0xE0 | (c >> 12));
		put_utf8(c >> 6);
		put_utf8(c);
		return;
	}

	if (c >= 0x80)
	{	/* two bytes */
		putchar(0xC0 | (c >> 6));
		put_utf8(c);
		return;
	}

	/* single byte */
	OutputCharCorrected(c);
}
|
|
|
|
/*
 * Writes a trailing byte of a multi byte sequence to stdout: the low
 * six bits of `c' with the top bit set.
 */
static void put_utf8(unsigned short c)
{
	const int low_six_bits = c & 0x3F;

	putchar(0x80 | low_six_bits);
}
|
|
|
|
OleSummary *
|
|
summary_open_stream (COLEFILE *cf, const PropertySetID psid)
|
|
{
|
|
uint8_t data[64];
|
|
uint16_t byte_order;
|
|
gboolean panic=FALSE;
|
|
uint32_t os_version;
|
|
OleSummary *si;
|
|
int i, sections;
|
|
COLERRNO colerrno;
|
|
|
|
if (cf == NULL){
|
|
printf("No cole file info available\n");
|
|
return NULL;
|
|
}
|
|
|
|
/* reading data */
|
|
cole_fread (cf, data, 28, &colerrno);
|
|
|
|
si = malloc(sizeof(OleSummary));
|
|
si->sections = malloc(sizeof(ole_vector));
|
|
si->items = malloc(sizeof(item_vector));
|
|
|
|
si->s = cf;
|
|
si->write_items = NULL;
|
|
si->read_mode = TRUE;
|
|
|
|
byte_order = GET_UINT16(data);
|
|
if (byte_order != 0xfffe)
|
|
panic = TRUE;
|
|
|
|
if (GET_UINT16 (data + 2) != 0) /* Format */
|
|
panic = TRUE;
|
|
|
|
os_version = GET_UINT32 (data + 4);
|
|
|
|
for (i = 0; i < 16; i++)
|
|
si->class_id[i] = data[8 + i];
|
|
|
|
sections = GET_UINT32 (data + 24);
|
|
|
|
if (panic) {
|
|
close_summary(si);
|
|
return NULL;
|
|
}
|
|
|
|
create_ole_vector(si->sections);
|
|
|
|
for (i = 0; i < sections; i++) {
|
|
OleSummarySection sect;
|
|
if (!cole_fread (cf, data, 16 + 4, &colerrno)){
|
|
close_summary(si);
|
|
return NULL;
|
|
}
|
|
|
|
if (psid == OLE_PS_SUMMARY_INFO) {
|
|
if (GET_UINT32 (data + 0) == sum_fmtid[0] &&
|
|
GET_UINT32 (data + 4) == sum_fmtid[1] &&
|
|
GET_UINT32 (data + 8) == sum_fmtid[2] &&
|
|
GET_UINT32 (data + 12) == sum_fmtid[3]) {
|
|
si->ps_id = OLE_PS_SUMMARY_INFO;
|
|
sect.ps_id = OLE_PS_SUMMARY_INFO;
|
|
|
|
} else {
|
|
close_summary(si);
|
|
return NULL;
|
|
}
|
|
|
|
}
|
|
sect.offset = GET_UINT32 (data + 16);
|
|
append_ole(si->sections,sect);
|
|
}
|
|
|
|
create_item_vector(si->items);
|
|
|
|
for (i = 0; i < sections; i++) {
|
|
OleSummarySection st;
|
|
|
|
st = ole_at(si->sections,i);
|
|
if (!read_items(si, st.ps_id)) {
|
|
printf("Serious error reading items\n");
|
|
close_summary(si);
|
|
return NULL;
|
|
}
|
|
}
|
|
return si;
|
|
}
|
|
|
|
void
|
|
close_summary(OleSummary *si)
|
|
{
|
|
|
|
if(si->sections)
|
|
free(si->sections);
|
|
destroy_ole_vector(si->sections);
|
|
si->sections = NULL;
|
|
|
|
if(si->items)
|
|
free(si->items);
|
|
destroy_item_vector(si->items);
|
|
si->items=NULL;
|
|
|
|
free(si);
|
|
|
|
}
|
|
|
|
gboolean
|
|
read_items (OleSummary *si, PropertySetID ps_id)
|
|
{
|
|
int sect;
|
|
COLERRNO colerrno;
|
|
|
|
for (sect = 0; sect < si->sections->index; sect++) {
|
|
OleSummarySection st;
|
|
uint8_t data[8];
|
|
int i;
|
|
|
|
st = ole_at(si->sections,sect);
|
|
|
|
if (st.ps_id != ps_id)
|
|
continue;
|
|
|
|
cole_fseek(si->s, st.offset, COLE_SEEK_SET, &colerrno);
|
|
if (!cole_fread (si->s, data, 8, &colerrno))
|
|
return FALSE;
|
|
|
|
st.bytes = GET_UINT32 (data);
|
|
st.props = GET_UINT32 (data + 4);
|
|
|
|
if (st.props == 0)
|
|
continue;
|
|
|
|
for (i = 0; i < st.props; i++) {
|
|
item_t item;
|
|
if (!cole_fread (si->s, data, 8, &colerrno))
|
|
return FALSE;
|
|
|
|
item.id = GET_UINT32 (data);
|
|
item.offset = GET_UINT32 (data + 4);
|
|
item.offset = item.offset + st.offset;
|
|
item.ps_id = ps_id;
|
|
append_item(si->items, item);
|
|
}
|
|
}
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Seeks to the correct place, and returns a handle or NULL on failure */
|
|
static item_t *
|
|
seek_to_record (OleSummary *si, OleSummaryPID id)
|
|
{
|
|
int i;
|
|
COLERRNO colerrno;
|
|
|
|
for (i = 0; i < si->items->index; i++) {
|
|
item_t *item = &item_at(si->items,i);
|
|
if (item->id == SUMMARY_ID(id)) {
|
|
gboolean is_summary;
|
|
|
|
is_summary = ((si->ps_id == OLE_PS_SUMMARY_INFO) &&
|
|
(item->ps_id == OLE_PS_SUMMARY_INFO));
|
|
|
|
if (is_summary) {
|
|
cole_fseek(si->s, item->offset, COLE_SEEK_SET, &colerrno);
|
|
return item;
|
|
}
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
char *
|
|
summary_get_string (OleSummary *si, OleSummaryPID id, gboolean *available)
|
|
{
|
|
uint8_t data[8];
|
|
uint32_t type, len;
|
|
char *ans;
|
|
item_t *item;
|
|
COLERRNO colerrno;
|
|
|
|
*available = FALSE;
|
|
|
|
if (!(item = seek_to_record (si, id)))
|
|
return NULL;
|
|
|
|
if (!cole_fread(si->s, data, 8, &colerrno))
|
|
return NULL;
|
|
|
|
type = GET_UINT32 (data);
|
|
len = GET_UINT32 (data + 4);
|
|
|
|
if (type != TYPE_STRING) {
|
|
printf("Summary string type mismatch\n");
|
|
return NULL;
|
|
}
|
|
|
|
ans = allocate_mem(char, len + 1);
|
|
|
|
if (!cole_fread(si->s, ans, len,&colerrno)) {
|
|
free (ans);
|
|
return NULL;
|
|
}
|
|
|
|
ans[len] = '\0';
|
|
|
|
*available = TRUE;
|
|
|
|
return ans;
|
|
}
|
|
|
|
|
|
|