xlhtml/ppthtml/ppthtml.c

709 lines
17 KiB
C

/*
pptHtml - Format a PowerPoint Presentation into Html
Copyright 2002 Charles N Wyble <jackshck@thewybles.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* The code used to extract title, author, subject and keywords was based
on original code from libcole2 written by
Michael Meeks (michael@helixcode.com)
Arturo Tena (arturo@directmail.org)
*/
#if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))
#include "config.h" /* Created by ./configure script */
#include "support.h" /* Needs to be before internal.h */
#include "internal.h" /* Needs to be before cole */
#include "cole.h"
#else
#include "config.h.in" /* Created by ./configure script */
#include "support.h" /* Needs to be before internal.h */
#include "internal.h" /* Needs to be before cole */
#include "cole.h.in"
#include <dir.h>
#endif
#include "vector.h"
#include <stdio.h>
#include <string.h> /* For strcpy() */
#include <ctype.h> /* For isprint */
#include <stdlib.h> /* For exit() */
#include <limits.h> /* For MAX_PATH */
#include "ppthtml.h" /* For SummaryInfo things*/
/* Program name used in the usage text and as the prefix for cole_perror() */
#define PRGNAME "pptHtml"
/* Program version: taken from the configure-generated VERSION except on
   Win32/Borland builds, where it is hard-coded. */
#if !(defined( __WIN32__ ) || defined( __BORLANDC__ ))
#define PRGVER VERSION
#else
#define PRGVER "0.4"
#endif
/* Size in bytes of working_buffer, which collects the payload of one atom */
#define WORK_SIZE 8192
/* Stream names handed to cole_locate_filename() by main(), tried in order */
static char FileName[2][32] = /* The section of the PowerPoint File we read */
{
"/PowerPoint Document", /* Power Point 97 & 2000 */
"/PP40" /* Everything else ? */
};
#define BUFFER_SIZE 128
/* Function Prototypes */
COLE_LOCATE_ACTION_FUNC dump_file;
static void container_processor(int);
static void atom_processor(int, int, int, unsigned char);
static void print_unicode(unsigned char *, int);
static void print_utf8(unsigned short c);
static void put_utf8(unsigned short c);
static OleSummary *summary_open_stream(COLEFILE *cf, const PropertySetID psid);
static void close_summary(OleSummary *si);
static gboolean read_items(OleSummary *si, PropertySetID ps_id);
static char *summary_get_string (OleSummary *si,OleSummaryPID id,gboolean *available);
/* Global data */
/* Copy of the input file name given on the command line (filled by main) */
static char filename[128];
/* Payload bytes of the atom currently being collected by atom_processor() */
static unsigned char working_buffer[WORK_SIZE];
/* Next free position in working_buffer */
static int buf_idx=0;
/* Non-zero while atoms of the current container are to be emitted; set by
   container_processor() and tested by atom_processor() */
static int output_this_container = 0;
/* Becomes 1 once the first 0x03EE container has been seen; afterwards a
   separator is printed before each further one */
static int past_first_slide = 0;
/* Type of the container seen immediately before the current one */
static int last_container = 0;
/* Summary strings read by main() via summary_get_string(), printed by
   dump_file() and freed by main(); each may be NULL when not available */
static char *title;
static char *author;
static char *subject;
static char *keywords;
/*
 * Program entry point: converts the PowerPoint file named by argv[1] to
 * HTML on stdout.
 *
 * The OLE compound file is mounted, title/subject/author/keywords are read
 * from the "\005SummaryInformation" stream into the file-level globals, and
 * the PowerPoint stream is then located and passed to dump_file().  The
 * stream names in FileName[] are tried in order; an error is reported only
 * when the last one fails as well.
 *
 * Returns 0 after processing.  Exits with status 1 on a usage error, an
 * over-long file name, a mount/unmount failure or when the summary
 * information cannot be read.
 */
int main (int argc, char **argv)
{
    int f_ptr = 0;
    COLEFS *cfs;
    COLEFILE *cf;
    COLERRNO colerrno;
    gboolean ok;    /* required by summary_get_string(); value not used */
    OleSummary *si;

    if (argc < 2)
    {
        fprintf (stderr, "pptHtml - Outputs Power Point files as Html.\n"
                 "Usage: "PRGNAME" <FILE>\n");
        exit (1);
    }

    /* Refuse names that do not fit instead of silently truncating them:
       a truncated path could name a different file. */
    if (strlen (argv[1]) >= sizeof filename)
    {
        fprintf (stderr, PRGNAME": file name too long\n");
        exit (1);
    }
    strcpy (filename, argv[1]);

    cfs = cole_mount (filename, &colerrno);
    if (cfs == NULL)
    {
        cole_perror (PRGNAME, colerrno);
        exit (1);
    }

    cf = cole_fopen (cfs, "/\005SummaryInformation", &colerrno);
    if (!cf)
    {
        printf("ERROR-No Summary Information available for this document\n");
        cole_umount (cfs, NULL);
        exit (1);
    }

    si = summary_open_stream(cf, OLE_PS_SUMMARY_INFO);
    if (!si)
    {
        printf ("Could not open SummaryInformation\n");
        /* Release the stream and the mount before giving up */
        cole_fclose (cf, &colerrno);
        cole_umount (cfs, NULL);
        exit(1);
    }

    /* Each of these may come back NULL when the property is absent */
    title = summary_get_string (si, OLE_SUMMARY_TITLE, &ok);
    subject = summary_get_string (si, OLE_SUMMARY_SUBJECT, &ok);
    author = summary_get_string (si, OLE_SUMMARY_AUTHOR, &ok);
    keywords = summary_get_string (si, OLE_SUMMARY_KEYWORDS, &ok);

    /* NOTE(review): si itself is not released here; it lives until the
       process exits. */
    cole_fclose (cf, &colerrno);

    /* cole_locate_filename() returns non-zero on failure, so the loop body
       only runs when a stream name was not found. */
    while (cole_locate_filename (cfs, FileName[f_ptr], NULL, dump_file, &colerrno))
    {
        if (f_ptr)
        {   /* Two strikes...we're out! */
            cole_perror (PRGNAME, colerrno);
            if (colerrno == COLE_EFILENOTFOUND)
                fprintf(stderr, "Section: PowerPoint Document\n");
            break;
        }
        f_ptr++;    /* fall back to the second stream name */
    }

    free (title);
    free (subject);
    free (author);
    free (keywords);
    title = subject = author = keywords = NULL;

    if (cole_umount (cfs, &colerrno))
    {
        cole_perror (PRGNAME, colerrno);
        exit (1);
    }
    return 0;
}
/* Writes s to stdout with the characters that are special in HTML text and
   attribute values replaced by entities.  A NULL s prints nothing. */
static void print_html_escaped(const char *s)
{
    if (s == NULL)
        return;
    for (; *s != '\0'; s++)
    {
        switch (*s)
        {
            case '<':
                fputs("&lt;", stdout);
                break;
            case '>':
                fputs("&gt;", stdout);
                break;
            case '&':
                fputs("&amp;", stdout);
                break;
            case '"':
                fputs("&quot;", stdout);
                break;
            default:
                putchar((unsigned char)*s);
                break;
        }
    }
}

/* Emits one <meta name=... content=...> line; skipped when value is NULL. */
static void print_meta_tag(const char *name, const char *value)
{
    if (value == NULL)
        return;
    printf("<meta name=\"%s\" content=\"", name);
    print_html_escaped(value);
    printf("\">\n");
}

/*
 * Callback for cole_locate_filename(): renders one PowerPoint stream as a
 * complete HTML document on stdout.
 *
 * cde   - directory entry of the stream to read
 * _info - user data from cole_locate_filename(); unused
 *
 * The stream is consumed one byte at a time.  `count` is the position
 * inside the current record: bytes 0-7 form the record header (version in
 * the low nibble of byte 0, type in bytes 2-3, little-endian length in
 * bytes 4-7) and bytes 8.. are the payload.  A header whose version nibble
 * is 0x0F is a container: it is reported to container_processor() and the
 * next byte starts a new record header.  Any other record is an atom whose
 * payload bytes are fed to atom_processor() one by one.
 */
void dump_file(COLEDIRENT *cde, void *_info)
{
    unsigned long version = 0, type = 0, length = 0, target = 0, count = 0;
    unsigned char byte;
    COLEFILE *cf;
    COLERRNO err;

    (void)_info;

    cf = cole_fopen_direntry(cde, &err);
    if (cf == NULL)
    {   /* Nothing can be read; report it instead of reading through NULL */
        cole_perror(PRGNAME, err);
        return;
    }

    /* Output Header.  The summary strings come from the document and are
       escaped so they cannot break out of the markup; a missing title
       yields an empty TITLE element. */
    printf("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n");
    printf("<HTML><HEAD><TITLE>");
    print_html_escaped(title);
    printf("</TITLE>\n");
    print_meta_tag("description", subject);
    print_meta_tag("author", author);
    print_meta_tag("keywords", keywords);
    printf("</HEAD><BODY>\n");

    /* Output body */
    while (cole_fread(cf, &byte, 1, &err))
    {
        const unsigned long b = byte;   /* widened before any shifting */

        switch (count)
        {
            case 0:
                version = b & 0x0FUL;
                type = 0;
                length = 0;
                target = 80;    /* fictitious: keeps the end-of-record
                                   test below false while the header is
                                   still being read */
                break;
            case 1:     /* upper bits of the instance field; not needed */
                break;
            case 2:
                type = b;
                break;
            case 3:
                type |= (b << 8) & 0x00000FFFUL;
                break;
            case 4:
                length = b;
                break;
            case 5:
                length |= b << 8;
                break;
            case 6:
                length |= b << 16;
                break;
            case 7:
                length |= b << 24;
                target = length;
                if (version == 0x0F)
                {   /* Container: its children follow immediately, so the
                       next byte begins a new record header */
                    container_processor((int)type);
                    count = 0;
                    continue;
                }
                break;
            default:
                /* Payload byte of an atom: pass its offset within the
                   payload and the offset of the last payload byte */
                atom_processor((int)type, (int)(count - 8),
                               (int)(target - 1), byte);
                break;
        }

        if (count == (target + 7))
            count = 0;      /* last byte of this record */
        else
            count++;
    }

    if (past_first_slide)
        printf("<HR>");
    printf("&nbsp;<br>\n");
    /* Output Credit */
    printf("<hr><FONT SIZE=-1>Created with <a href=\"http://chicago.sf.net/xlhtml\">pptHtml</a></FONT><br>\n" );
    /* Output Tail */
    printf("</BODY></HTML>\n");
    cole_fclose(cf, &err);
}
/*
 * Called once per container record with its type.
 *
 * - Type 0x03EE: the first occurrence only sets past_first_slide; every
 *   later occurrence prints a separator first.
 * - Decides whether atoms inside this container are emitted: always for
 *   0x0FF0, for 0x000D unless the previous container was 0x11 (suppresses
 *   notes info), never for anything else.
 * - Remembers the type in last_container for the next call.
 */
static void container_processor(int type)
{
    int emit;

    if (type == 0x03EE)
    {
        if (!past_first_slide)
            past_first_slide = 1;
        else
            printf("<BR><HR><BR>\n");
    }

    if (type == 0x0FF0)
        emit = 1;
    else if (type == 0x000D)
        emit = (last_container == 0x11) ? 0 : 1;
    else
        emit = 0;

    output_this_container = emit;
    last_container = type;
}
/*
 * Receives the payload of an atom one byte per call and prints text atoms
 * once their last byte has arrived.
 *
 * type     - record type of the atom
 * count    - offset of `data` within the payload (0 = first byte)
 * buf_last - offset of the final payload byte
 * data     - the payload byte
 *
 * Nothing happens while output_this_container is 0.  Only 0x0FA0 and
 * 0x0FBA (16-bit little-endian text) and 0x0FA8 (8-bit text) are collected;
 * each printed string is followed by "<BR>\n".
 */
static void atom_processor(int type, int count, int buf_last, unsigned char data)
{
    int i;

    if (output_this_container == 0)
        return;

    /* Start of a new atom: reset the buffer.  This must come before the
       capacity check, otherwise one oversized atom would leave buf_idx at
       WORK_SIZE and every later atom would be dropped. */
    if (count == 0)
    {
        memset(working_buffer, 0, WORK_SIZE);
        buf_idx = 0;
    }

    switch (type)
    {
        case 0x0FA0:    /* Text String in unicode */
        case 0x0FA8:    /* Text String in 8-bit characters */
        case 0x0FBA:    /* CString - unicode... */
            break;
        default:
            return;
    }

    /* Bytes beyond the buffer capacity are discarded; the part that did
       fit is still printed when the atom ends. */
    if (buf_idx < WORK_SIZE)
        working_buffer[buf_idx++] = data;

    if (count != buf_last)
        return;

    if (type == 0x0FA8)
    {
        /* Each byte is a character in the range U+0000..U+00FF.  Going
           through print_utf8() escapes HTML specials, turns CR into a
           line break and keeps the output in the same encoding as the
           16-bit text path. */
        for (i = 0; i < buf_idx; i++)
            print_utf8(working_buffer[i]);
    }
    else
    {
        print_unicode(working_buffer, buf_idx);
    }
    printf("<BR>\n");
}
/*
 * Prints `len` bytes of little-endian UTF-16 text as UTF-8.
 *
 * ucs - the raw bytes, two per code unit, low byte first
 * len - number of valid bytes in ucs
 *
 * A trailing odd byte is treated as a code unit whose high byte is zero,
 * so ucs[len] is never read.  A high surrogate followed by a low surrogate
 * is combined into one code point and written as a 4-byte UTF-8 sequence;
 * every other code unit is handed to print_utf8().
 */
static void print_unicode(unsigned char *ucs, int len)
{
    int i = 0;

    while (i < len)
    {
        unsigned int unit = ucs[i];

        if (i + 1 < len)
            unit |= (unsigned int)ucs[i + 1] << 8;
        i += 2;

        if (unit >= 0xD800 && unit <= 0xDBFF && i + 1 < len)
        {
            unsigned int low = ucs[i] | ((unsigned int)ucs[i + 1] << 8);

            if (low >= 0xDC00 && low <= 0xDFFF)
            {
                unsigned long cp = 0x10000UL
                                 + ((unsigned long)(unit - 0xD800) << 10)
                                 + (unsigned long)(low - 0xDC00);

                putchar((int)(0xF0 | (cp >> 18)));
                putchar((int)(0x80 | ((cp >> 12) & 0x3F)));
                putchar((int)(0x80 | ((cp >> 6) & 0x3F)));
                putchar((int)(0x80 | (cp & 0x3F)));
                i += 2;     /* the low surrogate has been consumed */
                continue;
            }
        }
        print_utf8((unsigned short)unit);
    }
}
/*
 * Writes one 8-bit character to stdout in a form that is safe inside HTML.
 *
 * - '\r' becomes "<BR>\n".
 * - '<', '>', '&' and '"' become the matching named entities.
 * - 0x80..0x9F are interpreted as Windows-1252 (MS uses this area) and
 *   written as decimal numeric character references.  Positions that
 *   Windows-1252 leaves undefined are written unchanged.
 * - Every other value is written unchanged.
 */
static void OutputCharCorrected(unsigned char c)
{
    /* Unicode code points for Windows-1252 0x80..0x9F; 0 = undefined */
    static const unsigned short cp1252_high[32] =
    {
        8364,    0, 8218,  402, 8222, 8230, 8224, 8225,  /* 0x80-0x87 */
         710, 8240,  352, 8249,  338,    0,  381,    0,  /* 0x88-0x8F */
           0, 8216, 8217, 8220, 8221, 8226, 8211, 8212,  /* 0x90-0x97 */
         732, 8482,  353, 8250,  339,    0,  382,  376   /* 0x98-0x9F */
    };

    switch (c)
    {   /* Special char handlers here... */
        case '\r':
            printf("<BR>\n");
            return;
        case 0x3C:
            printf("&lt;");
            return;
        case 0x3E:
            printf("&gt;");
            return;
        case 0x26:
            printf("&amp;");
            return;
        case 0x22:
            printf("&quot;");
            return;
        default:
            break;
    }

    if (c >= 0x80 && c <= 0x9F && cp1252_high[c - 0x80] != 0)
        printf("&#%u;", (unsigned int)cp1252_high[c - 0x80]);
    else
        putchar(c);
}
/*
 * Writes one 16-bit character to stdout.
 *
 * A zero value prints nothing.  Values below 0x80 go through
 * OutputCharCorrected(); larger values are written as a two-byte
 * (below 0x800) or three-byte UTF-8 sequence, with put_utf8() producing
 * the continuation bytes.
 */
static void print_utf8(unsigned short c)
{
    if (c == 0)
        return;

    if (c < 0x80)
    {
        OutputCharCorrected(c);
        return;
    }

    if (c >= 0x800)
    {
        /* three-byte form: lead byte, then the middle six bits */
        putchar(0xE0 | (c >> 12));
        put_utf8(c >> 6);
    }
    else
    {
        /* two-byte form: lead byte only */
        putchar(0xC0 | (c >> 6));
    }

    /* both forms end with the lowest six bits */
    put_utf8(c);
}
/* Writes a UTF-8 continuation byte: 0x80 combined with the low six bits
   of c. */
static void put_utf8(unsigned short c)
{
    const int low_six = c & 0x3F;

    putchar(0x80 | low_six);
}
/*
 * Parses the header of a property-set stream and loads its item table.
 *
 * cf   - open stream positioned at its start; stored in the result, which
 *        therefore must not be used after cf has been closed
 * psid - property set the caller expects; for OLE_PS_SUMMARY_INFO every
 *        section's format id must equal sum_fmtid
 *
 * Returns a newly allocated OleSummary (release with close_summary()), or
 * NULL when cf is NULL, the header cannot be read or is not a property
 * set (byte order 0xfffe, format 0), memory runs out, a section header is
 * unreadable or has the wrong format id, or the items cannot be read.
 */
OleSummary *
summary_open_stream (COLEFILE *cf, const PropertySetID psid)
{
    uint8_t data[64];
    OleSummary *si;
    int i, sections;
    COLERRNO colerrno;

    if (cf == NULL){
        printf("No cole file info available\n");
        return NULL;
    }

    /* Read and validate the 28-byte header before allocating anything, so
       a bad stream needs no cleanup. */
    if (!cole_fread (cf, data, 28, &colerrno))
        return NULL;
    if (GET_UINT16 (data) != 0xfffe)        /* Byte order */
        return NULL;
    if (GET_UINT16 (data + 2) != 0)         /* Format */
        return NULL;
    sections = GET_UINT32 (data + 24);

    si = malloc(sizeof(OleSummary));
    if (si == NULL)
        return NULL;
    si->sections = malloc(sizeof(ole_vector));
    si->items = malloc(sizeof(item_vector));
    if (si->sections == NULL || si->items == NULL) {
        free(si->sections);
        free(si->items);
        free(si);
        return NULL;
    }
    /* Initialise both vectors right away so that close_summary() always
       sees valid vectors on the error paths below. */
    create_ole_vector(si->sections);
    create_item_vector(si->items);

    si->s = cf;
    si->write_items = NULL;
    si->read_mode = TRUE;
    si->ps_id = psid;
    for (i = 0; i < 16; i++)
        si->class_id[i] = data[8 + i];

    /* Section table: 16-byte format id followed by a 4-byte offset */
    for (i = 0; i < sections; i++) {
        OleSummarySection sect;

        memset(&sect, 0, sizeof sect);
        /* NOTE(review): for property sets other than OLE_PS_SUMMARY_INFO
           the format id is not verified; the section is simply tagged
           with the requested id. */
        sect.ps_id = psid;

        if (!cole_fread (cf, data, 16 + 4, &colerrno)){
            close_summary(si);
            return NULL;
        }
        if (psid == OLE_PS_SUMMARY_INFO) {
            if (GET_UINT32 (data + 0) == sum_fmtid[0] &&
                GET_UINT32 (data + 4) == sum_fmtid[1] &&
                GET_UINT32 (data + 8) == sum_fmtid[2] &&
                GET_UINT32 (data + 12) == sum_fmtid[3]) {
                si->ps_id = OLE_PS_SUMMARY_INFO;
                sect.ps_id = OLE_PS_SUMMARY_INFO;
            } else {
                close_summary(si);
                return NULL;
            }
        }
        sect.offset = GET_UINT32 (data + 16);
        append_ole(si->sections,sect);
    }

    for (i = 0; i < sections; i++) {
        OleSummarySection st;

        st = ole_at(si->sections,i);
        if (!read_items(si, st.ps_id)) {
            printf("Serious error reading items\n");
            close_summary(si);
            return NULL;
        }
    }
    return si;
}
/*
 * Releases an OleSummary together with its section and item vectors.
 * A NULL si is ignored.  The stream stored in si->s is not closed here.
 *
 * Each vector is torn down with its destroy function BEFORE the struct
 * holding it is freed; the reverse order would hand a dangling pointer to
 * the destroy function.
 * NOTE(review): assumes destroy_ole_vector()/destroy_item_vector() release
 * only the vector's internal storage, mirroring create_*_vector() which
 * initialise a caller-allocated struct - confirm against vector.h.
 */
void
close_summary(OleSummary *si)
{
    if (si == NULL)
        return;

    if (si->sections) {
        destroy_ole_vector(si->sections);
        free(si->sections);
        si->sections = NULL;
    }
    if (si->items) {
        destroy_item_vector(si->items);
        free(si->items);
        si->items = NULL;
    }
    free(si);
}
/*
 * Appends to si->items one entry for every property listed in each section
 * of si whose property-set id equals ps_id.
 *
 * For such a section the stream is positioned at the section offset, the
 * 8-byte section header (byte count, property count) is read, and then one
 * 8-byte (id, offset) pair per property.  Stored offsets are made relative
 * to the start of the stream by adding the section offset.
 *
 * Returns TRUE on success, FALSE as soon as a read fails.
 */
gboolean
read_items (OleSummary *si, PropertySetID ps_id)
{
    COLERRNO colerrno;
    int s_idx = 0;

    while (s_idx < si->sections->index) {
        OleSummarySection cur;
        uint8_t raw[8];
        int p;

        cur = ole_at(si->sections,s_idx);
        s_idx++;

        /* Only sections of the requested property set are of interest */
        if (cur.ps_id != ps_id)
            continue;

        cole_fseek(si->s, cur.offset, COLE_SEEK_SET, &colerrno);
        if (!cole_fread (si->s, raw, 8, &colerrno))
            return FALSE;
        cur.bytes = GET_UINT32 (raw);
        cur.props = GET_UINT32 (raw + 4);

        /* A section without properties runs the loop zero times */
        for (p = 0; p < cur.props; p++) {
            item_t entry;

            if (!cole_fread (si->s, raw, 8, &colerrno))
                return FALSE;
            entry.id = GET_UINT32 (raw);
            entry.offset = GET_UINT32 (raw + 4) + cur.offset;
            entry.ps_id = ps_id;
            append_item(si->items, entry);
        }
    }
    return TRUE;
}
/*
 * Finds the first item whose id equals SUMMARY_ID(id), provided both the
 * summary and the item belong to OLE_PS_SUMMARY_INFO, and positions the
 * stream at that item's offset.
 *
 * Returns a pointer to the item inside si->items, or NULL when there is no
 * such item.
 */
static item_t *
seek_to_record (OleSummary *si, OleSummaryPID id)
{
    COLERRNO colerrno;
    int n;

    for (n = 0; n < si->items->index; n++) {
        item_t *candidate = &item_at(si->items,n);

        if (candidate->id != SUMMARY_ID(id))
            continue;
        if (si->ps_id != OLE_PS_SUMMARY_INFO ||
            candidate->ps_id != OLE_PS_SUMMARY_INFO)
            continue;

        cole_fseek(si->s, candidate->offset, COLE_SEEK_SET, &colerrno);
        return candidate;
    }
    return NULL;
}
/*
 * Reads the string property `id` from the summary stream.
 *
 * si        - summary opened with summary_open_stream()
 * id        - property to fetch
 * available - set to TRUE when a string is returned, FALSE otherwise
 *
 * Returns a NUL-terminated copy of the string which the caller must
 * free(), or NULL when the property does not exist, is not of type
 * TYPE_STRING, has an unusable length, cannot be read or memory is
 * exhausted.
 */
char *
summary_get_string (OleSummary *si, OleSummaryPID id, gboolean *available)
{
    uint8_t data[8];
    uint32_t type, len;
    char *ans;
    COLERRNO colerrno;

    *available = FALSE;

    if (!seek_to_record (si, id))
        return NULL;

    /* Property header: 4-byte type followed by the 4-byte string length */
    if (!cole_fread(si->s, data, 8, &colerrno))
        return NULL;
    type = GET_UINT32 (data);
    len = GET_UINT32 (data + 4);

    if (type != TYPE_STRING) {
        printf("Summary string type mismatch\n");
        return NULL;
    }

    /* The length comes from the file.  At the maximum value len + 1 would
       wrap to 0, giving a zero-byte buffer that is then written past. */
    if (len == (uint32_t)-1)
        return NULL;

    ans = allocate_mem(char, len + 1);
    if (ans == NULL)
        return NULL;

    if (!cole_fread(si->s, ans, len, &colerrno)) {
        free (ans);
        return NULL;
    }
    ans[len] = '\0';
    *available = TRUE;
    return ans;
}