/* pptHtml - Format a PowerPoint Presentation into Html Copyright 2002 Charles N Wyble This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* The code used to extract title, author, subject and keywords was based on original code from libcole2 written by Michael Meeks (michael@helixcode.com) Arturo Tena (arturo@directmail.org) */ #if !(defined( __BORLANDC__ ) || defined( __WIN32__ )) #include "config.h" /* Created by ./configure script */ #include "support.h" /* Needs to be before internal.h */ #include "internal.h" /* Needs to be before cole */ #include "cole.h" #else #include "config.h.in" /* Created by ./configure script */ #include "support.h" /* Needs to be before internal.h */ #include "internal.h" /* Needs to be before cole */ #include "cole.h.in" #include #endif #include "vector.h" #include #include /* For strcpy() */ #include /* For isprint */ #include /* For exitt() */ #include /* For MAX_PATH */ #include "ppthtml.h" /* For SummaryInfo things*/ #define PRGNAME "pptHtml" #if !(defined( __WIN32__ ) || defined( __BORLANDC__ )) #define PRGVER VERSION #else #define PRGVER "0.4" #endif #define WORK_SIZE 8192 static char FileName[2][32] = /* The section of the PowerPoint File we read */ { "/PowerPoint Document", /* Power Point 97 & 2000 */ "/PP40" /* Everything else ? */ }; #define BUFFER_SIZE 128 /* Function Prototypes */ COLE_LOCATE_ACTION_FUNC dump_file; static void container_processor(int); static void atom_processor(int, int, int, unsigned char); static void print_unicode(unsigned char *, int); static void print_utf8(unsigned short c); static void put_utf8(unsigned short c); static OleSummary *summary_open_stream(COLEFILE *cf, const PropertySetID psid); static void close_summary(OleSummary *si); static gboolean read_items(OleSummary *si, PropertySetID ps_id); static char *summary_get_string (OleSummary *si,OleSummaryPID id,gboolean *available); /* Global data */ static char filename[128]; static unsigned char working_buffer[WORK_SIZE]; static int buf_idx=0; static int output_this_container = 0; static int past_first_slide = 0; static int last_container = 0; static char *title; static char *author; static char *subject; static char *keywords; int main (int argc, char **argv) { int f_ptr = 0; COLEFS * cfs; COLEFILE * cf; COLERRNO colerrno; char buffer[BUFFER_SIZE]; size_t char_read; size_t char_read_total; gboolean ok; OleSummary *si; if (argc < 2) { fprintf (stderr, "pptHtml - Outputs Power Point files as Html.\n" "Usage: "PRGNAME" \n"); exit (1); } else { strncpy(filename, argv[1], 124); cfs = cole_mount (filename, &colerrno); if (cfs == NULL) { cole_perror (PRGNAME, colerrno); exit (1); } } cf = cole_fopen (cfs, "/\005SummaryInformation", &colerrno); if (!cf) { printf("ERROR-No Summary Information available for this document\n"); cole_umount (cfs, NULL); exit (1); } si = summary_open_stream(cf, OLE_PS_SUMMARY_INFO); if (!si) { printf ("Could not open SummaryInformation\n"); exit(1); } title = summary_get_string(si, OLE_SUMMARY_TITLE, &ok); /* if (ok) */ /* printf ("The title is %s\n", title); */ /* else */ /* printf ("no title found\n"); */ subject = summary_get_string (si, OLE_SUMMARY_SUBJECT, &ok); /* if (ok) */ /* printf ("The subject is %s\n", subject); */ /* else */ /* printf ("no subject found\n"); */ author =summary_get_string (si, OLE_SUMMARY_AUTHOR, &ok); /* if (ok) */ /* printf ("The author is %s\n", author); */ /* else */ /* printf ("no author found\n"); */ keywords = summary_get_string (si, OLE_SUMMARY_KEYWORDS, &ok); /* if (ok) */ /* printf ("The keywords are %s\n", keywords); */ /* else */ /* printf ("no keywords found\n"); */ cole_fclose (cf, &colerrno); while (cole_locate_filename (cfs, FileName[f_ptr], NULL, dump_file, &colerrno)) { if (f_ptr) { /* Two strikes...we're out! */ cole_perror (PRGNAME, colerrno); if (colerrno == COLE_EFILENOTFOUND) fprintf(stderr, "Section: PowerPoint Document\n"); break; } else /* Don't do this... */ f_ptr++; } free (title); free (subject); free (author); free (keywords); if (cole_umount (cfs, &colerrno)) { cole_perror ("travel", colerrno); exit (1); } return 0; } void dump_file(COLEDIRENT *cde, void *_info) { unsigned long version=0, instance=0, type=0, length=0, target=0, count=0; unsigned char buf[16]; COLEFILE *cf; COLERRNO err; cf = cole_fopen_direntry(cde, &err); /* Ouput Header */ printf("\n"); printf("%s\n", title); if(subject) printf("\n", subject); if(author) printf("\n", author); if(keywords) printf("\n", keywords); printf("\n"); /* Output body */ while (cole_fread(cf, buf, 1, &err)) { if (count == 0) { instance = buf[0]; type = 0; length = 0; target = 80; /* ficticious number */ } else if (count == 1) { instance |= (buf[0]<<8); version = instance &0x000F; instance = (instance>>4); } else if (count == 2) type = (unsigned)buf[0]; else if (count == 3) type |= (buf[0]<<8)&0x00000FFFL; else if (count == 4) length = (unsigned)buf[0]; else if (count == 5) length |= (buf[0]<<8); else if (count == 6) length |= (buf[0]<<16); else if (count == 7) { length |= (buf[0]<<24); target = length; if (version == 0x0F) { /* Do container level Processing */ container_processor(type); count = -1; } } if (count > 7) { /* Here is where we want to process the data based on the Atom type... */ atom_processor(type, count-8, target-1, buf[0]); } if (count == (target+7)) count = 0; else count++; } if (past_first_slide) printf("
"); printf(" 
\n"); /* Output Credit */ printf("
Created with pptHtml
\n" ); /* Output Tail */ printf("\n"); cole_fclose(cf, &err); } static void container_processor(int type) { if (type == 0x03EE) { if (past_first_slide) printf("


\n"); else past_first_slide = 1; } switch (type) { case 0x000D: if (last_container == 0x11) /* suppress notes info */ output_this_container = 0; else output_this_container = 1; break; case 0x0FF0: output_this_container = 1; break; default: /* printf("Cont:%x|\n", type); */ output_this_container = 0; break; } last_container = type; } static void atom_processor(int type, int count, int buf_last, unsigned char data) { if ((buf_idx >= WORK_SIZE)||(output_this_container == 0)) return; if (count == 0) { memset(working_buffer, 0, WORK_SIZE); buf_idx = 0; } switch (type) { case 0x0FA0: /* Text String in unicode */ working_buffer[buf_idx++] = data; if (count == buf_last) { /* printf("Atom:%x|\n", type); */ /* working_buffer[buf_idx++] = 0; */ /* printf("%s
\n", working_buffer); */ print_unicode(working_buffer, buf_idx); printf("
\n"); } break; case 0x0FA8: /* Text String in ASCII */ working_buffer[buf_idx++] = data; if (count == buf_last) { int i; /* working_buffer[buf_idx++] = 0; */ /* printf("Atom:%x|\n", type); */ for (i=0;i\n"); else putchar(working_buffer[i]); } printf("
\n"); } break; case 0x0FBA: /* CString - unicode... */ working_buffer[buf_idx++] = data; if (count == buf_last) { /* working_buffer[buf_idx++] = 0; */ /* printf("%s
\n", working_buffer); */ /* printf("Atom:%x|\n", type); */ print_unicode(working_buffer, buf_idx); printf("
\n"); } break; default: break; } } static void print_unicode(unsigned char *ucs, int len) { int i; for (i = 0; i < len; i += 2) print_utf8(ucs[i] | (ucs[i+1] << 8)); } static void OutputCharCorrected(unsigned char c) { switch (c) { /* Special char handlers here... */ case '\r': printf("
\n"); break; case 0x3C: printf("<"); break; case 0x3E: printf(">"); break; case 0x26: printf("&"); break; case 0x22: printf("""); break; /* Also need to cover 128-159 since MS uses this area... */ case 0x80: /* Euro Symbol */ printf("€"); break; case 0x82: /* baseline single quote */ printf("‚"); break; case 0x83: /* florin */ printf("ƒ"); break; case 0x84: /* baseline double quote */ printf("„"); break; case 0x85: /* ellipsis */ printf("…"); break; case 0x86: /* dagger */ printf("†"); break; case 0x87: /* double dagger */ printf("‡"); break; case 0x88: /* circumflex accent */ printf("ˆ"); break; case 0x89: /* permile */ printf("‰"); break; case 0x8A: /* S Hacek */ printf("Š"); break; case 0x8B: /* left single guillemet */ printf("‹"); break; case 0x8C: /* OE ligature */ printf("Œ"); break; case 0x8E: /* #LATIN CAPITAL LETTER Z WITH CARON */ printf("Ž"); break; case 0x91: /* left single quote ? */ printf("‘"); break; case 0x92: /* right single quote ? */ printf("’"); break; case 0x93: /* left double quote */ printf("“"); break; case 0x94: /* right double quote */ printf("”"); break; case 0x95: /* bullet */ printf("•"); break; case 0x96: /* endash */ printf("–"); break; case 0x97: /* emdash */ printf("—"); break; case 0x98: /* tilde accent */ printf("˜"); break; case 0x99: /* trademark ligature */ printf("™"); break; case 0x9A: /* s Haceks Hacek */ printf("š"); break; case 0x9B: /* right single guillemet */ printf("›"); break; case 0x9C: /* oe ligature */ printf("œ"); break; case 0x9F: /* Y Dieresis */ printf("Ÿ"); break; default: putchar(c); break; } } static void print_utf8(unsigned short c) { if (c == 0) return; if (c < 0x80) OutputCharCorrected(c); else if (c < 0x800) { putchar(0xC0 | (c >> 6)); put_utf8(c); } else { putchar(0xE0 | (c >> 12)); put_utf8(c >> 6); put_utf8(c); } } static void put_utf8(unsigned short c) { putchar(0x0080 | ((short)c & 0x003F)); } OleSummary * summary_open_stream (COLEFILE *cf, const PropertySetID psid) { uint8_t data[64]; uint16_t byte_order; gboolean panic=FALSE; uint32_t os_version; OleSummary *si; int i, sections; COLERRNO colerrno; if (cf == NULL){ printf("No cole file info available\n"); return NULL; } /* reading data */ cole_fread (cf, data, 28, &colerrno); si = malloc(sizeof(OleSummary)); si->sections = malloc(sizeof(ole_vector)); si->items = malloc(sizeof(item_vector)); si->s = cf; si->write_items = NULL; si->read_mode = TRUE; byte_order = GET_UINT16(data); if (byte_order != 0xfffe) panic = TRUE; if (GET_UINT16 (data + 2) != 0) /* Format */ panic = TRUE; os_version = GET_UINT32 (data + 4); for (i = 0; i < 16; i++) si->class_id[i] = data[8 + i]; sections = GET_UINT32 (data + 24); if (panic) { close_summary(si); return NULL; } create_ole_vector(si->sections); for (i = 0; i < sections; i++) { OleSummarySection sect; if (!cole_fread (cf, data, 16 + 4, &colerrno)){ close_summary(si); return NULL; } if (psid == OLE_PS_SUMMARY_INFO) { if (GET_UINT32 (data + 0) == sum_fmtid[0] && GET_UINT32 (data + 4) == sum_fmtid[1] && GET_UINT32 (data + 8) == sum_fmtid[2] && GET_UINT32 (data + 12) == sum_fmtid[3]) { si->ps_id = OLE_PS_SUMMARY_INFO; sect.ps_id = OLE_PS_SUMMARY_INFO; } else { close_summary(si); return NULL; } } sect.offset = GET_UINT32 (data + 16); append_ole(si->sections,sect); } create_item_vector(si->items); for (i = 0; i < sections; i++) { OleSummarySection st; st = ole_at(si->sections,i); if (!read_items(si, st.ps_id)) { printf("Serious error reading items\n"); close_summary(si); return NULL; } } return si; } void close_summary(OleSummary *si) { if(si->sections) free(si->sections); destroy_ole_vector(si->sections); si->sections = NULL; if(si->items) free(si->items); destroy_item_vector(si->items); si->items=NULL; free(si); } gboolean read_items (OleSummary *si, PropertySetID ps_id) { int sect; COLERRNO colerrno; for (sect = 0; sect < si->sections->index; sect++) { OleSummarySection st; uint8_t data[8]; int i; st = ole_at(si->sections,sect); if (st.ps_id != ps_id) continue; cole_fseek(si->s, st.offset, COLE_SEEK_SET, &colerrno); if (!cole_fread (si->s, data, 8, &colerrno)) return FALSE; st.bytes = GET_UINT32 (data); st.props = GET_UINT32 (data + 4); if (st.props == 0) continue; for (i = 0; i < st.props; i++) { item_t item; if (!cole_fread (si->s, data, 8, &colerrno)) return FALSE; item.id = GET_UINT32 (data); item.offset = GET_UINT32 (data + 4); item.offset = item.offset + st.offset; item.ps_id = ps_id; append_item(si->items, item); } } return TRUE; } /* Seeks to the correct place, and returns a handle or NULL on failure */ static item_t * seek_to_record (OleSummary *si, OleSummaryPID id) { int i; COLERRNO colerrno; for (i = 0; i < si->items->index; i++) { item_t *item = &item_at(si->items,i); if (item->id == SUMMARY_ID(id)) { gboolean is_summary; is_summary = ((si->ps_id == OLE_PS_SUMMARY_INFO) && (item->ps_id == OLE_PS_SUMMARY_INFO)); if (is_summary) { cole_fseek(si->s, item->offset, COLE_SEEK_SET, &colerrno); return item; } } } return NULL; } char * summary_get_string (OleSummary *si, OleSummaryPID id, gboolean *available) { uint8_t data[8]; uint32_t type, len; char *ans; item_t *item; COLERRNO colerrno; *available = FALSE; if (!(item = seek_to_record (si, id))) return NULL; if (!cole_fread(si->s, data, 8, &colerrno)) return NULL; type = GET_UINT32 (data); len = GET_UINT32 (data + 4); if (type != TYPE_STRING) { printf("Summary string type mismatch\n"); return NULL; } ans = allocate_mem(char, len + 1); if (!cole_fread(si->s, ans, len,&colerrno)) { free (ans); return NULL; } ans[len] = '\0'; *available = TRUE; return ans; }