Added some more fixes.

2002-05-15 01:58:35 +00:00
parent 1466e3d614
commit ddf4a5df21
12 changed files with 1085 additions and 391 deletions
--- a/xlhtml/ChangeLog
+++ b/xlhtml/ChangeLog
@@ -1,16 +1,22 @@
 							xlHtml ChangeLog

 HEAD
+	- Vaclav Dvorak:
 	  * Temporary fix for non-Unicode 8-bit characters in utf-8 output until
 	    real charset conversion is in place
 	  * Fixed some typos and formatting
 	  * Fixed closing of <FONT> tag
-	* Fixed raw Unicode (A.B.C.D., where . is character 0x00) in output
-	  when string in XLS begins as 8-bit but continues as 16-bit
 	  * Fixed invalid XML output - missing quotes aroud size attribute
 	    of <FONT>
 	  * Made rowspan and colspan attributes instead of content of <cell>
 	    in XML output
+	  * Updated doxygen.conf and in-source documentation and comments
+	  * Fixed SST code when a string on the boundary of a BIFF SST and
+	    CONTINUE starts as Unicode and continues as 8-bit or vice versa;
+	    indication: raw Unicode (A.b.c.d., where . is character 0x00) in
+	    output, or MaxStringsExceeded
+	  * Enabled compilation warnings, made sure there are none
+	  * More code modularization
 	* Added some Alpha portability fixes.  

 0.5 04/13/02
--- a/xlhtml/Makefile.am
+++ b/xlhtml/Makefile.am
@@ -12,9 +12,9 @@ LDADD = ../cole/libcole.a -lm
 xlhtml_SOURCES = support.c xlhtml.c html.c ascii.c xml.c 
 xldump_SOURCES = xldump.c 
 xlcdump_SOURCES = xlcdump.c 
-#AM_CFLAGS =   -Wall  -Wshadow -Wcast-align -Wpointer-arith 
+AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith

-doc:
+doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
 	@echo Generating documentation...
 	doxygen doxygen.conf
 	@echo done.
--- a/xlhtml/Makefile.in
+++ b/xlhtml/Makefile.in
@@ -109,6 +109,7 @@ xlcdump_LDADD = $(LDADD)
 xlcdump_DEPENDENCIES =  ../cole/libcole.a
 xlcdump_LDFLAGS = 
 SCRIPTS =  $(bin_SCRIPTS) $(noinst_SCRIPTS)
+AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith

 CFLAGS = @CFLAGS@
 COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@@ -383,9 +384,7 @@ install-am install uninstall-am uninstall all-redirect all-am all \
 installdirs mostlyclean-generic distclean-generic clean-generic \
 maintainer-clean-generic clean mostlyclean distclean maintainer-clean

-#AM_CFLAGS =   -Wall  -Wshadow -Wcast-align -Wpointer-arith 
-
-doc:
+doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
 	@echo Generating documentation...
 	doxygen doxygen.conf
 	@echo done.
--- a/xlhtml/TODO
+++ b/xlhtml/TODO
@@ -1,5 +1,13 @@
-0.4 ?
-Fix nasty Unicode bug when very large shared string tables are encountereds
-Support String formulas
-Reduce memory footprint
-Support data types by using their format string
+- Update documentation
+- Implement real character set conversion (using iconv); there should be a
+  command-line option to set output charset and another to override input
+  charset. Default: auto-detection for both.
+- Define an XML DTD and stick to it. Look into the OpenOffice spreadsheet
+  format - we don't need two different spreadsheet XML formats...
+- Modularize code. Separate XLS reading into a library, make a frontend with
+  various output formats: XML, HTML, CSV...
+- Make main_line_processor()'s working buffer grow dynamically
+- Notes from Steve Grubb:
+  - Support String formulas
+  - Reduce memory footprint
+  - Support data types by using their format string
--- a/xlhtml/ascii.c
+++ b/xlhtml/ascii.c
@@ -1,21 +1,29 @@
+/*! \file ascii.c
+    \brief ASCII and CSV output for xlhtml
+*/
+
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+

 #include "xlhtml.h"
+#include "support.h"

-extern void do_cr(void);
-extern int  first_sheet;
-extern int  last_sheet;
-extern void OutputString(uni_string * );
-extern char filename[256];
-extern void output_cell( cell *, int); 
-extern int Csv;
-work_sheet **ws_array;
-xf_attr **xf_array;
-
-extern int IsCellNumeric(cell *);
-extern int IsCellSafe(cell *);
-extern int IsCellFormula(cell *);
-extern void output_formatted_data(uni_string *, U16, int, int);
-extern void SetupExtraction(void);


 void OutputPartialTableAscii(void)
--- a/xlhtml/doxygen.conf
+++ b/xlhtml/doxygen.conf
@@ -1,164 +1,732 @@
-# Doxyfile 0.1
+# Doxyfile 1.2.3
+
+# This file describes the settings to be used by doxygen for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")

 #---------------------------------------------------------------------------
 # General configuration options
 #---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
+# by quotes) that should identify the project. 
+
 PROJECT_NAME           = xlhtml
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
+# This could be handy for archiving the generated documentation or 
+# if some version control system is used.
+
 PROJECT_NUMBER         = 
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
+# base path where the generated documentation will be put. 
+# If a relative path is entered, it will be relative to the location 
+# where doxygen was started. If left blank the current directory will be used.
+
 OUTPUT_DIRECTORY       = doc
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
+# documentation generated by doxygen is written. Doxygen will use this 
+# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: 
+# Dutch, French, Italian, Czech, Swedish, German, Finnish, Japanese, 
+# Korean, Hungarian, Spanish, Romanian, Russian, Croatian, Polish, 
+# Portuguese and Slovene.
+
 OUTPUT_LANGUAGE        = English
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
+# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless 
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES 
+
 EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
+# will be included in the documentation. 
+
 EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file 
+# will be included in the documentation. 
+
 EXTRACT_STATIC         = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
+# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the 
+# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled. 
+
 HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
+# undocumented classes that are normally visible in the class hierarchy. 
+# If set to NO (the default) these classes will be included in the various 
+# overviews. This option has no effect if EXTRACT_ALL is enabled. 
+
 HIDE_UNDOC_CLASSES     = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
+# include brief member descriptions after the members that are listed in 
+# the file and class documentation (similar to JavaDoc). 
+# Set to NO to disable this. 
+
 BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
+# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
+# brief descriptions will be completely suppressed. 
+
 REPEAT_BRIEF           = YES
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
+# Doxygen will generate a detailed section even if there is only a brief 
+# description. 
+
 ALWAYS_DETAILED_SEC    = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
+# path before file names in the file list and in the header files. If set 
+# to NO the shortest path that makes the file name unique will be used. 
+
 FULL_PATH_NAMES        = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
+# can be used to strip a user defined part of the path. Stripping is 
+# only done if one of the specified strings matches the left-hand part of 
+# the path. It is allowed to use relative paths in the argument list.
+
 STRIP_FROM_PATH        = 
+
+# The INTERNAL_DOCS tag determines if documentation 
+# that is typed after a \internal command is included. If the tag is set 
+# to NO (the default) then the documentation will be excluded. 
+# Set it to YES to include the internal documentation. 
+
 INTERNAL_DOCS          = NO
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
+# generate a class diagram (in Html and LaTeX) for classes with base or 
+# super classes. Setting the tag to NO turns the diagrams off. 
+
 CLASS_DIAGRAMS         = NO
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
+# be generated. Documented entities will be cross-referenced with these sources. 
+
 SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body 
+# of functions and classes directly in the documentation. 
+
 INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
+# doxygen to hide any special comment blocks from generated source code 
+# fragments. Normal C and C++ comments will always remain visible. 
+
 STRIP_CODE_COMMENTS    = YES
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
+# file names in lower case letters. If set to YES upper case letters are also 
+# allowed. This is useful if you have classes or files whose names only differ 
+# in case and if your file system supports case sensitive file names. Windows 
+# users are advised to set this option to NO.
+
 CASE_SENSE_NAMES       = YES
-SHORT_NAMES            = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
+# will show members with their full class and namespace scopes in the 
+# documentation. If set to YES the scope will be hidden. 
+
 HIDE_SCOPE_NAMES       = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
+# will generate a verbatim copy of the header file for each class for 
+# which an include is specified. Set to NO to disable this. 
+
 VERBATIM_HEADERS       = NO
-SHOW_INCLUDE_FILES     = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
+# will put list of the files that are included by a file in the documentation 
+# of that file. 
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
+# will interpret the first line (until the first dot) of a JavaDoc-style 
+# comment as the brief description. If set to NO, the JavaDoc 
+# comments  will behave just like the Qt-style comments (thus requiring an 
+# explicit @brief command for a brief description). 
+
 JAVADOC_AUTOBRIEF      = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
+# member inherits the documentation from any documented member that it 
+# reimplements. 
+
 INHERIT_DOCS           = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
+# is inserted in the documentation for inline members. 
+
 INLINE_INFO            = NO
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
+# will sort the (detailed) documentation of file and class members 
+# alphabetically by member name. If set to NO the members will appear in 
+# declaration order. 
+
 SORT_MEMBER_DOCS       = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
+# tag is set to YES, then doxygen will reuse the documentation of the first 
+# member in the group (if any) for the other members of the group. By default 
+# all members of a group must be documented explicitly.
+
 DISTRIBUTE_GROUP_DOC   = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments. 
+
 TAB_SIZE               = 8
-GENERATE_TODOLIST      = YES
-GENERATE_TESTLIST      = YES
-GENERATE_BUGLIST       = YES
-ALIASES                = 
+
+# The ENABLED_SECTIONS tag can be used to enable conditional 
+# documentation sections, marked by \if sectionname ... \endif. 
+
 ENABLED_SECTIONS       = 
-MAX_INITIALIZER_LINES  = 30
-OPTIMIZE_OUTPUT_FOR_C  = YES
-SHOW_USED_FILES        = YES
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or 
+# disable (NO) the todo list. This list is created by putting \todo 
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or 
+# disable (NO) the test list. This list is created by putting \test 
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# This tag can be used to specify a number of aliases that acts 
+# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
+# put the command \sideeffect (or @sideeffect) in the documentation, which 
+# will result in a user defined paragraph with heading "Side Effects:". 
+# You can put \n's in the value part of an alias to insert newlines. 
+
+ALIASES                = 
+
 #---------------------------------------------------------------------------
 # configuration options related to warning and progress messages
 #---------------------------------------------------------------------------
-QUIET                  = NO
+
+# The QUIET tag can be used to turn on/off the messages that are generated 
+# by doxygen. Possible values are YES and NO. If left blank NO is used. 
+
+QUIET                  = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are 
+# generated by doxygen. Possible values are YES and NO. If left blank 
+# NO is used. 
+
 WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
+# automatically be disabled. 
+
 WARN_IF_UNDOCUMENTED   = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that 
+# doxygen can produce. The string should contain the $file, $line, and $text 
+# tags, which will be replaced by the file and line number from which the 
+# warning originated and the warning text. 
+
 WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning 
+# and error messages should be written. If left blank the output is written 
+# to stderr. 
+
 WARN_LOGFILE           = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the input files
 #---------------------------------------------------------------------------
-INPUT                  = xlhtml.c
+
+# The INPUT tag can be used to specify the files and/or directories that contain 
+# documented source files. You may enter file names like "myfile.cpp" or 
+# directories like "/usr/src/myproject". Separate the files or directories 
+# with spaces. 
+
+INPUT                  = xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
+
+# If the value of the INPUT tag contains directories, you can use the 
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included. 
+
 FILE_PATTERNS          = 
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories 
+# should be searched for input files as well. Possible values are YES and NO. 
+# If left blank NO is used. 
+
 RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should 
+# be excluded from the INPUT source files. This way you can easily exclude a 
+# subdirectory from a directory tree whose root is specified with the INPUT tag. 
+
 EXCLUDE                = 
+
+# If the value of the INPUT tag contains directories, you can use the 
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
+# certain files from those directories. 
+
 EXCLUDE_PATTERNS       = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or 
+# directories that contain example code fragments that are included (see 
+# the \include command). 
+
 EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp 
+# and *.h) to filter out the source-files in the directories. If left 
+# blank all files are included. 
+
 EXAMPLE_PATTERNS       = 
+
+# The IMAGE_PATH tag can be used to specify one or more files or 
+# directories that contain images that are included in the documentation (see 
+# the \image command). 
+
 IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should 
+# invoke to filter for each input file. Doxygen will invoke the filter program 
+# by executing (via popen()) the command <filter> <input-file>, where <filter> 
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
+# input file. Doxygen will then use the output that the filter program writes 
+# to standard output. 
+
 INPUT_FILTER           = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
+# INPUT_FILTER) will be used to filter the input files when producing source 
+# files to browse. 
+
 FILTER_SOURCE_FILES    = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
+# of all compounds will be generated. Enable this if the project 
+# contains a lot of classes, structs, unions or interfaces. 
+
 ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
+# in which this list will be split (can be a number in the range [1..20]) 
+
 COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all 
+# classes will be put under the same header in the alphabetical index. 
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
+# should be ignored while generating the index headers. 
+
 IGNORE_PREFIX          = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the HTML output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
+# generate HTML output. 
+
 GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `html' will be used as the default path. 
+
 HTML_OUTPUT            = html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard header.
+
 HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
+# each generated HTML page. If it is left blank doxygen will generate a 
+# standard footer.
+
 HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user defined cascading 
+# style sheet that is used by each HTML page. It can be used to 
+# fine-tune the look of the HTML output. If the tag is left blank doxygen 
+# will generate a default style sheet 
+
 HTML_STYLESHEET        = 
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 
+# files or namespaces will be aligned in HTML using tables. If set to 
+# NO a bullet list will be used. 
+
 HTML_ALIGN_MEMBERS     = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
+# will be generated that can be used as input for tools like the 
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) 
+# of the generated HTML documentation. 
+
 GENERATE_HTMLHELP      = NO
-GENERATE_CHI           = NO
-BINARY_TOC             = NO
-TOC_EXPAND             = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at 
+# top of each HTML page. The value NO (the default) enables the index and 
+# the value YES disables it. 
+
 DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20]) 
+# that doxygen will group on one line in the generated HTML documentation. 
+
 ENUM_VALUES_PER_LINE   = 4
-GENERATE_TREEVIEW      = NO
-TREEVIEW_WIDTH         = 250
+
 #---------------------------------------------------------------------------
 # configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
+# generate Latex output. 
+
 GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `latex' will be used as the default path. 
+
 LATEX_OUTPUT           = latex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
+# LaTeX documents. This may be useful for small projects and may help to 
+# save some trees in general. 
+
 COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used 
+# by the printer. Possible values are: a4, a4wide, letter, legal and 
+# executive. If left blank a4wide will be used. 
+
 PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 
+# packages that should be included in the LaTeX output. 
+
 EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
+# the generated latex document. The header should contain everything until 
+# the first chapter. If it is left blank doxygen will generate a 
+# standard header. Notice: only use this tag if you know what you are doing! 
+
 LATEX_HEADER           = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
+# contain links (just like the HTML output) instead of page references 
+# This makes the output suitable for online browsing using a pdf viewer. 
+
 PDF_HYPERLINKS         = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
+# plain latex in the generated Makefile. Set this option to YES to get a 
+# higher quality PDF documentation. 
+
 USE_PDFLATEX           = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode 
+# command to the generated LaTeX files. This will instruct LaTeX to keep 
+# running if errors occur, instead of asking the user for help. 
+# This option is also used when generating formulas in HTML. 
+
 LATEX_BATCHMODE        = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the RTF output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 
+# The RTF output is optimised for Word 97 and may not look very pretty with 
+# other RTF readers or editors.
+
 GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `rtf' will be used as the default path. 
+
 RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
+# RTF documents. This may be useful for small projects and may help to 
+# save some trees in general. 
+
 COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
+# will contain hyperlink fields. The RTF file will 
+# contain links (just like the HTML output) instead of page references. 
+# This makes the output suitable for online browsing using Word or other 
+# programs which support those fields. 
+# Note: wordpad (write) and others do not support links. 
+
 RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's 
+# config file, i.e. a series of assignments. You only have to provide 
+# replacements, missing definitions are set to their default value. 
+
 RTF_STYLESHEET_FILE    = 
-RTF_EXTENSIONS_FILE    = 
+
 #---------------------------------------------------------------------------
 # configuration options related to the man page output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
+# generate man pages 
+
 GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
+# put in front of it. If left blank `man' will be used as the default path. 
+
 MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to 
+# the generated man pages (default is the subroutine's section .3) 
+
 MAN_EXTENSION          = .3
-MAN_LINKS              = NO
+
 #---------------------------------------------------------------------------
 # configuration options related to the XML output
 #---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will 
+# generate an XML file that captures the structure of 
+# the code including all documentation. Warning: This feature 
+# is still experimental and very incomplete.
+
 GENERATE_XML           = NO
+
 #---------------------------------------------------------------------------
 # Configuration options related to the preprocessor   
 #---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
+# evaluate all C-preprocessor directives found in the sources and include 
+# files. 
+
 ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
+# names in the source code. If set to NO (the default) only conditional 
+# compilation will be performed. Macro expansion can be done in a controlled 
+# way by setting EXPAND_ONLY_PREDEF to YES. 
+
 MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
+# then the macro expansion is limited to the macros specified with the 
+# PREDEFINED and EXPAND_AS_DEFINED tags. 
+
 EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 
+# in the INCLUDE_PATH (see below) will be searched if a #include is found. 
+
 SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that 
+# contain include files that are not input files but should be processed by 
+# the preprocessor. 
+
 INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
+# patterns (like *.h and *.hpp) to filter out the header-files in the 
+# directories. If left blank, the patterns specified with FILE_PATTERNS will 
+# be used. 
+
 INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that 
+# are defined before the preprocessor is started (similar to the -D option of 
+# gcc). The argument of the tag is a list of macros of the form: name 
+# or name=definition (no spaces). If the definition and the = are 
+# omitted =1 is assumed. 
+
 PREDEFINED             = 
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
+# this tag can be used to specify a list of macro names that should be expanded. 
+# The macro definition that is found in the sources will be used. 
+# Use the PREDEFINED tag if you want to use a different macro definition. 
+
 EXPAND_AS_DEFINED      = 
-SKIP_FUNCTION_MACROS   = YES
+
 #---------------------------------------------------------------------------
 # Configuration::addtions related to external references   
 #---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tagfiles. 
+
 TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
+# a tag file that is based on the input files it reads. 
+
 GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
+# in the class index. If set to NO only the inherited external classes 
+# will be listed. 
+
 ALLEXTERNALS           = NO
+
+# The PERL_PATH should be the absolute path and name of the perl script 
+# interpreter (i.e. the result of `which perl'). 
+
 PERL_PATH              = /usr/bin/perl
+
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool   
 #---------------------------------------------------------------------------
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
+# available from the path. This tool is part of Graphviz, a graph visualization 
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
+# have no effect if this option is set to NO (the default) 
+
 HAVE_DOT               = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect inheritance relations. Setting this tag to YES will force the 
+# CLASS_DIAGRAMS tag to NO.
+
 CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graph for each documented class showing the direct and 
+# indirect implementation dependencies (inheritance, containment, and 
+# class references variables) of the class with other documented classes. 
+
 COLLABORATION_GRAPH    = YES
-TEMPLATE_RELATIONS     = YES
+
+# If the ENABLE_PREPROCESSING, INCLUDE_GRAPH, and HAVE_DOT tags are set to 
+# YES then doxygen will generate a graph for each documented file showing 
+# the direct and indirect include dependencies of the file with other 
+# documented files. 
+
 INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, INCLUDED_BY_GRAPH, and HAVE_DOT tags are set to 
+# YES then doxygen will generate a graph for each documented header file showing 
+# the documented files that directly or indirectly include this file 
+
 INCLUDED_BY_GRAPH      = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
+# will generate a graphical hierarchy of all classes instead of a textual one. 
+
 GRAPHICAL_HIERARCHY    = YES
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be 
+# found. If left blank, it is assumed the dot tool can be found on the path. 
+
 DOT_PATH               = 
-DOTFILE_DIRS           = 
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images. 
+
 MAX_DOT_GRAPH_WIDTH    = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height 
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
+# this value, doxygen will try to truncate the graph, so that it fits within 
+# the specified constraint. Beware that most browsers cannot cope with very 
+# large images. 
+
 MAX_DOT_GRAPH_HEIGHT   = 1024
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
+# generate a legend page explaining the meaning of the various boxes and 
+# arrows in the dot generated graphs. 
+
 GENERATE_LEGEND        = YES
-DOT_CLEANUP            = YES
+
 #---------------------------------------------------------------------------
 # Configuration::addtions related to the search engine   
 #---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be 
+# used. If set to NO the values of all tags below this one will be ignored. 
+
 SEARCHENGINE           = NO
+
+# The CGI_NAME tag should be the name of the CGI script that 
+# starts the search engine (doxysearch) with the correct parameters. 
+# A script with this name will be generated by doxygen. 
+
 CGI_NAME               = search.cgi
+
+# The CGI_URL tag should be the absolute URL to the directory where the 
+# cgi binaries are located. See the documentation of your http daemon for 
+# details. 
+
 CGI_URL                = 
+
+# The DOC_URL tag should be the absolute URL to the directory where the 
+# documentation is located. If left blank the absolute path to the 
+# documentation, with file:// prepended to it, will be used. 
+
 DOC_URL                = 
+
+# The DOC_ABSPATH tag should be the absolute path to the directory where the 
+# documentation is located. If left blank the directory on the local machine 
+# will be used. 
+
 DOC_ABSPATH            = 
+
+# The BIN_ABSPATH tag must point to the directory where the doxysearch binary 
+# is installed. 
+
 BIN_ABSPATH            = /usr/local/bin/
+
+# The EXT_DOC_PATHS tag can be used to specify one or more paths to 
+# documentation generated for other projects. This allows doxysearch to search 
+# the documentation for these projects as well. 
+
 EXT_DOC_PATHS          = 
--- a/xlhtml/html.c
+++ b/xlhtml/html.c
@@ -1,58 +1,37 @@
+/*! \file html.c
+    \brief HTML output for xlhtml
+*/
+
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+

 #include "xlhtml.h"
+#include "support.h"
 #include <stdio.h>



-
-extern void do_cr(void);
-extern int  center_tables;
-extern int  first_sheet;
-extern int  last_sheet;
-extern uni_string  default_font;
-extern void trim_sheet_edges(unsigned int);
-extern int  next_ws_title;
-extern void SetupExtraction(void);
-extern void update_default_font(unsigned int);
-extern void OutputString(uni_string * );
-extern int  default_fontsize;
-extern char *default_alignment; 
-extern int	aggressive;
-extern char *lastUpdated; 
-extern int  file_version;
-extern int  NoFormat;
-extern int  notAccurate;
-extern int  formula_warnings;
-extern int  NoHeaders;
-extern int  NotImplemented;
-extern int  Unsupported;
-extern int  MaxWorksheetsExceeded;
-extern int  MaxRowExceeded;
-extern int  MaxColExceeded;
-extern int  MaxStringsExceeded;
-extern int  MaxFontsExceeded;
-extern int  MaxPalExceeded;
-extern int  MaxXFExceeded;
-extern int  MaxFormatsExceeded;
-extern char colorTab[MAX_COLORS][8];
-extern char *default_text_color;
-extern char *default_background_color;
-extern char *default_image;
-extern char filename[256];
-extern int  UnicodeStrings;
-extern int  CodePage; 
-extern char	*title;
-extern void update_default_alignment(unsigned int, int);
-extern void output_cell( cell *, int); 
-extern uni_string author;
-extern int null_string(U8 *);
-extern unsigned int next_font; 
-work_sheet **ws_array;
-font_attr **font_array;
-
+/* prototypes for functions in this file */
 void output_header(void);
 void output_footer(void);

+
 void OutputTableHTML(void)
 {
 	int i, j, k;
--- a/xlhtml/support.c
+++ b/xlhtml/support.c
@@ -1,11 +1,34 @@
+/*! \file support.c
+    \brief Various support functions for xlhtml.
+*/

-/* Various support functions for xlhtml. */
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>

+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+
+
+#include "support.h"
 #include <stdio.h>
 #include <time.h>
 #include "../cole/cole.h"
 #include <stdlib.h>

+
+
 void print_version(void)
 {
 	printf("xlhtml %s \nCopyright (c) 1999-2002, Charles Wyble\n" 
@@ -182,8 +205,8 @@ void NumToDate(long num, int *year, int *month, int *day)
 	*year = *year % 100;
 }

-/* noaliasdub macro avoids trouble from gcc -O2 type-based alias analysis */
 typedef S32 swords[2];
+/*! avoids trouble from gcc -O2 type-based alias analysis */
 #define noaliasdub(type,ptr) \
  (((union{swords sw; F64 dub;} *)(ptr))->sw)

--- a/xlhtml/tuneable.h
+++ b/xlhtml/tuneable.h
@@ -1,7 +1,40 @@
+/*! \file tuneable.h
+    \brief Values for tuning performance of xlhtml
+*/

-#define XFORMATS_INCR		64		/*!< Increments to allocate extended formats */
-#define FONTS_INCR	 	32 		/*!< Increments to allocate fonts */
-#define WORKSHEETS_INCR		4		/*!< Increments to allocate worksheet pages */
-#define COLS_INCR		(U16)24 	/*!< Increments to allocate Columns per Worksheet page */
-#define ROWS_INCR 		(U32)128	/*!< Increments to allocate Rows per Worksheet page */
-#define STRINGS_INCR 		256UL		/*!< Increments to allocate the string array - */
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+
+
+/*! \brief Increments to allocate extended formats */
+#define XFORMATS_INCR		64
+
+/*! \brief Increments to allocate fonts */
+#define FONTS_INCR	 	32
+
+/*! \brief Increments to allocate worksheet pages */
+#define WORKSHEETS_INCR		4
+
+/*! \brief Increments to allocate Columns per Worksheet page */
+#define COLS_INCR		(U16)24
+
+/*! \brief Increments to allocate Rows per Worksheet page */
+#define ROWS_INCR 		(U32)128
+
+/*! \brief Increments to allocate the string array */
+#define STRINGS_INCR 		256UL
--- a/xlhtml/xlhtml.c
+++ b/xlhtml/xlhtml.c
@@ -1,7 +1,7 @@
 /*! \file xlhtml.c
-    \brief converts excel files to Html
+    \brief converts MS Excel files to useful formats

-   xlhtml generates HTML, XML, csv and tab-delimitted versions of Excel
+   xlhtml generates HTML, XML, csv and tab-delimited versions of MS Excel
   spreadsheets.
 */

@@ -21,11 +21,13 @@
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
-
 */

+
 #include "tuneable.h"
 #include "xlhtml.h"
+#include "support.h"
+


 static U16 HARD_MAX_ROWS = HARD_MAX_ROWS_97;
@@ -117,19 +119,6 @@ const char month_abbr[12][5] = {	"Jan", "Feb", "Mar", "Apr", "May", "June",

 /* Function Prototypes */

-/* These functions are in support.c */
-extern void print_version(void);
-extern void display_usage(void);
-extern void do_cr(void);
-extern void OutputTableHTML(void);
-extern S32 getLong(U8 *);
-extern U16 getShort(U8 *);
-extern void getDouble(U8 *, F64 *);
-extern int null_string(U8 *);
-extern void FracToTime(U8 *, int *, int *, int *, int *);
-extern void NumToDate(long, int *, int *, int *);
-extern void RKtoDouble(S32, F64 *);
-
 /* This function is in xml.c */
 extern void OutputTableXML(void);

@@ -137,6 +126,7 @@ extern void OutputTableXML(void);
 void OutputPartialTableAscii(void);

 /* These functions are in html.c */
+extern void OutputTableHTML(void);
 extern void output_start_html_attr(html_attr *h, unsigned int, int);
 extern void output_end_html_attr(html_attr *h);
 extern void output_footer(void);
@@ -144,21 +134,11 @@ extern void output_header(void);

 COLE_LOCATE_ACTION_FUNC scan_file;
 void main_line_processor(U16, U16, U32, U16, U8);
-void SetupExtraction(void);
 void decodeBoolErr(U16, U16, char *);
-int IsCellNumeric(cell *);
-int IsCellSafe(cell *);
-int IsCellFormula(cell *);
-void output_cell(cell *, int);
-void output_formatted_data(uni_string *, U16, int, int);
 void PrintFloatComma(char *, int, F64);
 void print_as_fraction(F64, int);
-void trim_sheet_edges(unsigned int);
-void update_default_font(unsigned int);
 void incr_f_cnt(uni_string *);
 int get_default_font(void);
-void update_default_alignment(unsigned int, int);
-void OutputString(uni_string *);
 void OutputCharCorrected(U8);
 void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run);
 void put_utf8(U16);
@@ -187,15 +167,7 @@ char filename[256];
 int file_version = 0;
 U32 next_string=0;
 unsigned int next_font=0, next_ws_title=0, next_xf=0;
-U8 working_buffer[WBUFF_SIZE];
-unsigned int bufidx, buflast;	/*!< Needed for working buffer */
-U8 grbit=0;			/*!< Needed by the SST Opcode FC */
-U16 crun=0, cch=0;               /*!< Needed by the SST Opcode FC */
-U32 extrst=0;			/*!< Needed by the SST Opcode FC */
-U16 nonascii = 0;		/*!< Needed by the SST Opcode FC */
 int sheet_count=-2;		/*!< Number of worksheets found */
-U16 last_opcode = -1;		/*!< Used for the continue command */
-unsigned int cont_grbit=0, cont_str_array=0;
 uni_string default_font;		/*!< Font for table */
 int default_fontsize = 3;	/*!< Default font size for table */
 char *default_alignment = 0;	/*!< Alignment for table */
@@ -240,7 +212,7 @@ int OutputXML = 0;		/*!< Output as xml */
 int DumpPage = 0;		/*!< Dump page count & max cols & rows */
 int Xtract = 0;			/*!< Extract a range on a page. */
 int MultiByte = 0;		/*!< Output as multibyte */
-int NoHeaders = 0;		/*!< Dont output html header */
+int NoHeaders = 0;		/*!< Don't output html header */


 /* Some Global Flags */
@@ -553,15 +525,10 @@ int main (int argc, char **argv)



-
-
-
-
-
 void scan_file(COLEDIRENT *cde, void *_info)
 {
 	U32 count=0;
-	U16 length=0, target=0, opcode=0, version=0;
+	U16 target=0, opcode=0, version=0;
 	U8 buf[16];
 	COLEFILE *cf;
 	COLERRNO err;
@@ -576,28 +543,27 @@ void scan_file(COLEDIRENT *cde, void *_info)
 	/* Read & process the file... */
 	while (cole_fread(cf, buf, 1, &err))
 	{
+		/* The BIFF record format:
+		 *   - byte 0: opcode
+		 *   - byte 1: version (?) - with byte 0 it forms the 16-bit record ID
+		 *   - bytes 2 and 3: 16-bit data length
+		 *   - from byte 4: record data
+		 */
 		if (count > 3)
 			main_line_processor(opcode, version, count-4, target, buf[0]);
 		else if (count == 0)
-		{	/* Init everything */
-			length = 0;
 			opcode = (U16)buf[0];
-			target = 80;	/* ficticious number */
-		}
 		else if (count == 1)
 			version = (U16)buf[0];
 		else if (count == 2)
-			length = (U16)buf[0];
+			target = (U16)buf[0];
 		else if (count == 3)
-		{
-			length |= (U16)(buf[0]<<8);
-			target = length;
-		}
-
-		if (count == (U32)(target+3))
+			target |= (U16)(buf[0]<<8);
+		if ((count >= 3) && (count == (U32)target + 3))
 			count = 0;
 		else
 			count++;
+
 		if (MaxColExceeded || MaxRowExceeded || MaxWorksheetsExceeded)
 			break;	/* We're outta memory and therefore...done */

@@ -648,10 +614,6 @@ void scan_file(COLEDIRENT *cde, void *_info)



-
-
-
-
 void SetupExtraction(void)
 {
 	if (Xtract)
@@ -725,29 +687,41 @@ void SetupExtraction(void)
 }


-/*!******************************************************************
-*	\param count	the absolute count in the record
-*	\param last	the size of the record
-*	\param bufidx	the index into the working buffer
-*	\param buflast	the expected length of the working buffer
-********************************************************************/
+
+/*!
+ *	This function gets called once for every single byte of record data.
+ *	\brief Process the record data.
+ *	\param opcode	the opcode of the current BIFF record
+ *	\param version	the version (?) of the current BIFF record
+ *	\param count	the absolute offset in the record data
+ *	\param last	the size of the record data
+ *	\param data	the character being processed
+ */
 void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 {
-	U16 cont_opcode = 0;
+	int cont_opcode = 0;			/* is this the CONTINUE opcode? */
+	static U16 last_opcode = -1;		/* holds last non-CONTINUE opcode */
+	static U8 working_buffer[WBUFF_SIZE];
+	static unsigned int bufidx = 0;		/* the index into the working buffer */
+	static unsigned int buflast = 0;	/* the expected length of the working buffer */
+	/* Needed by the SST Opcode 0xFC: */
+	static U8 str_options = 0;
+	static int buf_16bit = 0;
+	static int now_16bit = 0;
+	static int after_str_header = 0;
+	static U16 num_chars = 0;
+	static U16 num_fmt_runs = 0;
+	static U32 fareast_bytes = 0;
+	static U16 nonascii = 0;

-	/* If first pass, reset stuff. */
+	/* On start of record, reset stuff. */
 	if (count == 0)
 	{
-		if (opcode != 0x3C)	/* continue command */
-/*		{
-			printf("\n* * * * * * CONTINUE * * * * * * * * *\n\n");
-		}
-		else */
-		{	/* Normal path... */
+		if (opcode != 0x3C)	/* not CONTINUE opcode */
+		{
 			last_opcode = opcode;
 			bufidx = 0;
 			buflast = 0;
-			cont_str_array = 0;
 			memset(working_buffer, 0, WBUFF_SIZE);
 		}
 	}
@@ -760,8 +734,10 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 	/* Abort processing if too big. Next opcode will reset everything. */
 	if (bufidx >= WBUFF_SIZE)
 	{
-		/*printf("OC:%02X C:%04X I:%04X BL:%04X cch:%04X gr:%04X\n", opcode, count, bufidx, buflast, cch, grbit); */
-		/*abort(); */
+		/* this will be printed many times; leave it this way since it's temporary
+		 * anyway - the buffer must be made dynamic
+		 */
+		fprintf(stderr, "Warning: working buffer overflow!\n");
 		return;
 	}

@@ -906,13 +882,13 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 			{    /* Remember, bufidx is 1 more than it should be */
 				if ((bufidx == 8)&&(buflast == 0))
 				{	/* buflast = working_buffer[7]; */
-					cch = getShort(&working_buffer[6]);
-					buflast = cch + 9;
+					num_chars = getShort(&working_buffer[6]);
+					buflast = num_chars + 9;
 				}
 				if (bufidx == 9)
 				{
 					if (working_buffer[8] == 1)
-						buflast = (cch << 1) + 9;
+						buflast = (num_chars << 1) + 9;
 				}
 				if (buflast)
 				{
@@ -933,7 +909,7 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 						if (working_buffer[8] == 1)
 						{
 							UnicodeStrings = 2;
-							add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(cch << 1), 0, 0);
+							add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(num_chars << 1), 0, 0);
 						}
 						else
 							add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8], len, 0, 0);
@@ -965,134 +941,157 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 				}
 			}
 			break;
-			/************
-			*	This function has 2 entry points. 1 is the mainline FC opcode.
-			*	In this event there are several bytes that setup the type of
-			*	strings that will follow. Then there is the continue entry
-			*	point which is immediate - e.g location 0.
-			*************/
-		case 0xFC:	/* Packed String Array A.K.A. SST Shared String Table...UNI */
-			if ((count > 7)||(cont_opcode == 1)) /* Skip the 1st 8 locations they are bs */
+		case 0xFC:	/* Packed String Array A.K.A. SST - Shared String Table */
+			/* Format of the SST:
+			 *     - bytes 0-3:   total number of strings in the workbook (32-bit int)
+			 *     - bytes 4-7:   number of strings following this header (32-bit int)
+			 *     - from byte 8: strings; count is in the preceding int
+			 * Format of the individual strings:
+			 *     - bytes 0-1: LN = number of characters (not bytes!) in the string (16-bit int)
+			 *     - byte 2:    option flags (see below)
+			 *     - from byte 3:
+			 *         if rich-text flag set:
+			 *             - RT = number of rich-text formatting runs (16-bit int)
+			 *         if far-east flag set:
+			 *             - FE = far-east data size (32-bit int)
+			 *         LN characters, either 8-bit or 16-bit each
+			 *         if rich-text flag set:
+			 *             - RT-times repeated:
+			 *                 - first formatted character, zero-based (16-bit int)
+			 *                 - index to font record (16-bit int)
+			 *         if far-east flag set:
+			 *             - unknown extended data about phonetic, keyboard etc. (FE bytes)
+			 * Option flags (byte 2 in string) is a bit field:
+			 *     - bit 0 (01h): 0 = 8-bit characters,        1 = 16-bit characters
+			 *     - bit 2 (04h): 0 = no far-east info,        1 = contains far-east info
+			 *     - bit 3 (08h): 0 = no rich-text formatting, 1 = contains rich-text formatting
+			 *
+			 * The CONTINUE opcode has a special case for the SST: when a string
+			 * is split into two records, there is one additional byte at the start
+			 * of the second record, which indicates (like the option flags at the
+			 * start of the string) with its bit 0 whether the continuation has
+			 * 8-bit or 16-bit characters. Thus, the strings can start with 8-bit
+			 * characters and continue with 16-bit characters, or vice versa.
+			 */
+			if ((count == 0) && !cont_opcode)
+			{	/* initialize variables */
+				bufidx = 0;
+				after_str_header = 0;
+			}
+			if ((count < 8) && !cont_opcode)
+				/*! \todo For now, ignore the SST header. Ideally, we should
+				 *  process at most the number of strings that is stored in
+				 *  the header, ignoring the rest. Fortunately, the case when
+				 *  this would be necessary doesn't seem to occur.
+				 */
+				break;
+			if ((count == 0) && cont_opcode && after_str_header)
 			{
-/*				if ((count == 0)&&(data == 0)&&(buflast))	*/
-				if ((count == 0)&&(cont_opcode == 1)&&(buflast))
-				{
-/*					printf("Adjusting...\n"); */
-/*					printf("I:%04X BL:%04X\n", bufidx, buflast); */
-					cont_str_array = 1;
-					cont_grbit = data;
-					if ((cont_grbit & 0x01) && !(grbit & 0x01))
+				now_16bit = data & 0x01;
+				if (now_16bit && !buf_16bit)
 				{	/* previous chunk was not Unicode but this one is */
 					int i;
-						grbit |= 0x01;
+					/* we will be doubling the buffer contents
+					 *   => check if there is space
+					 */
+					if (2*buflast >= WBUFF_SIZE)
+					{
+						bufidx = WBUFF_SIZE + 1;
+						break;
+					}
+					if (bufidx >= num_chars)
+					{	/* weird - we got a unicode flag, but we're
+						 * actually already done with the string data
+						 */
+						now_16bit = 0;
+					}
+					else
+					{
+						buf_16bit = 1;
+						if (bufidx > 0)
 							for (i = bufidx-1; i >= 0; i--) {
 								working_buffer[2*i] = working_buffer[i];
 								working_buffer[2*i+1] = 0;
 							}
 						bufidx = 2 * bufidx;
-						buflast = 2 * buflast;
+						buflast = 2*num_chars + 4*num_fmt_runs + fareast_bytes;
 					}
-					return;
+				}
+				break;
 			}

-				working_buffer[bufidx] = data;
-				bufidx++;
+			working_buffer[bufidx++] = data;
+			if (buf_16bit && !now_16bit && after_str_header && (bufidx < (2*num_chars)))
+				/* ASCII -> unicode */
+				working_buffer[bufidx++] = 0;

-				if((cont_str_array)&&(grbit & 0x01)&& !(cont_grbit & 0x01))
-				{	/* ASCII -> unicode */
-					working_buffer[bufidx] = 0;
-					bufidx++;
-				}
-
-				if (buflast == 0)	/* Header processor */
+			if (! after_str_header)		/* Header processor */
 			{
 				if (bufidx == 0x03)	/* After 3 locations we have length */
-					{				    /* and type of chars... */
-						cch = getShort(&working_buffer[0]);
-						grbit = working_buffer[2];
-
-						if (grbit < 0x04)	/* Normal run */
-						{
-							nonascii = 0;
-							bufidx = 0;
-							crun = 0;
-							extrst = 0;
-							buflast = cch << (grbit & 0x01);
-
-							/* special case for empty strings */
-							if (!cch && !buflast)
+				{			/* and option flags... */
+					num_chars = getShort(&working_buffer[0]);
+					str_options = working_buffer[2];
+					now_16bit = buf_16bit = str_options & 0x01;
+				}
+				if ((bufidx == 0x03) && ((str_options & 0x0C) == 0))
+				{	/* Normal run */
+					num_fmt_runs = 0;
+					fareast_bytes = 0;
+					buflast = num_chars << buf_16bit;
+					after_str_header = 1;
+				}
+				else if ((bufidx == 0x05) && ((str_options & 0x0C) == 0x08))
+				{	/* Rich-text formatted string only */
+					num_fmt_runs = getShort(&working_buffer[3]);
+					fareast_bytes = 0;
+					buflast = (num_chars << buf_16bit) + (num_fmt_runs*4);
+					after_str_header = 1;
+				}
+				else if ((bufidx == 0x07) && ((str_options & 0x0C) == 0x04))
+				{	/* Extended (Far-East) string only */
+					num_fmt_runs = 0;
+					fareast_bytes = getLong(&working_buffer[3]);
+					buflast = (num_chars << buf_16bit) + fareast_bytes;
+					after_str_header = 1;
+				}
+				else if ((bufidx == 0x09) && ((str_options & 0x0C) == 0x0C))
+				{	/* Rich-text formatted string + Extended (Far-East) string */
+					num_fmt_runs = getShort(&working_buffer[3]);
+					fareast_bytes = getLong(&working_buffer[5]);
+					buflast = (num_chars << buf_16bit) + fareast_bytes + (num_fmt_runs*4);
+					after_str_header = 1;
+				}
+				else if (bufidx >= 0x09)
+				{	/* Houston, we have a problem. This should never happen. */
+					fprintf(stderr, "An error in SST processing occured. Please contact the author.\n");
+					bufidx = WBUFF_SIZE + 1;	/* simulate buffer overflow to skip rest of opcode */
+					break;
+				}
+				if (after_str_header)
+				{	/* the string data is now starting */
+					if (buflast == 0)
+					{	/* special case for empty strings */
 						add_str_array(0, (U8 *)0, 0, 0, 0);
+						after_str_header = 0;
+					}
 					else
+					{
 						memset(working_buffer, 0, WBUFF_SIZE);
-						}
-					}
-					else if (bufidx == 0x05)
-					{
-						if ((grbit & 0x0C) == 0x08)	/* Rich string only */
-						{
 						nonascii = 0;
+					}
 					bufidx = 0;
-							crun = getShort(&working_buffer[3]);
-							extrst = 0;
-							buflast = (cch << (grbit & 0x01)) + (crun*4);
-/*							printf("rtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
-										buflast, cch, grbit, extrst, crun, last);
-							printf("%02X %02X %02X %02X %02X %02X\n",
-							working_buffer[0], working_buffer[1], working_buffer[2],
-							working_buffer[3], working_buffer[4], working_buffer[5]); */
-							memset(working_buffer, 0, WBUFF_SIZE);
 				}
 			}
-					else if (bufidx == 0x07)
-					{
-						if ((grbit & 0x0C) == 0x04)	/* Extended string only */
-						{
-							nonascii = 0;
-							bufidx = 0;
-							crun = 0;
-							extrst = getLong(&working_buffer[3]);
-							buflast = (cch << (grbit & 0x01)) + extrst;
-/*							printf("esbuflast:%X cch%X grbit:%X extrst:%X last:%X\n",
-										buflast, cch, grbit, extrst, last);
-							printf("%02X %02X %02X %02X %02X %02X\n",
-							working_buffer[0], working_buffer[1], working_buffer[2],
-							working_buffer[3], working_buffer[4], working_buffer[5]); */
-							memset(working_buffer, 0, WBUFF_SIZE);
-						}
-					}
-					else if (bufidx == 0x09)
-					{
-						if ((grbit & 0x0C) == 0x0C)
-						{
-							/* Rich String + Extended String **/
-							nonascii = 0;
-							bufidx = 0;
-							crun = getShort(&working_buffer[3]);
-							extrst = getLong(&working_buffer[5]);
-							buflast = (cch << (grbit & 0x01)) + extrst + (crun*4);
-/*							printf("xrtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
-										buflast, cch, grbit, extrst, crun, last);
-							printf("%02X %02X %02X %02X %02X %02X\n",
-							working_buffer[0], working_buffer[1], working_buffer[2],
-							working_buffer[3], working_buffer[4], working_buffer[5]); */
-							memset(working_buffer, 0, WBUFF_SIZE);
-						}
-					}
-/*					printf("*%02X ", data); */
-				}
 			else	/* payload processor */
 			{
-/*					if (cont_opcode == 1)
-						printf(" %02X", data); */
 				if (data > 127)
 					nonascii = 1;
 				if (bufidx == buflast)
 				{
 					U8 uni;
-						U16 len = (U16)(cch << (grbit & 0x01));
-/*						int i;	*/
+					U16 len = (U16)(num_chars << buf_16bit);

-						if (grbit & 01)
+					if (buf_16bit)
 					{
 						uni = 2;
 						UnicodeStrings = 2;
@@ -1100,27 +1099,18 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
 					else
 						uni = nonascii;
 					working_buffer[bufidx] = 0;
-/*  						fprintf(stderr,":buflast-"); */
-/*                                                  { int i; */
-/*  						for (i=0; i<buflast; i++) */
-/*                                                    putchar(working_buffer[i]); */
-/*  						fprintf(stderr,"\nNext String:%d\n", next_string); */
-/*                                                  } */

-						if (crun)
-							add_str_array(uni, working_buffer, len, working_buffer+len, crun);
+					if (num_fmt_runs)
+						add_str_array(uni, working_buffer, len, working_buffer+len, num_fmt_runs);
 					else
 						add_str_array(uni, working_buffer, len, 0, 0);
 					if (uni > UnicodeStrings)	/* Try to "upgrade" charset */
 						UnicodeStrings = uni;
 					bufidx = 0;
-						buflast = 0;
-						cch = 0;
-						cont_str_array = 0;
+					after_str_header = 0;
 					memset(working_buffer, 0, WBUFF_SIZE);
 				}
 			}
-			}
 			break;
 		case 0xFD:	/* String Array Index A.K.A. LABELSST */
 			working_buffer[count] = data;
@@ -1806,9 +1796,6 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)



-
-
-
 /*! returns 1 on error, 0 on success */
 int ws_init(int i)
 {
@@ -1841,6 +1828,8 @@ int ws_init(int i)
 	return 0;
 }

+
+
 /*! returns 1 on error, 0 on success */
 int add_more_worksheet_ptrs(void)
 {
@@ -1877,6 +1866,8 @@ int add_more_worksheet_ptrs(void)
 	return 0;
 }

+
+
 int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
 {
 	cell **tc_array;
@@ -1908,6 +1899,8 @@ int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
 	return 0;
 }

+
+
 void add_wb_array(U16 r, U16 c, U16 xf, U16 type, U8 uni,
 					U8 *str, U16 len, U16 crun_cnt, U8 *fmt_run)
 {
--- a/xlhtml/xlhtml.h
+++ b/xlhtml/xlhtml.h
@@ -1,3 +1,30 @@
+/*! \file xlhtml.h
+    \brief Header file for xlhtml
+*/
+
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+
+
+#ifndef __XLHTML_H_INCLUDED
+#define __XLHTML_H_INCLUDED
+
+

 #if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))
 #include "config.h"		/* Created by ./configure script */
@@ -40,7 +67,8 @@
 #define GLOBAL_UMASK (2)
 #endif

-typedef struct		/*!< This encapsulates the Unicode String	*/
+/*! \brief This encapsulates the Unicode String	*/
+typedef struct
 {
 	U8 uni;		/*!< Unicode String: 0==ASCII/8859-1, 1==windows-1252, 2==utf-8 */
 	U8 *str;	/*!< Characters of string */
@@ -49,7 +77,8 @@ typedef struct		/*!< This encapsulates the Unicode String	*/
 	U8 crun_cnt;	/*!< The count of format runs */
 } uni_string;

-typedef struct 		/*!< This is everything we need for a cell */
+/*! \brief This is everything we need for a cell */
+typedef struct
 {
 	U16 xfmt;		/*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */
 	U16 type;		/*!< This will record the record type that generated the cell */
@@ -60,7 +89,8 @@ typedef struct 		/*!< This is everything we need for a cell */
 	uni_string h_link;	/*!< If a hyperlinked cell, this is the link*/
 } cell;

-typedef struct	/*!< This encapsulates some information about each worksheet */
+/*! \brief This encapsulates some information about each worksheet */
+typedef struct
 {
 	U32 first_row;
 	S32 biggest_row;
@@ -73,7 +103,8 @@ typedef struct	/*!< This encapsulates some information about each worksheet */
 	U16 spanned;
 } work_sheet;

-typedef struct	/*!< This is everything we need to know about fonts */
+/*! \brief This is everything we need to know about fonts */
+typedef struct
 {
 	U16 size;
 	U16 attr;
@@ -90,7 +121,8 @@ typedef struct
 	U16 cnt;
 } fnt_cnt;

-typedef struct		/*!< This covers the Extended Format records */
+/*! \brief This covers the Extended Format records */
+typedef struct
 {
 	U16 fnt_idx;
 	U16 fmt_idx;
@@ -103,7 +135,8 @@ typedef struct		/*!< This covers the Extended Format records */
 	U16 cell_color;
 } xf_attr;

-typedef struct		/*!< HTML Attribute */
+/*! \brief HTML Attribute */
+typedef struct
 {
 	int fflag;		/*!< Font Flag */
 	int bflag;		/*!< Bold Flag */
@@ -115,3 +148,57 @@ typedef struct		/*!< HTML Attribute */
 } html_attr;


+extern int first_sheet;
+extern int last_sheet;
+extern char filename[256];
+extern char *default_text_color;
+extern char *default_background_color;
+extern char *default_image;
+extern int aggressive;
+extern int center_tables;
+extern int NoHeaders;
+extern int formula_warnings;
+extern int Csv;
+extern xf_attr **xf_array;
+extern work_sheet **ws_array;
+extern font_attr **font_array;
+extern uni_string default_font;
+extern unsigned int next_font;
+extern unsigned int next_ws_title;
+extern int default_fontsize;
+extern char *default_alignment;
+extern char *title;
+extern uni_string author;
+extern char *lastUpdated;
+extern int file_version;
+extern char colorTab[MAX_COLORS][8];
+extern int NoFormat;
+extern int notAccurate;
+extern int NotImplemented;
+extern int Unsupported;
+extern int MaxPalExceeded;
+extern int MaxXFExceeded;
+extern int MaxFormatsExceeded;
+extern int MaxColExceeded;
+extern int MaxRowExceeded;
+extern int MaxWorksheetsExceeded;
+extern int MaxStringsExceeded;
+extern int MaxFontsExceeded;
+extern int UnicodeStrings;
+extern int CodePage;
+
+
+void OutputString (uni_string *);
+void output_cell (cell *, int);
+int IsCellNumeric (cell *);
+int IsCellSafe (cell *);
+int IsCellFormula (cell *);
+void output_formatted_data (uni_string *, U16, int, int);
+void SetupExtraction (void);
+void trim_sheet_edges (unsigned int);
+void update_default_font (unsigned int);
+void update_default_alignment (unsigned int, int);
+
+
+
+#endif
--- a/xlhtml/xml.c
+++ b/xlhtml/xml.c
@@ -1,38 +1,28 @@
+/*! \file xml.c
+    \brief XML output for xlhtml
+*/
+
+/*
+   Copyright 2002  Charles N Wyble  <jackshck@yahoo.com>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published  by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+

 #include "xlhtml.h"

-extern int  first_sheet;
-extern int  last_sheet;
-extern uni_string  default_font;
-extern void trim_sheet_edges(unsigned int);
-extern int  next_ws_title;
-extern void SetupExtraction(void);
-extern void update_default_font(unsigned int);
-extern void OutputString(uni_string * );
-extern char *lastUpdated; 
-extern int  file_version;
-extern int  NoFormat;
-extern int  notAccurate;
-extern int  formula_warnings;
-extern int  NotImplemented;
-extern int  Unsupported;
-extern int  MaxWorksheetsExceeded;
-extern int  MaxRowExceeded;
-extern int  MaxColExceeded;
-extern int  MaxStringsExceeded;
-extern int  MaxFontsExceeded;
-extern int  MaxPalExceeded;
-extern int  MaxXFExceeded;
-extern int  MaxFormatsExceeded;
-extern char colorTab[MAX_COLORS];
-extern char filename[256];
-extern int  UnicodeStrings;
-extern char	*title;
-extern void update_default_alignment(unsigned int, int);
-extern void output_cell( cell *, int); 
-extern uni_string author;
-
-work_sheet **ws_array;


 void OutputTableXML(void)
@@ -87,8 +77,8 @@ void OutputTableXML(void)
 		}

 		printf( "\t\t\t<firstrow>%ld</firstrow>\n", (unsigned long)ws_array[i]->first_row );
-		printf( "\t\t\t<lastrow>%ld</lastrow>\n", (int) ws_array[i]->biggest_row );
-		printf( "\t\t\t<firstcol>%d</firstcol>\n", (long) ws_array[i]->first_col );
+		printf( "\t\t\t<lastrow>%d</lastrow>\n", (int) ws_array[i]->biggest_row );
+		printf( "\t\t\t<firstcol>%ld</firstcol>\n", (long) ws_array[i]->first_col );
 		printf( "\t\t\t<lastcol>%d</lastcol>\n", (int)ws_array[i]->biggest_col );
 		printf( "\t\t\t<rows>\n" );