diff --git a/xlhtml/ChangeLog b/xlhtml/ChangeLog
index 4b844b0..5205d49 100644
--- a/xlhtml/ChangeLog
+++ b/xlhtml/ChangeLog
@@ -1,16 +1,22 @@
xlHtml ChangeLog
HEAD
- * Temporary fix for non-Unicode 8-bit characters in utf-8 output until
- real charset conversion is in place
- * Fixed some typos and formatting
- * Fixed closing of tag
- * Fixed raw Unicode (A.B.C.D., where . is character 0x00) in output
- when string in XLS begins as 8-bit but continues as 16-bit
- * Fixed invalid XML output - missing quotes aroud size attribute
- of
- * Made rowspan and colspan attributes instead of content of
- in XML output
+ - Vaclav Dvorak:
+ * Temporary fix for non-Unicode 8-bit characters in utf-8 output until
+ real charset conversion is in place
+ * Fixed some typos and formatting
+ * Fixed closing of tag
+ * Fixed invalid XML output - missing quotes around size attribute
+ of
+ * Made rowspan and colspan attributes instead of content of
+ in XML output
+ * Updated doxygen.conf and in-source documentation and comments
+ * Fixed SST code when a string on the boundary of a BIFF SST and
+ CONTINUE starts as Unicode and continues as 8-bit or vice versa;
+ indication: raw Unicode (A.b.c.d., where . is character 0x00) in
+ output, or MaxStringsExceeded
+ * Enabled compilation warnings, made sure there are none
+ * More code modularization
* Added some Alpha portability fixes.
0.5 04/13/02
diff --git a/xlhtml/Makefile.am b/xlhtml/Makefile.am
index b68d6cd..0c02a1f 100644
--- a/xlhtml/Makefile.am
+++ b/xlhtml/Makefile.am
@@ -12,9 +12,9 @@ LDADD = ../cole/libcole.a -lm
xlhtml_SOURCES = support.c xlhtml.c html.c ascii.c xml.c
xldump_SOURCES = xldump.c
xlcdump_SOURCES = xlcdump.c
-#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
+AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
-doc:
+doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
@echo Generating documentation...
doxygen doxygen.conf
@echo done.
diff --git a/xlhtml/Makefile.in b/xlhtml/Makefile.in
index cfdfa7a..ecf9d6e 100644
--- a/xlhtml/Makefile.in
+++ b/xlhtml/Makefile.in
@@ -109,6 +109,7 @@ xlcdump_LDADD = $(LDADD)
xlcdump_DEPENDENCIES = ../cole/libcole.a
xlcdump_LDFLAGS =
SCRIPTS = $(bin_SCRIPTS) $(noinst_SCRIPTS)
+AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
CFLAGS = @CFLAGS@
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@@ -383,9 +384,7 @@ install-am install uninstall-am uninstall all-redirect all-am all \
installdirs mostlyclean-generic distclean-generic clean-generic \
maintainer-clean-generic clean mostlyclean distclean maintainer-clean
-#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
-
-doc:
+doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
@echo Generating documentation...
doxygen doxygen.conf
@echo done.
diff --git a/xlhtml/TODO b/xlhtml/TODO
index 2249862..0759a32 100644
--- a/xlhtml/TODO
+++ b/xlhtml/TODO
@@ -1,5 +1,13 @@
-0.4 ?
-Fix nasty Unicode bug when very large shared string tables are encountereds
-Support String formulas
-Reduce memory footprint
-Support data types by using their format string
+- Update documentation
+- Implement real character set conversion (using iconv); there should be a
+ command-line option to set output charset and another to override input
+ charset. Default: auto-detection for both.
+- Define an XML DTD and stick to it. Look into the OpenOffice spreadsheet
+ format - we don't need two different spreadsheet XML formats...
+- Modularize code. Separate XLS reading into a library, make a frontend with
+ various output formats: XML, HTML, CSV...
+- Make main_line_processor()'s working buffer grow dynamically
+- Notes from Steve Grubb:
+ - Support String formulas
+ - Reduce memory footprint
+ - Support data types by using their format string
diff --git a/xlhtml/ascii.c b/xlhtml/ascii.c
index 1657bbc..03681ea 100644
--- a/xlhtml/ascii.c
+++ b/xlhtml/ascii.c
@@ -1,21 +1,29 @@
+/*! \file ascii.c
+ \brief ASCII and CSV output for xlhtml
+*/
+
+/*
+ Copyright 2002 Charles N Wyble
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
#include "xlhtml.h"
+#include "support.h"
-extern void do_cr(void);
-extern int first_sheet;
-extern int last_sheet;
-extern void OutputString(uni_string * );
-extern char filename[256];
-extern void output_cell( cell *, int);
-extern int Csv;
-work_sheet **ws_array;
-xf_attr **xf_array;
-
-extern int IsCellNumeric(cell *);
-extern int IsCellSafe(cell *);
-extern int IsCellFormula(cell *);
-extern void output_formatted_data(uni_string *, U16, int, int);
-extern void SetupExtraction(void);
void OutputPartialTableAscii(void)
diff --git a/xlhtml/doxygen.conf b/xlhtml/doxygen.conf
index c622d01..0d75b4a 100644
--- a/xlhtml/doxygen.conf
+++ b/xlhtml/doxygen.conf
@@ -1,164 +1,732 @@
-# Doxyfile 0.1
+# Doxyfile 1.2.3
+
+# This file describes the settings to be used by doxygen for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
#---------------------------------------------------------------------------
# General configuration options
#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
PROJECT_NAME = xlhtml
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
OUTPUT_DIRECTORY = doc
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Dutch, French, Italian, Czech, Swedish, German, Finnish, Japanese,
+# Korean, Hungarian, Spanish, Romanian, Russian, Croatian, Polish,
+# Portuguese and Slovene.
+
OUTPUT_LANGUAGE = English
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
EXTRACT_STATIC = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
HIDE_UNDOC_CLASSES = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
REPEAT_BRIEF = YES
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
ALWAYS_DETAILED_SEC = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. It is allowed to use relative paths in the argument list.
+
STRIP_FROM_PATH =
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
INTERNAL_DOCS = NO
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a class diagram (in Html and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off.
+
CLASS_DIAGRAMS = NO
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+
SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
STRIP_CODE_COMMENTS = YES
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower case letters. If set to YES upper case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# users are advised to set this option to NO.
+
CASE_SENSE_NAMES = YES
-SHORT_NAMES = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
HIDE_SCOPE_NAMES = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
VERBATIM_HEADERS = NO
-SHOW_INCLUDE_FILES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description).
+
JAVADOC_AUTOBRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# reimplements.
+
INHERIT_DOCS = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
INLINE_INFO = NO
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
SORT_MEMBER_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
TAB_SIZE = 8
-GENERATE_TODOLIST = YES
-GENERATE_TESTLIST = YES
-GENERATE_BUGLIST = YES
-ALIASES =
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
ENABLED_SECTIONS =
-MAX_INITIALIZER_LINES = 30
-OPTIMIZE_OUTPUT_FOR_C = YES
-SHOW_USED_FILES = YES
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
-QUIET = NO
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
WARN_IF_UNDOCUMENTED = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
WARN_LOGFILE =
+
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
-INPUT = xlhtml.c
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
RECURSIVE = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
EXCLUDE =
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
EXAMPLE_PATTERNS =
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+
INPUT_FILTER =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse.
+
FILTER_SOURCE_FILES = NO
+
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
IGNORE_PREFIX =
+
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
HTML_OUTPUT = html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet
+
HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
GENERATE_HTMLHELP = NO
-GENERATE_CHI = NO
-BINARY_TOC = NO
-TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
ENUM_VALUES_PER_LINE = 4
-GENERATE_TREEVIEW = NO
-TREEVIEW_WIDTH = 250
+
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
LATEX_OUTPUT = latex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
LATEX_BATCHMODE = NO
+
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimised for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using Word or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
RTF_STYLESHEET_FILE =
-RTF_EXTENSIONS_FILE =
+
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
MAN_EXTENSION = .3
-MAN_LINKS = NO
+
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation. Warning: This feature
+# is still experimental and very incomplete.
+
GENERATE_XML = NO
+
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed.
+
PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
EXPAND_AS_DEFINED =
-SKIP_FUNCTION_MACROS = YES
+
#---------------------------------------------------------------------------
# Configuration::addtions related to external references
#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tagfiles.
+
TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
ALLEXTERNALS = NO
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
PERL_PATH = /usr/bin/perl
+
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
HAVE_DOT = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
COLLABORATION_GRAPH = YES
-TEMPLATE_RELATIONS = YES
+
+# If the ENABLE_PREPROCESSING, INCLUDE_GRAPH, and HAVE_DOT tags are set to
+# YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other
+# documented files.
+
INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, INCLUDED_BY_GRAPH, and HAVE_DOT tags are set to
+# YES then doxygen will generate a graph for each documented header file showing
+# the documented files that directly or indirectly include this file
+
INCLUDED_BY_GRAPH = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
GRAPHICAL_HIERARCHY = YES
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
DOT_PATH =
-DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
MAX_DOT_GRAPH_HEIGHT = 1024
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
GENERATE_LEGEND = YES
-DOT_CLEANUP = YES
+
#---------------------------------------------------------------------------
# Configuration::addtions related to the search engine
#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
SEARCHENGINE = NO
+
+# The CGI_NAME tag should be the name of the CGI script that
+# starts the search engine (doxysearch) with the correct parameters.
+# A script with this name will be generated by doxygen.
+
CGI_NAME = search.cgi
+
+# The CGI_URL tag should be the absolute URL to the directory where the
+# cgi binaries are located. See the documentation of your http daemon for
+# details.
+
CGI_URL =
+
+# The DOC_URL tag should be the absolute URL to the directory where the
+# documentation is located. If left blank the absolute path to the
+# documentation, with file:// prepended to it, will be used.
+
DOC_URL =
+
+# The DOC_ABSPATH tag should be the absolute path to the directory where the
+# documentation is located. If left blank the directory on the local machine
+# will be used.
+
DOC_ABSPATH =
+
+# The BIN_ABSPATH tag must point to the directory where the doxysearch binary
+# is installed.
+
BIN_ABSPATH = /usr/local/bin/
+
+# The EXT_DOC_PATHS tag can be used to specify one or more paths to
+# documentation generated for other projects. This allows doxysearch to search
+# the documentation for these projects as well.
+
EXT_DOC_PATHS =
diff --git a/xlhtml/html.c b/xlhtml/html.c
index 8b8e2d9..24474da 100644
--- a/xlhtml/html.c
+++ b/xlhtml/html.c
@@ -1,58 +1,37 @@
+/*! \file html.c
+ \brief HTML output for xlhtml
+*/
+
+/*
+ Copyright 2002 Charles N Wyble
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
#include "xlhtml.h"
+#include "support.h"
#include
-
-extern void do_cr(void);
-extern int center_tables;
-extern int first_sheet;
-extern int last_sheet;
-extern uni_string default_font;
-extern void trim_sheet_edges(unsigned int);
-extern int next_ws_title;
-extern void SetupExtraction(void);
-extern void update_default_font(unsigned int);
-extern void OutputString(uni_string * );
-extern int default_fontsize;
-extern char *default_alignment;
-extern int aggressive;
-extern char *lastUpdated;
-extern int file_version;
-extern int NoFormat;
-extern int notAccurate;
-extern int formula_warnings;
-extern int NoHeaders;
-extern int NotImplemented;
-extern int Unsupported;
-extern int MaxWorksheetsExceeded;
-extern int MaxRowExceeded;
-extern int MaxColExceeded;
-extern int MaxStringsExceeded;
-extern int MaxFontsExceeded;
-extern int MaxPalExceeded;
-extern int MaxXFExceeded;
-extern int MaxFormatsExceeded;
-extern char colorTab[MAX_COLORS][8];
-extern char *default_text_color;
-extern char *default_background_color;
-extern char *default_image;
-extern char filename[256];
-extern int UnicodeStrings;
-extern int CodePage;
-extern char *title;
-extern void update_default_alignment(unsigned int, int);
-extern void output_cell( cell *, int);
-extern uni_string author;
-extern int null_string(U8 *);
-extern unsigned int next_font;
-work_sheet **ws_array;
-font_attr **font_array;
-
+/* prototypes for functions in this file */
void output_header(void);
void output_footer(void);
+
void OutputTableHTML(void)
{
int i, j, k;
diff --git a/xlhtml/support.c b/xlhtml/support.c
index b95d74a..0035dd9 100644
--- a/xlhtml/support.c
+++ b/xlhtml/support.c
@@ -1,11 +1,34 @@
+/*! \file support.c
+ \brief Various support functions for xlhtml.
+*/
-/* Various support functions for xlhtml. */
+/*
+ Copyright 2002 Charles N Wyble
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#include "support.h"
#include
#include
#include "../cole/cole.h"
#include
+
+
void print_version(void)
{
printf("xlhtml %s \nCopyright (c) 1999-2002, Charles Wyble\n"
@@ -182,8 +205,8 @@ void NumToDate(long num, int *year, int *month, int *day)
*year = *year % 100;
}
-/* noaliasdub macro avoids trouble from gcc -O2 type-based alias analysis */
typedef S32 swords[2];
+/*! avoids trouble from gcc -O2 type-based alias analysis */
#define noaliasdub(type,ptr) \
(((union{swords sw; F64 dub;} *)(ptr))->sw)
diff --git a/xlhtml/tuneable.h b/xlhtml/tuneable.h
index 8d4e5cc..79ae89d 100644
--- a/xlhtml/tuneable.h
+++ b/xlhtml/tuneable.h
@@ -1,7 +1,40 @@
+/*! \file tuneable.h
+ \brief Values for tuning performance of xlhtml
+*/
-#define XFORMATS_INCR 64 /*!< Increments to allocate extended formats */
-#define FONTS_INCR 32 /*!< Increments to allocate fonts */
-#define WORKSHEETS_INCR 4 /*!< Increments to allocate worksheet pages */
-#define COLS_INCR (U16)24 /*!< Increments to allocate Columns per Worksheet page */
-#define ROWS_INCR (U32)128 /*!< Increments to allocate Rows per Worksheet page */
-#define STRINGS_INCR 256UL /*!< Increments to allocate the string array - */
+/*
+ Copyright 2002 Charles N Wyble
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/*! \brief Increments to allocate extended formats */
+#define XFORMATS_INCR 64
+
+/*! \brief Increments to allocate fonts */
+#define FONTS_INCR 32
+
+/*! \brief Increments to allocate worksheet pages */
+#define WORKSHEETS_INCR 4
+
+/*! \brief Increments to allocate Columns per Worksheet page */
+#define COLS_INCR (U16)24
+
+/*! \brief Increments to allocate Rows per Worksheet page */
+#define ROWS_INCR (U32)128
+
+/*! \brief Increments to allocate the string array */
+#define STRINGS_INCR 256UL
diff --git a/xlhtml/xlhtml.c b/xlhtml/xlhtml.c
index f3c1edc..e93fccc 100644
--- a/xlhtml/xlhtml.c
+++ b/xlhtml/xlhtml.c
@@ -1,7 +1,7 @@
/*! \file xlhtml.c
- \brief converts excel files to Html
+ \brief converts MS Excel files to useful formats
- xlhtml generates HTML, XML, csv and tab-delimitted versions of Excel
+ xlhtml generates HTML, XML, csv and tab-delimited versions of MS Excel
spreadsheets.
*/
@@ -21,11 +21,13 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
*/
+
#include "tuneable.h"
#include "xlhtml.h"
+#include "support.h"
+
static U16 HARD_MAX_ROWS = HARD_MAX_ROWS_97;
@@ -117,19 +119,6 @@ const char month_abbr[12][5] = { "Jan", "Feb", "Mar", "Apr", "May", "June",
/* Function Prototypes */
-/* These functions are in support.c */
-extern void print_version(void);
-extern void display_usage(void);
-extern void do_cr(void);
-extern void OutputTableHTML(void);
-extern S32 getLong(U8 *);
-extern U16 getShort(U8 *);
-extern void getDouble(U8 *, F64 *);
-extern int null_string(U8 *);
-extern void FracToTime(U8 *, int *, int *, int *, int *);
-extern void NumToDate(long, int *, int *, int *);
-extern void RKtoDouble(S32, F64 *);
-
/* This function is in xml.c */
extern void OutputTableXML(void);
@@ -137,6 +126,7 @@ extern void OutputTableXML(void);
void OutputPartialTableAscii(void);
/* These functions are in html.c */
+extern void OutputTableHTML(void);
extern void output_start_html_attr(html_attr *h, unsigned int, int);
extern void output_end_html_attr(html_attr *h);
extern void output_footer(void);
@@ -144,21 +134,11 @@ extern void output_header(void);
COLE_LOCATE_ACTION_FUNC scan_file;
void main_line_processor(U16, U16, U32, U16, U8);
-void SetupExtraction(void);
void decodeBoolErr(U16, U16, char *);
-int IsCellNumeric(cell *);
-int IsCellSafe(cell *);
-int IsCellFormula(cell *);
-void output_cell(cell *, int);
-void output_formatted_data(uni_string *, U16, int, int);
void PrintFloatComma(char *, int, F64);
void print_as_fraction(F64, int);
-void trim_sheet_edges(unsigned int);
-void update_default_font(unsigned int);
void incr_f_cnt(uni_string *);
int get_default_font(void);
-void update_default_alignment(unsigned int, int);
-void OutputString(uni_string *);
void OutputCharCorrected(U8);
void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run);
void put_utf8(U16);
@@ -187,15 +167,7 @@ char filename[256];
int file_version = 0;
U32 next_string=0;
unsigned int next_font=0, next_ws_title=0, next_xf=0;
-U8 working_buffer[WBUFF_SIZE];
-unsigned int bufidx, buflast; /*!< Needed for working buffer */
-U8 grbit=0; /*!< Needed by the SST Opcode FC */
-U16 crun=0, cch=0; /*!< Needed by the SST Opcode FC */
-U32 extrst=0; /*!< Needed by the SST Opcode FC */
-U16 nonascii = 0; /*!< Needed by the SST Opcode FC */
int sheet_count=-2; /*!< Number of worksheets found */
-U16 last_opcode = -1; /*!< Used for the continue command */
-unsigned int cont_grbit=0, cont_str_array=0;
uni_string default_font; /*!< Font for table */
int default_fontsize = 3; /*!< Default font size for table */
char *default_alignment = 0; /*!< Alignment for table */
@@ -240,7 +212,7 @@ int OutputXML = 0; /*!< Output as xml */
int DumpPage = 0; /*!< Dump page count & max cols & rows */
int Xtract = 0; /*!< Extract a range on a page. */
int MultiByte = 0; /*!< Output as multibyte */
-int NoHeaders = 0; /*!< Dont output html header */
+int NoHeaders = 0; /*!< Don't output html header */
/* Some Global Flags */
@@ -553,15 +525,10 @@ int main (int argc, char **argv)
-
-
-
-
-
void scan_file(COLEDIRENT *cde, void *_info)
{
- U32 count = 0;
- U16 length=0, target=0, opcode=0, version=0;
+ U32 count=0;
+ U16 target=0, opcode=0, version=0;
U8 buf[16];
COLEFILE *cf;
COLERRNO err;
@@ -576,28 +543,27 @@ void scan_file(COLEDIRENT *cde, void *_info)
/* Read & process the file... */
while (cole_fread(cf, buf, 1, &err))
{
+ /* The BIFF record format:
+ * - byte 0: opcode
+ * - byte 1: version (?)
+ * - bytes 2 and 3: 16-bit data length
+ * - from byte 4: record data
+ */
if (count > 3)
main_line_processor(opcode, version, count-4, target, buf[0]);
else if (count == 0)
- { /* Init everything */
- length = 0;
opcode = (U16)buf[0];
- target = 80; /* ficticious number */
- }
else if (count == 1)
version = (U16)buf[0];
else if (count == 2)
- length = (U16)buf[0];
+ target = (U16)buf[0];
else if (count == 3)
- {
- length |= (U16)(buf[0]<<8);
- target = length;
- }
-
- if (count == (U32)(target+3))
+ target |= (U16)(buf[0]<<8);
+ if ((count >= 3) && (count == (U32)target + 3))
count = 0;
else
count++;
+
if (MaxColExceeded || MaxRowExceeded || MaxWorksheetsExceeded)
break; /* We're outta memory and therefore...done */
@@ -648,10 +614,6 @@ void scan_file(COLEDIRENT *cde, void *_info)
-
-
-
-
void SetupExtraction(void)
{
if (Xtract)
@@ -725,29 +687,41 @@ void SetupExtraction(void)
}
-/*!******************************************************************
-* \param count the absolute count in the record
-* \param last the size of the record
-* \param bufidx the index into the working buffer
-* \param buflast the expected length of the working buffer
-********************************************************************/
+
+/*!
+ * This function gets called once for every single byte of record data.
+ * \brief Process the record data.
+ * \param opcode the opcode of the current BIFF record
+ * \param version the version (?) of the current BIFF record
+ * \param count the absolute offset in the record data
+ * \param last the size of the record data
+ * \param data the character being processed
+ */
void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
{
- U16 cont_opcode = 0;
-
- /* If first pass, reset stuff. */
+ int cont_opcode = 0; /* is this the CONTINUE opcode? */
+ static U16 last_opcode = -1; /* holds last non-CONTINUE opcode */
+ static U8 working_buffer[WBUFF_SIZE];
+ static unsigned int bufidx = 0; /* the index into the working buffer */
+ static unsigned int buflast = 0; /* the expected length of the working buffer */
+ /* Needed by the SST Opcode 0xFC: */
+ static U8 str_options = 0;
+ static int buf_16bit = 0;
+ static int now_16bit = 0;
+ static int after_str_header = 0;
+ static U16 num_chars = 0;
+ static U16 num_fmt_runs = 0;
+ static U32 fareast_bytes = 0;
+ static U16 nonascii = 0;
+
+ /* On start of record, reset stuff. */
if (count == 0)
{
- if (opcode != 0x3C) /* continue command */
-/* {
- printf("\n* * * * * * CONTINUE * * * * * * * * *\n\n");
- }
- else */
- { /* Normal path... */
+ if (opcode != 0x3C) /* not CONTINUE opcode */
+ {
last_opcode = opcode;
bufidx = 0;
buflast = 0;
- cont_str_array = 0;
memset(working_buffer, 0, WBUFF_SIZE);
}
}
@@ -760,8 +734,10 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
/* Abort processing if too big. Next opcode will reset everything. */
if (bufidx >= WBUFF_SIZE)
{
- /*printf("OC:%02X C:%04X I:%04X BL:%04X cch:%04X gr:%04X\n", opcode, count, bufidx, buflast, cch, grbit); */
- /*abort(); */
+ /* this will be printed many times; leave it this way since it's temporary
+ * anyway - the buffer must be made dynamic
+ */
+ fprintf(stderr, "Warning: working buffer overflow!\n");
return;
}
@@ -906,13 +882,13 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
{ /* Remember, bufidx is 1 more than it should be */
if ((bufidx == 8)&&(buflast == 0))
{ /* buflast = working_buffer[7]; */
- cch = getShort(&working_buffer[6]);
- buflast = cch + 9;
+ num_chars = getShort(&working_buffer[6]);
+ buflast = num_chars + 9;
}
if (bufidx == 9)
{
if (working_buffer[8] == 1)
- buflast = (cch << 1) + 9;
+ buflast = (num_chars << 1) + 9;
}
if (buflast)
{
@@ -933,7 +909,7 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
if (working_buffer[8] == 1)
{
UnicodeStrings = 2;
- add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(cch << 1), 0, 0);
+ add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(num_chars << 1), 0, 0);
}
else
add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8], len, 0, 0);
@@ -965,160 +941,174 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
}
}
break;
- /************
- * This function has 2 entry points. 1 is the mainline FC opcode.
- * In this event there are several bytes that setup the type of
- * strings that will follow. Then there is the continue entry
- * point which is immediate - e.g location 0.
- *************/
- case 0xFC: /* Packed String Array A.K.A. SST Shared String Table...UNI */
- if ((count > 7)||(cont_opcode == 1)) /* Skip the 1st 8 locations they are bs */
+ case 0xFC: /* Packed String Array A.K.A. SST - Shared String Table */
+ /* Format of the SST:
+ * - bytes 0-3: total number of strings in the workbook (32-bit int)
+ * - bytes 4-7: number of strings following this header (32-bit int)
+ * - from byte 8: strings; count is in the preceding int
+ * Format of the individual strings:
+ * - bytes 0-1: LN = number of characters (not bytes!) in the string (16-bit int)
+ * - byte 2: option flags (see below)
+ * - from byte 3:
+ * if rich-text flag set:
+ * - RT = number of rich-text formatting runs (16-bit int)
+ * if far-east flag set:
+ * - FE = far-east data size (32-bit int)
+ * LN characters, either 8-bit or 16-bit each
+ * if rich-text flag set:
+ * - RT-times repeated:
+ * - first formatted character, zero-based (16-bit int)
+ * - index to font record (16-bit int)
+ * if far-east flag set:
+ * - unknown extended data about phonetic, keyboard etc. (FE bytes)
+ * The option flags byte (byte 2 in string) is a bit field:
+ * - bit 0 (01h): 0 = 8-bit characters, 1 = 16-bit characters
+ * - bit 2 (04h): 0 = no far-east info, 1 = contains far-east info
+ * - bit 3 (08h): 0 = no rich-text formatting, 1 = contains rich-text formatting
+ *
+ * The CONTINUE opcode has a special case for the SST: when a string
+ * is split into two records, there is one additional byte at the start
+ * of the second record, which indicates (like the option flags at the
+ * start of the string) with its bit 0 whether the continuation has
+ * 8-bit or 16-bit characters. Thus, the strings can start with 8-bit
+ * characters and continue with 16-bit characters, or vice versa.
+ */
+ if ((count == 0) && !cont_opcode)
+ { /* initialize variables */
+ bufidx = 0;
+ after_str_header = 0;
+ }
+ if ((count < 8) && !cont_opcode)
+ /*! \todo For now, ignore the SST header. Ideally, we should
+ * process at most the number of strings that is stored in
+ * the header, ignoring the rest. Fortunately, the case when
+ * this would be necessary doesn't seem to occur.
+ */
+ break;
+ if ((count == 0) && cont_opcode && after_str_header)
{
-/* if ((count == 0)&&(data == 0)&&(buflast)) */
- if ((count == 0)&&(cont_opcode == 1)&&(buflast))
- {
-/* printf("Adjusting...\n"); */
-/* printf("I:%04X BL:%04X\n", bufidx, buflast); */
- cont_str_array = 1;
- cont_grbit = data;
- if ((cont_grbit & 0x01) && !(grbit & 0x01))
- { /* previous chunk was not Unicode but this one is */
- int i;
- grbit |= 0x01;
- for (i = bufidx-1; i >= 0; i--) {
- working_buffer[2*i] = working_buffer[i];
- working_buffer[2*i+1] = 0;
- }
+ now_16bit = data & 0x01;
+ if (now_16bit && !buf_16bit)
+ { /* previous chunk was not Unicode but this one is */
+ int i;
+ /* we will be doubling the buffer contents
+ * => check if there is space
+ */
+ if (2*buflast >= WBUFF_SIZE)
+ {
+ bufidx = WBUFF_SIZE + 1;
+ break;
+ }
+ if (bufidx >= num_chars)
+ { /* weird - we got a unicode flag, but we're
+ * actually already done with the string data
+ */
+ now_16bit = 0;
+ }
+ else
+ {
+ buf_16bit = 1;
+ if (bufidx > 0)
+ for (i = bufidx-1; i >= 0; i--) {
+ working_buffer[2*i] = working_buffer[i];
+ working_buffer[2*i+1] = 0;
+ }
bufidx = 2 * bufidx;
- buflast = 2 * buflast;
+ buflast = 2*num_chars + 4*num_fmt_runs + fareast_bytes;
}
- return;
}
+ break;
+ }
- working_buffer[bufidx] = data;
- bufidx++;
+ working_buffer[bufidx++] = data;
+ if (buf_16bit && !now_16bit && after_str_header && (bufidx < (2*num_chars)))
+ /* ASCII -> unicode */
+ working_buffer[bufidx++] = 0;
- if((cont_str_array)&&(grbit & 0x01)&& !(cont_grbit & 0x01))
- { /* ASCII -> unicode */
- working_buffer[bufidx] = 0;
- bufidx++;
+ if (! after_str_header) /* Header processor */
+ {
+ if (bufidx == 0x03) /* After 3 locations we have length */
+ { /* and option flags... */
+ num_chars = getShort(&working_buffer[0]);
+ str_options = working_buffer[2];
+ now_16bit = buf_16bit = str_options & 0x01;
}
-
- if (buflast == 0) /* Header processor */
- {
- if (bufidx == 0x03) /* After 3 locations we have length */
- { /* and type of chars... */
- cch = getShort(&working_buffer[0]);
- grbit = working_buffer[2];
-
- if (grbit < 0x04) /* Normal run */
- {
- nonascii = 0;
- bufidx = 0;
- crun = 0;
- extrst = 0;
- buflast = cch << (grbit & 0x01);
-
- /* special case for empty strings */
- if (!cch && !buflast)
- add_str_array(0, (U8 *)0, 0, 0, 0);
- else
- memset(working_buffer, 0, WBUFF_SIZE);
- }
- }
- else if (bufidx == 0x05)
- {
- if ((grbit & 0x0C) == 0x08) /* Rich string only */
- {
- nonascii = 0;
- bufidx = 0;
- crun = getShort(&working_buffer[3]);
- extrst = 0;
- buflast = (cch << (grbit & 0x01)) + (crun*4);
-/* printf("rtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
- buflast, cch, grbit, extrst, crun, last);
- printf("%02X %02X %02X %02X %02X %02X\n",
- working_buffer[0], working_buffer[1], working_buffer[2],
- working_buffer[3], working_buffer[4], working_buffer[5]); */
- memset(working_buffer, 0, WBUFF_SIZE);
- }
- }
- else if (bufidx == 0x07)
- {
- if ((grbit & 0x0C) == 0x04) /* Extended string only */
- {
- nonascii = 0;
- bufidx = 0;
- crun = 0;
- extrst = getLong(&working_buffer[3]);
- buflast = (cch << (grbit & 0x01)) + extrst;
-/* printf("esbuflast:%X cch%X grbit:%X extrst:%X last:%X\n",
- buflast, cch, grbit, extrst, last);
- printf("%02X %02X %02X %02X %02X %02X\n",
- working_buffer[0], working_buffer[1], working_buffer[2],
- working_buffer[3], working_buffer[4], working_buffer[5]); */
- memset(working_buffer, 0, WBUFF_SIZE);
- }
- }
- else if (bufidx == 0x09)
- {
- if ((grbit & 0x0C) == 0x0C)
- {
- /* Rich String + Extended String **/
- nonascii = 0;
- bufidx = 0;
- crun = getShort(&working_buffer[3]);
- extrst = getLong(&working_buffer[5]);
- buflast = (cch << (grbit & 0x01)) + extrst + (crun*4);
-/* printf("xrtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
- buflast, cch, grbit, extrst, crun, last);
- printf("%02X %02X %02X %02X %02X %02X\n",
- working_buffer[0], working_buffer[1], working_buffer[2],
- working_buffer[3], working_buffer[4], working_buffer[5]); */
- memset(working_buffer, 0, WBUFF_SIZE);
- }
- }
-/* printf("*%02X ", data); */
+ if ((bufidx == 0x03) && ((str_options & 0x0C) == 0))
+ { /* Normal run */
+ num_fmt_runs = 0;
+ fareast_bytes = 0;
+ buflast = num_chars << buf_16bit;
+ after_str_header = 1;
}
- else /* payload processor */
- {
-/* if (cont_opcode == 1)
- printf(" %02X", data); */
- if (data > 127)
- nonascii = 1;
- if (bufidx == buflast)
+ else if ((bufidx == 0x05) && ((str_options & 0x0C) == 0x08))
+ { /* Rich-text formatted string only */
+ num_fmt_runs = getShort(&working_buffer[3]);
+ fareast_bytes = 0;
+ buflast = (num_chars << buf_16bit) + (num_fmt_runs*4);
+ after_str_header = 1;
+ }
+ else if ((bufidx == 0x07) && ((str_options & 0x0C) == 0x04))
+ { /* Extended (Far-East) string only */
+ num_fmt_runs = 0;
+ fareast_bytes = getLong(&working_buffer[3]);
+ buflast = (num_chars << buf_16bit) + fareast_bytes;
+ after_str_header = 1;
+ }
+ else if ((bufidx == 0x09) && ((str_options & 0x0C) == 0x0C))
+ { /* Rich-text formatted string + Extended (Far-East) string */
+ num_fmt_runs = getShort(&working_buffer[3]);
+ fareast_bytes = getLong(&working_buffer[5]);
+ buflast = (num_chars << buf_16bit) + fareast_bytes + (num_fmt_runs*4);
+ after_str_header = 1;
+ }
+ else if (bufidx >= 0x09)
+ { /* Houston, we have a problem. This should never happen. */
+ fprintf(stderr, "An error in SST processing occured. Please contact the author.\n");
+ bufidx = WBUFF_SIZE + 1; /* simulate buffer overflow to skip rest of opcode */
+ break;
+ }
+ if (after_str_header)
+ { /* the string data is now starting */
+ if (buflast == 0)
+ { /* special case for empty strings */
+ add_str_array(0, (U8 *)0, 0, 0, 0);
+ after_str_header = 0;
+ }
+ else
{
- U8 uni;
- U16 len = (U16)(cch << (grbit & 0x01));
-/* int i; */
-
- if (grbit & 01)
- {
- uni = 2;
- UnicodeStrings = 2;
- }
- else
- uni = nonascii;
- working_buffer[bufidx] = 0;
-/* fprintf(stderr,":buflast-"); */
-/* { int i; */
-/* for (i=0; i UnicodeStrings) /* Try to "upgrade" charset */
- UnicodeStrings = uni;
- bufidx = 0;
- buflast = 0;
- cch = 0;
- cont_str_array = 0;
memset(working_buffer, 0, WBUFF_SIZE);
+ nonascii = 0;
}
+ bufidx = 0;
+ }
+ }
+ else /* payload processor */
+ {
+ if (data > 127)
+ nonascii = 1;
+ if (bufidx == buflast)
+ {
+ U8 uni;
+ U16 len = (U16)(num_chars << buf_16bit);
+
+ if (buf_16bit)
+ {
+ uni = 2;
+ UnicodeStrings = 2;
+ }
+ else
+ uni = nonascii;
+ working_buffer[bufidx] = 0;
+
+ if (num_fmt_runs)
+ add_str_array(uni, working_buffer, len, working_buffer+len, num_fmt_runs);
+ else
+ add_str_array(uni, working_buffer, len, 0, 0);
+ if (uni > UnicodeStrings) /* Try to "upgrade" charset */
+ UnicodeStrings = uni;
+ bufidx = 0;
+ after_str_header = 0;
+ memset(working_buffer, 0, WBUFF_SIZE);
}
}
break;
@@ -1806,9 +1796,6 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
-
-
-
/*! returns 1 on error, 0 on success */
int ws_init(int i)
{
@@ -1841,6 +1828,8 @@ int ws_init(int i)
return 0;
}
+
+
/*! returns 1 on error, 0 on success */
int add_more_worksheet_ptrs(void)
{
@@ -1877,6 +1866,8 @@ int add_more_worksheet_ptrs(void)
return 0;
}
+
+
int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
{
cell **tc_array;
@@ -1908,6 +1899,8 @@ int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
return 0;
}
+
+
void add_wb_array(U16 r, U16 c, U16 xf, U16 type, U8 uni,
U8 *str, U16 len, U16 crun_cnt, U8 *fmt_run)
{
diff --git a/xlhtml/xlhtml.h b/xlhtml/xlhtml.h
index b0ad64a..caa2b00 100644
--- a/xlhtml/xlhtml.h
+++ b/xlhtml/xlhtml.h
@@ -1,3 +1,30 @@
+/*! \file xlhtml.h
+ \brief Header file for xlhtml
+*/
+
+/*
+ Copyright 2002 Charles N Wyble
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+#ifndef __XLHTML_H_INCLUDED
+#define __XLHTML_H_INCLUDED
+
+
#if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))
#include "config.h" /* Created by ./configure script */
@@ -19,9 +46,9 @@
#include
/* Used by packed string array Opcode: 0xFC */
-#define HARD_MAX_ROWS_97 0x7FFE /*!< Used in add_wb_array to prevent OOM */
-#define HARD_MAX_ROWS_95 0x3FFF /*!< Used in add_wb_array to prevent OOM */
-#define HARD_MAX_COLS 256 /*!< Used in add_wb_array to prevent OOM */
+#define HARD_MAX_ROWS_97 0x7FFE /*!< Used in add_wb_array to prevent OOM */
+#define HARD_MAX_ROWS_95 0x3FFF /*!< Used in add_wb_array to prevent OOM */
+#define HARD_MAX_COLS 256 /*!< Used in add_wb_array to prevent OOM */
/**********************************
*
@@ -40,27 +67,30 @@
#define GLOBAL_UMASK (2)
#endif
-typedef struct /*!< This encapsulates the Unicode String */
+/*! \brief This encapsulates the Unicode String */
+typedef struct
{
U8 uni; /*!< Unicode String: 0==ASCII/8859-1, 1==windows-1252, 2==utf-8 */
U8 *str; /*!< Characters of string */
U16 len; /*!< Length of string */
U8 *fmt_run; /*!< formatting run, short pairs: offset, index to font */
U8 crun_cnt; /*!< The count of format runs */
-}uni_string;
+} uni_string;
-typedef struct /*!< This is everything we need for a cell */
+/*! \brief This is everything we need for a cell */
+typedef struct
{
- U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */
- U16 type; /*!< This will record the record type that generated the cell */
+ U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */
+ U16 type; /*!< This will record the record type that generated the cell */
U16 spanned; /*!< If 1 don't output */
uni_string ustr; /*!< The cell's displayed contents */
U16 rowspan; /*!< rows to span */
U16 colspan; /*!< columns to span */
uni_string h_link; /*!< If a hyperlinked cell, this is the link*/
-}cell;
+} cell;
-typedef struct /*!< This encapsulates some information about each worksheet */
+/*! \brief This encapsulates some information about each worksheet */
+typedef struct
{
U32 first_row;
S32 biggest_row;
@@ -71,9 +101,10 @@ typedef struct /*!< This encapsulates some information about each worksheet */
uni_string ws_title;
cell **c_array;
U16 spanned;
-}work_sheet;
+} work_sheet;
-typedef struct /*!< This is everything we need to know about fonts */
+/*! \brief This is everything we need to know about fonts */
+typedef struct
{
U16 size;
U16 attr;
@@ -82,15 +113,16 @@ typedef struct /*!< This is everything we need to know about fonts */
U16 super;
U8 underline;
uni_string name;
-}font_attr;
+} font_attr;
typedef struct
{
uni_string *name;
U16 cnt;
-}fnt_cnt;
+} fnt_cnt;
-typedef struct /*!< This covers the Extended Format records */
+/*! \brief This covers the Extended Format records */
+typedef struct
{
U16 fnt_idx;
U16 fmt_idx;
@@ -101,9 +133,10 @@ typedef struct /*!< This covers the Extended Format records */
U16 b_l_color;
U32 b_t_color;
U16 cell_color;
-}xf_attr;
+} xf_attr;
-typedef struct /*!< HTML Attribute */
+/*! \brief HTML Attribute */
+typedef struct
{
int fflag; /*!< Font Flag */
int bflag; /*!< Bold Flag */
@@ -112,6 +145,60 @@ typedef struct /*!< HTML Attribute */
int uflag; /*!< Underline flag */
int sbflag; /*!< Subscript */
int spflag; /*!< Superscript */
-}html_attr;
+} html_attr;
+extern int first_sheet;
+extern int last_sheet;
+extern char filename[256];
+extern char *default_text_color;
+extern char *default_background_color;
+extern char *default_image;
+extern int aggressive;
+extern int center_tables;
+extern int NoHeaders;
+extern int formula_warnings;
+extern int Csv;
+extern xf_attr **xf_array;
+extern work_sheet **ws_array;
+extern font_attr **font_array;
+extern uni_string default_font;
+extern unsigned int next_font;
+extern unsigned int next_ws_title;
+extern int default_fontsize;
+extern char *default_alignment;
+extern char *title;
+extern uni_string author;
+extern char *lastUpdated;
+extern int file_version;
+extern char colorTab[MAX_COLORS][8];
+extern int NoFormat;
+extern int notAccurate;
+extern int NotImplemented;
+extern int Unsupported;
+extern int MaxPalExceeded;
+extern int MaxXFExceeded;
+extern int MaxFormatsExceeded;
+extern int MaxColExceeded;
+extern int MaxRowExceeded;
+extern int MaxWorksheetsExceeded;
+extern int MaxStringsExceeded;
+extern int MaxFontsExceeded;
+extern int UnicodeStrings;
+extern int CodePage;
+
+
+void OutputString (uni_string *);
+void output_cell (cell *, int);
+int IsCellNumeric (cell *);
+int IsCellSafe (cell *);
+int IsCellFormula (cell *);
+void output_formatted_data (uni_string *, U16, int, int);
+void SetupExtraction (void);
+void trim_sheet_edges (unsigned int);
+void update_default_font (unsigned int);
+void update_default_alignment (unsigned int, int);
+
+
+
+#endif
diff --git a/xlhtml/xml.c b/xlhtml/xml.c
index 9e908d3..c9a105d 100644
--- a/xlhtml/xml.c
+++ b/xlhtml/xml.c
@@ -1,38 +1,28 @@
+/*! \file xml.c
+ \brief XML output for xlhtml
+*/
+
+/*
+ Copyright 2002 Charles N Wyble
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
#include "xlhtml.h"
-extern int first_sheet;
-extern int last_sheet;
-extern uni_string default_font;
-extern void trim_sheet_edges(unsigned int);
-extern int next_ws_title;
-extern void SetupExtraction(void);
-extern void update_default_font(unsigned int);
-extern void OutputString(uni_string * );
-extern char *lastUpdated;
-extern int file_version;
-extern int NoFormat;
-extern int notAccurate;
-extern int formula_warnings;
-extern int NotImplemented;
-extern int Unsupported;
-extern int MaxWorksheetsExceeded;
-extern int MaxRowExceeded;
-extern int MaxColExceeded;
-extern int MaxStringsExceeded;
-extern int MaxFontsExceeded;
-extern int MaxPalExceeded;
-extern int MaxXFExceeded;
-extern int MaxFormatsExceeded;
-extern char colorTab[MAX_COLORS];
-extern char filename[256];
-extern int UnicodeStrings;
-extern char *title;
-extern void update_default_alignment(unsigned int, int);
-extern void output_cell( cell *, int);
-extern uni_string author;
-
-work_sheet **ws_array;
void OutputTableXML(void)
@@ -87,8 +77,8 @@ void OutputTableXML(void)
}
printf( "\t\t\t%ld\n", (unsigned long)ws_array[i]->first_row );
- printf( "\t\t\t%ld\n", (int) ws_array[i]->biggest_row );
- printf( "\t\t\t%d\n", (long) ws_array[i]->first_col );
+ printf( "\t\t\t%d\n", (int) ws_array[i]->biggest_row );
+ printf( "\t\t\t%ld\n", (long) ws_array[i]->first_col );
printf( "\t\t\t%d\n", (int)ws_array[i]->biggest_col );
printf( "\t\t\t\n" );
@@ -114,7 +104,7 @@ void OutputTableXML(void)
printf( "\t\t\t\n" );
printf("\t\t\n");
}
- printf( "\t\n" );
+ printf( "\t\n" );
/* Print the author's name in itallics... */
if (author.str)
@@ -173,5 +163,5 @@ void OutputTableXML(void)
/* Output Credit */
printf("\tCreated with xlhtml %s\n", VERSION);
printf("\thttp://chicago.sf.net/xlhtml/\n");
- printf( "\n" );
+ printf( "\n" );
}
| |